aws-bootstrap-g4dn 0.5.0__py3-none-any.whl → 0.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aws_bootstrap/cli.py +502 -86
- aws_bootstrap/config.py +2 -0
- aws_bootstrap/ec2.py +137 -8
- aws_bootstrap/output.py +106 -0
- aws_bootstrap/resources/remote_setup.sh +2 -2
- aws_bootstrap/ssh.py +142 -20
- aws_bootstrap/tests/test_cli.py +652 -4
- aws_bootstrap/tests/test_config.py +18 -0
- aws_bootstrap/tests/test_ebs.py +245 -0
- aws_bootstrap/tests/test_output.py +192 -0
- aws_bootstrap/tests/test_ssh_config.py +76 -0
- aws_bootstrap/tests/test_ssh_ebs.py +76 -0
- {aws_bootstrap_g4dn-0.5.0.dist-info → aws_bootstrap_g4dn-0.7.0.dist-info}/METADATA +79 -7
- aws_bootstrap_g4dn-0.7.0.dist-info/RECORD +31 -0
- aws_bootstrap_g4dn-0.5.0.dist-info/RECORD +0 -27
- {aws_bootstrap_g4dn-0.5.0.dist-info → aws_bootstrap_g4dn-0.7.0.dist-info}/WHEEL +0 -0
- {aws_bootstrap_g4dn-0.5.0.dist-info → aws_bootstrap_g4dn-0.7.0.dist-info}/entry_points.txt +0 -0
- {aws_bootstrap_g4dn-0.5.0.dist-info → aws_bootstrap_g4dn-0.7.0.dist-info}/licenses/LICENSE +0 -0
- {aws_bootstrap_g4dn-0.5.0.dist-info → aws_bootstrap_g4dn-0.7.0.dist-info}/top_level.txt +0 -0
aws_bootstrap/cli.py
CHANGED
|
@@ -10,8 +10,13 @@ import click
|
|
|
10
10
|
|
|
11
11
|
from .config import LaunchConfig
|
|
12
12
|
from .ec2 import (
|
|
13
|
+
EBS_MOUNT_POINT,
|
|
13
14
|
CLIError,
|
|
15
|
+
attach_ebs_volume,
|
|
16
|
+
create_ebs_volume,
|
|
17
|
+
delete_ebs_volume,
|
|
14
18
|
ensure_security_group,
|
|
19
|
+
find_ebs_volumes_for_instance,
|
|
15
20
|
find_tagged_instances,
|
|
16
21
|
get_latest_ami,
|
|
17
22
|
get_spot_price,
|
|
@@ -19,13 +24,18 @@ from .ec2 import (
|
|
|
19
24
|
list_amis,
|
|
20
25
|
list_instance_types,
|
|
21
26
|
terminate_tagged_instances,
|
|
27
|
+
validate_ebs_volume,
|
|
22
28
|
wait_instance_ready,
|
|
23
29
|
)
|
|
30
|
+
from .output import OutputFormat, emit, is_text
|
|
24
31
|
from .ssh import (
|
|
25
32
|
add_ssh_host,
|
|
33
|
+
cleanup_stale_ssh_hosts,
|
|
34
|
+
find_stale_ssh_hosts,
|
|
26
35
|
get_ssh_host_details,
|
|
27
36
|
import_key_pair,
|
|
28
37
|
list_ssh_hosts,
|
|
38
|
+
mount_ebs_volume,
|
|
29
39
|
private_key_path,
|
|
30
40
|
query_gpu_info,
|
|
31
41
|
remove_ssh_host,
|
|
@@ -39,22 +49,32 @@ SETUP_SCRIPT = Path(__file__).parent / "resources" / "remote_setup.sh"
|
|
|
39
49
|
|
|
40
50
|
|
|
41
51
|
def step(number: int, total: int, msg: str) -> None:
|
|
52
|
+
if not is_text():
|
|
53
|
+
return
|
|
42
54
|
click.secho(f"\n[{number}/{total}] {msg}", bold=True, fg="cyan")
|
|
43
55
|
|
|
44
56
|
|
|
45
57
|
def info(msg: str) -> None:
|
|
58
|
+
if not is_text():
|
|
59
|
+
return
|
|
46
60
|
click.echo(f" {msg}")
|
|
47
61
|
|
|
48
62
|
|
|
49
63
|
def val(label: str, value: str) -> None:
|
|
64
|
+
if not is_text():
|
|
65
|
+
return
|
|
50
66
|
click.echo(f" {label}: " + click.style(str(value), fg="bright_white"))
|
|
51
67
|
|
|
52
68
|
|
|
53
69
|
def success(msg: str) -> None:
|
|
70
|
+
if not is_text():
|
|
71
|
+
return
|
|
54
72
|
click.secho(f" {msg}", fg="green")
|
|
55
73
|
|
|
56
74
|
|
|
57
75
|
def warn(msg: str) -> None:
|
|
76
|
+
if not is_text():
|
|
77
|
+
return
|
|
58
78
|
click.secho(f" WARNING: {msg}", fg="yellow", err=True)
|
|
59
79
|
|
|
60
80
|
|
|
@@ -92,8 +112,19 @@ class _AWSGroup(click.Group):
|
|
|
92
112
|
|
|
93
113
|
@click.group(cls=_AWSGroup)
|
|
94
114
|
@click.version_option(package_name="aws-bootstrap-g4dn")
|
|
95
|
-
|
|
115
|
+
@click.option(
|
|
116
|
+
"--output",
|
|
117
|
+
"-o",
|
|
118
|
+
type=click.Choice(["text", "json", "yaml", "table"], case_sensitive=False),
|
|
119
|
+
default="text",
|
|
120
|
+
show_default=True,
|
|
121
|
+
help="Output format.",
|
|
122
|
+
)
|
|
123
|
+
@click.pass_context
|
|
124
|
+
def main(ctx, output):
|
|
96
125
|
"""Bootstrap AWS EC2 GPU instances for hybrid local-remote development."""
|
|
126
|
+
ctx.ensure_object(dict)
|
|
127
|
+
ctx.obj["output_format"] = OutputFormat(output)
|
|
97
128
|
|
|
98
129
|
|
|
99
130
|
@main.command()
|
|
@@ -120,7 +151,21 @@ def main():
|
|
|
120
151
|
help="Python version for the remote venv (e.g. 3.13, 3.14.2). Passed to uv during setup.",
|
|
121
152
|
)
|
|
122
153
|
@click.option("--ssh-port", default=22, show_default=True, type=int, help="SSH port on the remote instance.")
|
|
154
|
+
@click.option(
|
|
155
|
+
"--ebs-storage",
|
|
156
|
+
default=None,
|
|
157
|
+
type=int,
|
|
158
|
+
help="Create and attach a new EBS data volume (size in GB, gp3). Mounted at /data.",
|
|
159
|
+
)
|
|
160
|
+
@click.option(
|
|
161
|
+
"--ebs-volume-id",
|
|
162
|
+
default=None,
|
|
163
|
+
type=str,
|
|
164
|
+
help="Attach an existing EBS volume by ID (e.g. vol-0abc123). Mounted at /data.",
|
|
165
|
+
)
|
|
166
|
+
@click.pass_context
|
|
123
167
|
def launch(
|
|
168
|
+
ctx,
|
|
124
169
|
instance_type,
|
|
125
170
|
ami_filter,
|
|
126
171
|
spot,
|
|
@@ -134,8 +179,13 @@ def launch(
|
|
|
134
179
|
profile,
|
|
135
180
|
python_version,
|
|
136
181
|
ssh_port,
|
|
182
|
+
ebs_storage,
|
|
183
|
+
ebs_volume_id,
|
|
137
184
|
):
|
|
138
185
|
"""Launch a GPU-accelerated EC2 instance."""
|
|
186
|
+
if ebs_storage is not None and ebs_volume_id is not None:
|
|
187
|
+
raise CLIError("--ebs-storage and --ebs-volume-id are mutually exclusive.")
|
|
188
|
+
|
|
139
189
|
config = LaunchConfig(
|
|
140
190
|
instance_type=instance_type,
|
|
141
191
|
spot=spot,
|
|
@@ -148,6 +198,8 @@ def launch(
|
|
|
148
198
|
dry_run=dry_run,
|
|
149
199
|
ssh_port=ssh_port,
|
|
150
200
|
python_version=python_version,
|
|
201
|
+
ebs_storage=ebs_storage,
|
|
202
|
+
ebs_volume_id=ebs_volume_id,
|
|
151
203
|
)
|
|
152
204
|
if ami_filter:
|
|
153
205
|
config.ami_filter = ami_filter
|
|
@@ -162,49 +214,78 @@ def launch(
|
|
|
162
214
|
session = boto3.Session(profile_name=config.profile, region_name=config.region)
|
|
163
215
|
ec2 = session.client("ec2")
|
|
164
216
|
|
|
217
|
+
has_ebs = config.ebs_storage is not None or config.ebs_volume_id is not None
|
|
218
|
+
total_steps = 7 if has_ebs else 6
|
|
219
|
+
|
|
165
220
|
# Step 1: AMI lookup
|
|
166
|
-
step(1,
|
|
221
|
+
step(1, total_steps, "Looking up AMI...")
|
|
167
222
|
ami = get_latest_ami(ec2, config.ami_filter)
|
|
168
223
|
info(f"Found: {ami['Name']}")
|
|
169
224
|
val("AMI ID", ami["ImageId"])
|
|
170
225
|
|
|
171
226
|
# Step 2: SSH key pair
|
|
172
|
-
step(2,
|
|
227
|
+
step(2, total_steps, "Importing SSH key pair...")
|
|
173
228
|
import_key_pair(ec2, config.key_name, config.key_path)
|
|
174
229
|
|
|
175
230
|
# Step 3: Security group
|
|
176
|
-
step(3,
|
|
231
|
+
step(3, total_steps, "Ensuring security group...")
|
|
177
232
|
sg_id = ensure_security_group(ec2, config.security_group, config.tag_value, ssh_port=config.ssh_port)
|
|
178
233
|
|
|
179
234
|
pricing = "spot" if config.spot else "on-demand"
|
|
180
235
|
|
|
181
236
|
if config.dry_run:
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
237
|
+
if is_text(ctx):
|
|
238
|
+
click.echo()
|
|
239
|
+
click.secho("--- Dry Run Summary ---", bold=True, fg="yellow")
|
|
240
|
+
val("Instance type", config.instance_type)
|
|
241
|
+
val("AMI", f"{ami['ImageId']} ({ami['Name']})")
|
|
242
|
+
val("Pricing", pricing)
|
|
243
|
+
val("Key pair", config.key_name)
|
|
244
|
+
val("Security group", sg_id)
|
|
245
|
+
val("Volume", f"{config.volume_size} GB gp3")
|
|
246
|
+
val("Region", config.region)
|
|
247
|
+
val("Remote setup", "yes" if config.run_setup else "no")
|
|
248
|
+
if config.ssh_port != 22:
|
|
249
|
+
val("SSH port", str(config.ssh_port))
|
|
250
|
+
if config.python_version:
|
|
251
|
+
val("Python version", config.python_version)
|
|
252
|
+
if config.ebs_storage:
|
|
253
|
+
val("EBS data volume", f"{config.ebs_storage} GB gp3 (new, mounted at {EBS_MOUNT_POINT})")
|
|
254
|
+
if config.ebs_volume_id:
|
|
255
|
+
val("EBS data volume", f"{config.ebs_volume_id} (existing, mounted at {EBS_MOUNT_POINT})")
|
|
256
|
+
click.echo()
|
|
257
|
+
click.secho("No resources launched (dry-run mode).", fg="yellow")
|
|
258
|
+
else:
|
|
259
|
+
result: dict = {
|
|
260
|
+
"dry_run": True,
|
|
261
|
+
"instance_type": config.instance_type,
|
|
262
|
+
"ami_id": ami["ImageId"],
|
|
263
|
+
"ami_name": ami["Name"],
|
|
264
|
+
"pricing": pricing,
|
|
265
|
+
"key_name": config.key_name,
|
|
266
|
+
"security_group": sg_id,
|
|
267
|
+
"volume_size_gb": config.volume_size,
|
|
268
|
+
"region": config.region,
|
|
269
|
+
}
|
|
270
|
+
if config.ssh_port != 22:
|
|
271
|
+
result["ssh_port"] = config.ssh_port
|
|
272
|
+
if config.python_version:
|
|
273
|
+
result["python_version"] = config.python_version
|
|
274
|
+
if config.ebs_storage:
|
|
275
|
+
result["ebs_storage_gb"] = config.ebs_storage
|
|
276
|
+
if config.ebs_volume_id:
|
|
277
|
+
result["ebs_volume_id"] = config.ebs_volume_id
|
|
278
|
+
emit(result, ctx=ctx)
|
|
198
279
|
return
|
|
199
280
|
|
|
200
281
|
# Step 4: Launch instance
|
|
201
|
-
step(4,
|
|
282
|
+
step(4, total_steps, f"Launching {config.instance_type} instance ({pricing})...")
|
|
202
283
|
instance = launch_instance(ec2, config, ami["ImageId"], sg_id)
|
|
203
284
|
instance_id = instance["InstanceId"]
|
|
204
285
|
val("Instance ID", instance_id)
|
|
205
286
|
|
|
206
287
|
# Step 5: Wait for ready
|
|
207
|
-
step(5,
|
|
288
|
+
step(5, total_steps, "Waiting for instance to be ready...")
|
|
208
289
|
instance = wait_instance_ready(ec2, instance_id)
|
|
209
290
|
public_ip = instance.get("PublicIpAddress")
|
|
210
291
|
if not public_ip:
|
|
@@ -213,9 +294,39 @@ def launch(
|
|
|
213
294
|
return
|
|
214
295
|
|
|
215
296
|
val("Public IP", public_ip)
|
|
297
|
+
az = instance["Placement"]["AvailabilityZone"]
|
|
298
|
+
|
|
299
|
+
# Step 5.5 (optional): EBS data volume
|
|
300
|
+
ebs_volume_attached = None
|
|
301
|
+
ebs_format = False
|
|
302
|
+
if has_ebs:
|
|
303
|
+
step(6, total_steps, "Setting up EBS data volume...")
|
|
304
|
+
if config.ebs_storage:
|
|
305
|
+
info(f"Creating {config.ebs_storage} GB gp3 volume in {az}...")
|
|
306
|
+
ebs_volume_attached = create_ebs_volume(ec2, config.ebs_storage, az, config.tag_value, instance_id)
|
|
307
|
+
val("Volume ID", ebs_volume_attached)
|
|
308
|
+
ebs_format = True
|
|
309
|
+
elif config.ebs_volume_id:
|
|
310
|
+
info(f"Validating volume {config.ebs_volume_id}...")
|
|
311
|
+
validate_ebs_volume(ec2, config.ebs_volume_id, az)
|
|
312
|
+
ebs_volume_attached = config.ebs_volume_id
|
|
313
|
+
# Tag the existing volume for discovery
|
|
314
|
+
ec2.create_tags(
|
|
315
|
+
Resources=[ebs_volume_attached],
|
|
316
|
+
Tags=[
|
|
317
|
+
{"Key": "aws-bootstrap-instance", "Value": instance_id},
|
|
318
|
+
{"Key": "created-by", "Value": config.tag_value},
|
|
319
|
+
],
|
|
320
|
+
)
|
|
321
|
+
ebs_format = False
|
|
322
|
+
|
|
323
|
+
info(f"Attaching {ebs_volume_attached} to {instance_id}...")
|
|
324
|
+
attach_ebs_volume(ec2, ebs_volume_attached, instance_id)
|
|
325
|
+
success("EBS volume attached.")
|
|
216
326
|
|
|
217
|
-
#
|
|
218
|
-
|
|
327
|
+
# SSH and remote setup step
|
|
328
|
+
ssh_step = 7 if has_ebs else 6
|
|
329
|
+
step(ssh_step, total_steps, "Waiting for SSH access...")
|
|
219
330
|
private_key = private_key_path(config.key_path)
|
|
220
331
|
if not wait_for_ssh(public_ip, config.ssh_user, config.key_path, port=config.ssh_port):
|
|
221
332
|
warn("SSH did not become available within the timeout.")
|
|
@@ -238,6 +349,22 @@ def launch(
|
|
|
238
349
|
else:
|
|
239
350
|
warn("Remote setup failed. Instance is still running.")
|
|
240
351
|
|
|
352
|
+
# Mount EBS volume via SSH (after setup so the instance is fully ready)
|
|
353
|
+
if ebs_volume_attached:
|
|
354
|
+
info(f"Mounting EBS volume at {EBS_MOUNT_POINT}...")
|
|
355
|
+
if mount_ebs_volume(
|
|
356
|
+
public_ip,
|
|
357
|
+
config.ssh_user,
|
|
358
|
+
config.key_path,
|
|
359
|
+
ebs_volume_attached,
|
|
360
|
+
mount_point=EBS_MOUNT_POINT,
|
|
361
|
+
format_volume=ebs_format,
|
|
362
|
+
port=config.ssh_port,
|
|
363
|
+
):
|
|
364
|
+
success(f"EBS volume mounted at {EBS_MOUNT_POINT}.")
|
|
365
|
+
else:
|
|
366
|
+
warn(f"Failed to mount EBS volume at {EBS_MOUNT_POINT}. You may need to mount it manually.")
|
|
367
|
+
|
|
241
368
|
# Add SSH config alias
|
|
242
369
|
alias = add_ssh_host(
|
|
243
370
|
instance_id=instance_id,
|
|
@@ -249,7 +376,30 @@ def launch(
|
|
|
249
376
|
)
|
|
250
377
|
success(f"Added SSH config alias: {alias}")
|
|
251
378
|
|
|
252
|
-
#
|
|
379
|
+
# Structured output for non-text modes
|
|
380
|
+
if not is_text(ctx):
|
|
381
|
+
result_data: dict = {
|
|
382
|
+
"instance_id": instance_id,
|
|
383
|
+
"public_ip": public_ip,
|
|
384
|
+
"instance_type": config.instance_type,
|
|
385
|
+
"availability_zone": az,
|
|
386
|
+
"ami_id": ami["ImageId"],
|
|
387
|
+
"pricing": pricing,
|
|
388
|
+
"region": config.region,
|
|
389
|
+
"ssh_alias": alias,
|
|
390
|
+
}
|
|
391
|
+
if ebs_volume_attached:
|
|
392
|
+
ebs_info: dict = {
|
|
393
|
+
"volume_id": ebs_volume_attached,
|
|
394
|
+
"mount_point": EBS_MOUNT_POINT,
|
|
395
|
+
}
|
|
396
|
+
if config.ebs_storage:
|
|
397
|
+
ebs_info["size_gb"] = config.ebs_storage
|
|
398
|
+
result_data["ebs_volume"] = ebs_info
|
|
399
|
+
emit(result_data, ctx=ctx)
|
|
400
|
+
return
|
|
401
|
+
|
|
402
|
+
# Print connection info (text mode)
|
|
253
403
|
click.echo()
|
|
254
404
|
click.secho("=" * 60, fg="green")
|
|
255
405
|
click.secho(" Instance ready!", bold=True, fg="green")
|
|
@@ -260,6 +410,12 @@ def launch(
|
|
|
260
410
|
val("Instance", config.instance_type)
|
|
261
411
|
val("Pricing", pricing)
|
|
262
412
|
val("SSH alias", alias)
|
|
413
|
+
if ebs_volume_attached:
|
|
414
|
+
if config.ebs_storage:
|
|
415
|
+
ebs_label = f"{ebs_volume_attached} ({config.ebs_storage} GB, {EBS_MOUNT_POINT})"
|
|
416
|
+
else:
|
|
417
|
+
ebs_label = f"{ebs_volume_attached} ({EBS_MOUNT_POINT})"
|
|
418
|
+
val("EBS data volume", ebs_label)
|
|
263
419
|
|
|
264
420
|
port_flag = f" -p {config.ssh_port}" if config.ssh_port != 22 else ""
|
|
265
421
|
|
|
@@ -304,44 +460,66 @@ def launch(
|
|
|
304
460
|
show_default=True,
|
|
305
461
|
help="Show connection commands (SSH, Jupyter, VSCode) for each running instance.",
|
|
306
462
|
)
|
|
307
|
-
|
|
463
|
+
@click.pass_context
|
|
464
|
+
def status(ctx, region, profile, gpu, instructions):
|
|
308
465
|
"""Show running instances created by aws-bootstrap."""
|
|
309
466
|
session = boto3.Session(profile_name=profile, region_name=region)
|
|
310
467
|
ec2 = session.client("ec2")
|
|
311
468
|
|
|
312
469
|
instances = find_tagged_instances(ec2, "aws-bootstrap-g4dn")
|
|
313
470
|
if not instances:
|
|
314
|
-
|
|
471
|
+
if is_text(ctx):
|
|
472
|
+
click.secho("No active aws-bootstrap instances found.", fg="yellow")
|
|
473
|
+
else:
|
|
474
|
+
emit({"instances": []}, ctx=ctx)
|
|
315
475
|
return
|
|
316
476
|
|
|
317
477
|
ssh_hosts = list_ssh_hosts()
|
|
318
478
|
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
479
|
+
if is_text(ctx):
|
|
480
|
+
click.secho(f"\n Found {len(instances)} instance(s):\n", bold=True, fg="cyan")
|
|
481
|
+
if gpu:
|
|
482
|
+
click.echo(" " + click.style("Querying GPU info via SSH...", dim=True))
|
|
483
|
+
click.echo()
|
|
484
|
+
|
|
485
|
+
structured_instances = []
|
|
323
486
|
|
|
324
487
|
for inst in instances:
|
|
325
488
|
state = inst["State"]
|
|
326
|
-
state_color = {
|
|
327
|
-
"running": "green",
|
|
328
|
-
"pending": "yellow",
|
|
329
|
-
"stopping": "yellow",
|
|
330
|
-
"stopped": "red",
|
|
331
|
-
"shutting-down": "red",
|
|
332
|
-
}.get(state, "white")
|
|
333
489
|
alias = ssh_hosts.get(inst["InstanceId"])
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
490
|
+
|
|
491
|
+
# Text mode: inline display
|
|
492
|
+
if is_text(ctx):
|
|
493
|
+
state_color = {
|
|
494
|
+
"running": "green",
|
|
495
|
+
"pending": "yellow",
|
|
496
|
+
"stopping": "yellow",
|
|
497
|
+
"stopped": "red",
|
|
498
|
+
"shutting-down": "red",
|
|
499
|
+
}.get(state, "white")
|
|
500
|
+
alias_str = f" ({alias})" if alias else ""
|
|
501
|
+
click.echo(
|
|
502
|
+
" "
|
|
503
|
+
+ click.style(inst["InstanceId"], fg="bright_white")
|
|
504
|
+
+ click.style(alias_str, fg="cyan")
|
|
505
|
+
+ " "
|
|
506
|
+
+ click.style(state, fg=state_color)
|
|
507
|
+
)
|
|
508
|
+
val(" Type", inst["InstanceType"])
|
|
509
|
+
if inst["PublicIp"]:
|
|
510
|
+
val(" IP", inst["PublicIp"])
|
|
511
|
+
|
|
512
|
+
# Build structured record
|
|
513
|
+
inst_data: dict = {
|
|
514
|
+
"instance_id": inst["InstanceId"],
|
|
515
|
+
"state": state,
|
|
516
|
+
"instance_type": inst["InstanceType"],
|
|
517
|
+
"public_ip": inst["PublicIp"] or None,
|
|
518
|
+
"ssh_alias": alias,
|
|
519
|
+
"lifecycle": inst["Lifecycle"],
|
|
520
|
+
"availability_zone": inst["AvailabilityZone"],
|
|
521
|
+
"launch_time": inst["LaunchTime"],
|
|
522
|
+
}
|
|
345
523
|
|
|
346
524
|
# Look up SSH config details once (used by --gpu and --with-instructions)
|
|
347
525
|
details = None
|
|
@@ -359,45 +537,81 @@ def status(region, profile, gpu, instructions):
|
|
|
359
537
|
Path("~/.ssh/id_ed25519").expanduser(),
|
|
360
538
|
)
|
|
361
539
|
if gpu_info:
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
540
|
+
if is_text(ctx):
|
|
541
|
+
val(" GPU", f"{gpu_info.gpu_name} ({gpu_info.architecture})")
|
|
542
|
+
if gpu_info.cuda_toolkit_version:
|
|
543
|
+
cuda_str = gpu_info.cuda_toolkit_version
|
|
544
|
+
if gpu_info.cuda_driver_version != gpu_info.cuda_toolkit_version:
|
|
545
|
+
cuda_str += f" (driver supports up to {gpu_info.cuda_driver_version})"
|
|
546
|
+
else:
|
|
547
|
+
cuda_str = f"{gpu_info.cuda_driver_version} (driver max, toolkit unknown)"
|
|
548
|
+
val(" CUDA", cuda_str)
|
|
549
|
+
val(" Driver", gpu_info.driver_version)
|
|
550
|
+
inst_data["gpu"] = {
|
|
551
|
+
"name": gpu_info.gpu_name,
|
|
552
|
+
"architecture": gpu_info.architecture,
|
|
553
|
+
"cuda_toolkit": gpu_info.cuda_toolkit_version,
|
|
554
|
+
"cuda_driver_max": gpu_info.cuda_driver_version,
|
|
555
|
+
"driver": gpu_info.driver_version,
|
|
556
|
+
}
|
|
371
557
|
else:
|
|
372
|
-
|
|
558
|
+
if is_text(ctx):
|
|
559
|
+
click.echo(" GPU: " + click.style("unavailable", dim=True))
|
|
560
|
+
|
|
561
|
+
# EBS data volumes
|
|
562
|
+
ebs_volumes = find_ebs_volumes_for_instance(ec2, inst["InstanceId"], "aws-bootstrap-g4dn")
|
|
563
|
+
if ebs_volumes:
|
|
564
|
+
if is_text(ctx):
|
|
565
|
+
for vol in ebs_volumes:
|
|
566
|
+
vol_state = f", {vol['State']}" if vol["State"] != "in-use" else ""
|
|
567
|
+
val(" EBS", f"{vol['VolumeId']} ({vol['Size']} GB, {EBS_MOUNT_POINT}{vol_state})")
|
|
568
|
+
inst_data["ebs_volumes"] = [
|
|
569
|
+
{
|
|
570
|
+
"volume_id": vol["VolumeId"],
|
|
571
|
+
"size_gb": vol["Size"],
|
|
572
|
+
"mount_point": EBS_MOUNT_POINT,
|
|
573
|
+
"state": vol["State"],
|
|
574
|
+
}
|
|
575
|
+
for vol in ebs_volumes
|
|
576
|
+
]
|
|
373
577
|
|
|
374
578
|
lifecycle = inst["Lifecycle"]
|
|
375
579
|
is_spot = lifecycle == "spot"
|
|
580
|
+
spot_price = None
|
|
376
581
|
|
|
377
582
|
if is_spot:
|
|
378
583
|
spot_price = get_spot_price(ec2, inst["InstanceType"], inst["AvailabilityZone"])
|
|
584
|
+
if is_text(ctx):
|
|
585
|
+
if spot_price is not None:
|
|
586
|
+
val(" Pricing", f"spot (${spot_price:.4f}/hr)")
|
|
587
|
+
else:
|
|
588
|
+
val(" Pricing", "spot")
|
|
379
589
|
if spot_price is not None:
|
|
380
|
-
|
|
381
|
-
else:
|
|
382
|
-
val(" Pricing", "spot")
|
|
590
|
+
inst_data["spot_price_per_hour"] = spot_price
|
|
383
591
|
else:
|
|
384
|
-
|
|
592
|
+
if is_text(ctx):
|
|
593
|
+
val(" Pricing", "on-demand")
|
|
385
594
|
|
|
386
595
|
if state == "running" and is_spot:
|
|
387
596
|
uptime = datetime.now(UTC) - inst["LaunchTime"]
|
|
388
597
|
total_seconds = int(uptime.total_seconds())
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
598
|
+
inst_data["uptime_seconds"] = total_seconds
|
|
599
|
+
if is_text(ctx):
|
|
600
|
+
hours, remainder = divmod(total_seconds, 3600)
|
|
601
|
+
minutes = remainder // 60
|
|
602
|
+
val(" Uptime", f"{hours}h {minutes:02d}m")
|
|
392
603
|
if spot_price is not None:
|
|
393
604
|
uptime_hours = uptime.total_seconds() / 3600
|
|
394
605
|
est_cost = uptime_hours * spot_price
|
|
395
|
-
|
|
606
|
+
inst_data["estimated_cost"] = round(est_cost, 4)
|
|
607
|
+
if is_text(ctx):
|
|
608
|
+
val(" Est. cost", f"~${est_cost:.4f}")
|
|
396
609
|
|
|
397
|
-
|
|
610
|
+
if is_text(ctx):
|
|
611
|
+
val(" Launched", str(inst["LaunchTime"]))
|
|
398
612
|
|
|
399
613
|
# Connection instructions (opt-in, only for running instances with a public IP and alias)
|
|
400
|
-
if instructions and state == "running" and inst["PublicIp"] and alias:
|
|
614
|
+
if is_text(ctx) and instructions and state == "running" and inst["PublicIp"] and alias:
|
|
401
615
|
user = details.user if details else "ubuntu"
|
|
402
616
|
port = details.port if details else 22
|
|
403
617
|
port_flag = f" -p {port}" if port != 22 else ""
|
|
@@ -418,6 +632,24 @@ def status(region, profile, gpu, instructions):
|
|
|
418
632
|
click.secho(" GPU Benchmark:", fg="cyan")
|
|
419
633
|
click.secho(f" ssh {alias} 'python ~/gpu_benchmark.py'", bold=True)
|
|
420
634
|
|
|
635
|
+
structured_instances.append(inst_data)
|
|
636
|
+
|
|
637
|
+
if not is_text(ctx):
|
|
638
|
+
emit(
|
|
639
|
+
{"instances": structured_instances},
|
|
640
|
+
headers={
|
|
641
|
+
"instance_id": "Instance ID",
|
|
642
|
+
"state": "State",
|
|
643
|
+
"instance_type": "Type",
|
|
644
|
+
"public_ip": "IP",
|
|
645
|
+
"ssh_alias": "Alias",
|
|
646
|
+
"lifecycle": "Pricing",
|
|
647
|
+
"uptime_seconds": "Uptime (s)",
|
|
648
|
+
},
|
|
649
|
+
ctx=ctx,
|
|
650
|
+
)
|
|
651
|
+
return
|
|
652
|
+
|
|
421
653
|
click.echo()
|
|
422
654
|
first_id = instances[0]["InstanceId"]
|
|
423
655
|
first_ref = ssh_hosts.get(first_id, first_id)
|
|
@@ -429,8 +661,10 @@ def status(region, profile, gpu, instructions):
|
|
|
429
661
|
@click.option("--region", default="us-west-2", show_default=True, help="AWS region.")
|
|
430
662
|
@click.option("--profile", default=None, help="AWS profile override.")
|
|
431
663
|
@click.option("--yes", "-y", is_flag=True, default=False, help="Skip confirmation prompt.")
|
|
664
|
+
@click.option("--keep-ebs", is_flag=True, default=False, help="Preserve EBS data volumes instead of deleting them.")
|
|
432
665
|
@click.argument("instance_ids", nargs=-1, metavar="[INSTANCE_ID_OR_ALIAS]...")
|
|
433
|
-
|
|
666
|
+
@click.pass_context
|
|
667
|
+
def terminate(ctx, region, profile, yes, keep_ebs, instance_ids):
|
|
434
668
|
"""Terminate instances created by aws-bootstrap.
|
|
435
669
|
|
|
436
670
|
Pass specific instance IDs or SSH aliases (e.g. aws-gpu1) to terminate,
|
|
@@ -439,6 +673,10 @@ def terminate(region, profile, yes, instance_ids):
|
|
|
439
673
|
session = boto3.Session(profile_name=profile, region_name=region)
|
|
440
674
|
ec2 = session.client("ec2")
|
|
441
675
|
|
|
676
|
+
# In structured output modes, require --yes (prompts would corrupt output)
|
|
677
|
+
if not is_text(ctx) and not yes:
|
|
678
|
+
raise CLIError("--yes is required when using structured output (--output json/yaml/table).")
|
|
679
|
+
|
|
442
680
|
if instance_ids:
|
|
443
681
|
targets = []
|
|
444
682
|
for value in instance_ids:
|
|
@@ -454,13 +692,17 @@ def terminate(region, profile, yes, instance_ids):
|
|
|
454
692
|
else:
|
|
455
693
|
instances = find_tagged_instances(ec2, "aws-bootstrap-g4dn")
|
|
456
694
|
if not instances:
|
|
457
|
-
|
|
695
|
+
if is_text(ctx):
|
|
696
|
+
click.secho("No active aws-bootstrap instances found.", fg="yellow")
|
|
697
|
+
else:
|
|
698
|
+
emit({"terminated": []}, ctx=ctx)
|
|
458
699
|
return
|
|
459
700
|
targets = [inst["InstanceId"] for inst in instances]
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
701
|
+
if is_text(ctx):
|
|
702
|
+
click.secho(f"\n Found {len(targets)} instance(s) to terminate:\n", bold=True, fg="cyan")
|
|
703
|
+
for inst in instances:
|
|
704
|
+
iid = click.style(inst["InstanceId"], fg="bright_white")
|
|
705
|
+
click.echo(f" {iid} {inst['State']} {inst['InstanceType']}")
|
|
464
706
|
|
|
465
707
|
if not yes:
|
|
466
708
|
click.echo()
|
|
@@ -468,21 +710,143 @@ def terminate(region, profile, yes, instance_ids):
|
|
|
468
710
|
click.secho(" Cancelled.", fg="yellow")
|
|
469
711
|
return
|
|
470
712
|
|
|
713
|
+
# Discover EBS volumes before termination (while instances still exist)
|
|
714
|
+
ebs_by_instance: dict[str, list[dict]] = {}
|
|
715
|
+
for target in targets:
|
|
716
|
+
volumes = find_ebs_volumes_for_instance(ec2, target, "aws-bootstrap-g4dn")
|
|
717
|
+
if volumes:
|
|
718
|
+
ebs_by_instance[target] = volumes
|
|
719
|
+
|
|
471
720
|
changes = terminate_tagged_instances(ec2, targets)
|
|
472
|
-
|
|
721
|
+
|
|
722
|
+
terminated_results = []
|
|
723
|
+
|
|
724
|
+
if is_text(ctx):
|
|
725
|
+
click.echo()
|
|
473
726
|
for change in changes:
|
|
474
727
|
prev = change["PreviousState"]["Name"]
|
|
475
728
|
curr = change["CurrentState"]["Name"]
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
removed_alias = remove_ssh_host(
|
|
729
|
+
iid = change["InstanceId"]
|
|
730
|
+
if is_text(ctx):
|
|
731
|
+
click.echo(" " + click.style(iid, fg="bright_white") + f" {prev} -> " + click.style(curr, fg="red"))
|
|
732
|
+
removed_alias = remove_ssh_host(iid)
|
|
480
733
|
if removed_alias:
|
|
481
734
|
info(f"Removed SSH config alias: {removed_alias}")
|
|
735
|
+
|
|
736
|
+
change_data: dict = {
|
|
737
|
+
"instance_id": iid,
|
|
738
|
+
"previous_state": prev,
|
|
739
|
+
"current_state": curr,
|
|
740
|
+
}
|
|
741
|
+
if removed_alias:
|
|
742
|
+
change_data["ssh_alias_removed"] = removed_alias
|
|
743
|
+
terminated_results.append(change_data)
|
|
744
|
+
|
|
745
|
+
# Handle EBS volume cleanup
|
|
746
|
+
for _iid, volumes in ebs_by_instance.items():
|
|
747
|
+
for vol in volumes:
|
|
748
|
+
vid = vol["VolumeId"]
|
|
749
|
+
if keep_ebs:
|
|
750
|
+
if is_text(ctx):
|
|
751
|
+
click.echo()
|
|
752
|
+
info(f"Preserving EBS volume: {vid} ({vol['Size']} GB)")
|
|
753
|
+
info(f"Reattach with: aws-bootstrap launch --ebs-volume-id {vid}")
|
|
754
|
+
else:
|
|
755
|
+
if is_text(ctx):
|
|
756
|
+
click.echo()
|
|
757
|
+
info(f"Waiting for EBS volume {vid} to detach...")
|
|
758
|
+
try:
|
|
759
|
+
waiter = ec2.get_waiter("volume_available")
|
|
760
|
+
waiter.wait(VolumeIds=[vid], WaiterConfig={"Delay": 10, "MaxAttempts": 30})
|
|
761
|
+
delete_ebs_volume(ec2, vid)
|
|
762
|
+
success(f"Deleted EBS volume: {vid}")
|
|
763
|
+
# Record deleted volume in the corresponding terminated result
|
|
764
|
+
for tr in terminated_results:
|
|
765
|
+
if tr["instance_id"] == _iid:
|
|
766
|
+
tr.setdefault("ebs_volumes_deleted", []).append(vid)
|
|
767
|
+
except Exception as e:
|
|
768
|
+
warn(f"Failed to delete EBS volume {vid}: {e}")
|
|
769
|
+
|
|
770
|
+
if not is_text(ctx):
|
|
771
|
+
emit({"terminated": terminated_results}, ctx=ctx)
|
|
772
|
+
return
|
|
773
|
+
|
|
482
774
|
click.echo()
|
|
483
775
|
success(f"Terminated {len(changes)} instance(s).")
|
|
484
776
|
|
|
485
777
|
|
|
778
|
+
@main.command()
|
|
779
|
+
@click.option("--dry-run", is_flag=True, default=False, help="Show what would be removed without removing.")
|
|
780
|
+
@click.option("--yes", "-y", is_flag=True, default=False, help="Skip confirmation prompt.")
|
|
781
|
+
@click.option("--region", default="us-west-2", show_default=True, help="AWS region.")
|
|
782
|
+
@click.option("--profile", default=None, help="AWS profile override.")
|
|
783
|
+
@click.pass_context
|
|
784
|
+
def cleanup(ctx, dry_run, yes, region, profile):
|
|
785
|
+
"""Remove stale SSH config entries for terminated instances."""
|
|
786
|
+
session = boto3.Session(profile_name=profile, region_name=region)
|
|
787
|
+
ec2 = session.client("ec2")
|
|
788
|
+
|
|
789
|
+
# In structured output modes, require --yes for non-dry-run (prompts would corrupt output)
|
|
790
|
+
if not is_text(ctx) and not yes and not dry_run:
|
|
791
|
+
raise CLIError("--yes is required when using structured output (--output json/yaml/table).")
|
|
792
|
+
|
|
793
|
+
live_instances = find_tagged_instances(ec2, "aws-bootstrap-g4dn")
|
|
794
|
+
live_ids = {inst["InstanceId"] for inst in live_instances}
|
|
795
|
+
|
|
796
|
+
stale = find_stale_ssh_hosts(live_ids)
|
|
797
|
+
if not stale:
|
|
798
|
+
if is_text(ctx):
|
|
799
|
+
click.secho("No stale SSH config entries found.", fg="green")
|
|
800
|
+
else:
|
|
801
|
+
result_key = "stale" if dry_run else "cleaned"
|
|
802
|
+
emit({result_key: []}, ctx=ctx)
|
|
803
|
+
return
|
|
804
|
+
|
|
805
|
+
if is_text(ctx):
|
|
806
|
+
click.secho(f"\n Found {len(stale)} stale SSH config entry(ies):\n", bold=True, fg="cyan")
|
|
807
|
+
for iid, alias in stale:
|
|
808
|
+
click.echo(" " + click.style(alias, fg="bright_white") + f" ({iid})")
|
|
809
|
+
|
|
810
|
+
if dry_run:
|
|
811
|
+
if is_text(ctx):
|
|
812
|
+
click.echo()
|
|
813
|
+
for iid, alias in stale:
|
|
814
|
+
info(f"Would remove {alias} ({iid})")
|
|
815
|
+
else:
|
|
816
|
+
emit(
|
|
817
|
+
{
|
|
818
|
+
"stale": [{"instance_id": iid, "alias": alias} for iid, alias in stale],
|
|
819
|
+
"dry_run": True,
|
|
820
|
+
},
|
|
821
|
+
ctx=ctx,
|
|
822
|
+
)
|
|
823
|
+
return
|
|
824
|
+
|
|
825
|
+
if not yes:
|
|
826
|
+
click.echo()
|
|
827
|
+
if not click.confirm(f" Remove {len(stale)} stale entry(ies)?"):
|
|
828
|
+
click.secho(" Cancelled.", fg="yellow")
|
|
829
|
+
return
|
|
830
|
+
|
|
831
|
+
results = cleanup_stale_ssh_hosts(live_ids)
|
|
832
|
+
|
|
833
|
+
if not is_text(ctx):
|
|
834
|
+
emit(
|
|
835
|
+
{
|
|
836
|
+
"cleaned": [{"instance_id": r.instance_id, "alias": r.alias, "removed": r.removed} for r in results],
|
|
837
|
+
},
|
|
838
|
+
ctx=ctx,
|
|
839
|
+
)
|
|
840
|
+
return
|
|
841
|
+
|
|
842
|
+
click.echo()
|
|
843
|
+
for r in results:
|
|
844
|
+
success(f"Removed {r.alias} ({r.instance_id})")
|
|
845
|
+
|
|
846
|
+
click.echo()
|
|
847
|
+
success(f"Cleaned up {len(results)} stale entry(ies).")
|
|
848
|
+
|
|
849
|
+
|
|
486
850
|
# ---------------------------------------------------------------------------
|
|
487
851
|
# list command group
|
|
488
852
|
# ---------------------------------------------------------------------------
|
|
@@ -499,14 +863,40 @@ def list_cmd():
|
|
|
499
863
|
@click.option("--prefix", default="g4dn", show_default=True, help="Instance type family prefix to filter on.")
|
|
500
864
|
@click.option("--region", default="us-west-2", show_default=True, help="AWS region.")
|
|
501
865
|
@click.option("--profile", default=None, help="AWS profile override.")
|
|
502
|
-
|
|
866
|
+
@click.pass_context
|
|
867
|
+
def list_instance_types_cmd(ctx, prefix, region, profile):
|
|
503
868
|
"""List EC2 instance types matching a family prefix (e.g. g4dn, p3, g5)."""
|
|
504
869
|
session = boto3.Session(profile_name=profile, region_name=region)
|
|
505
870
|
ec2 = session.client("ec2")
|
|
506
871
|
|
|
507
872
|
types = list_instance_types(ec2, prefix)
|
|
508
873
|
if not types:
|
|
509
|
-
|
|
874
|
+
if is_text(ctx):
|
|
875
|
+
click.secho(f"No instance types found matching '{prefix}.*'", fg="yellow")
|
|
876
|
+
else:
|
|
877
|
+
emit([], ctx=ctx)
|
|
878
|
+
return
|
|
879
|
+
|
|
880
|
+
if not is_text(ctx):
|
|
881
|
+
structured = [
|
|
882
|
+
{
|
|
883
|
+
"instance_type": t["InstanceType"],
|
|
884
|
+
"vcpus": t["VCpuCount"],
|
|
885
|
+
"memory_mib": t["MemoryMiB"],
|
|
886
|
+
"gpu": t["GpuSummary"] or None,
|
|
887
|
+
}
|
|
888
|
+
for t in types
|
|
889
|
+
]
|
|
890
|
+
emit(
|
|
891
|
+
structured,
|
|
892
|
+
headers={
|
|
893
|
+
"instance_type": "Instance Type",
|
|
894
|
+
"vcpus": "vCPUs",
|
|
895
|
+
"memory_mib": "Memory (MiB)",
|
|
896
|
+
"gpu": "GPU",
|
|
897
|
+
},
|
|
898
|
+
ctx=ctx,
|
|
899
|
+
)
|
|
510
900
|
return
|
|
511
901
|
|
|
512
902
|
click.secho(f"\n {len(types)} instance type(s) matching '{prefix}.*':\n", bold=True, fg="cyan")
|
|
@@ -518,8 +908,8 @@ def list_instance_types_cmd(prefix, region, profile):
|
|
|
518
908
|
click.echo(" " + "-" * 72)
|
|
519
909
|
|
|
520
910
|
for t in types:
|
|
521
|
-
|
|
522
|
-
click.echo(f" {t['InstanceType']:<24}{t['VCpuCount']:>6}{t['MemoryMiB']:>14} {
|
|
911
|
+
gpu_str = t["GpuSummary"] or "-"
|
|
912
|
+
click.echo(f" {t['InstanceType']:<24}{t['VCpuCount']:>6}{t['MemoryMiB']:>14} {gpu_str}")
|
|
523
913
|
|
|
524
914
|
click.echo()
|
|
525
915
|
|
|
@@ -528,14 +918,40 @@ def list_instance_types_cmd(prefix, region, profile):
|
|
|
528
918
|
@click.option("--filter", "ami_filter", default=DEFAULT_AMI_PREFIX, show_default=True, help="AMI name pattern.")
|
|
529
919
|
@click.option("--region", default="us-west-2", show_default=True, help="AWS region.")
|
|
530
920
|
@click.option("--profile", default=None, help="AWS profile override.")
|
|
531
|
-
|
|
921
|
+
@click.pass_context
|
|
922
|
+
def list_amis_cmd(ctx, ami_filter, region, profile):
|
|
532
923
|
"""List available AMIs matching a name pattern."""
|
|
533
924
|
session = boto3.Session(profile_name=profile, region_name=region)
|
|
534
925
|
ec2 = session.client("ec2")
|
|
535
926
|
|
|
536
927
|
amis = list_amis(ec2, ami_filter)
|
|
537
928
|
if not amis:
|
|
538
|
-
|
|
929
|
+
if is_text(ctx):
|
|
930
|
+
click.secho(f"No AMIs found matching '{ami_filter}'", fg="yellow")
|
|
931
|
+
else:
|
|
932
|
+
emit([], ctx=ctx)
|
|
933
|
+
return
|
|
934
|
+
|
|
935
|
+
if not is_text(ctx):
|
|
936
|
+
structured = [
|
|
937
|
+
{
|
|
938
|
+
"image_id": ami["ImageId"],
|
|
939
|
+
"name": ami["Name"],
|
|
940
|
+
"creation_date": ami["CreationDate"][:10],
|
|
941
|
+
"architecture": ami["Architecture"],
|
|
942
|
+
}
|
|
943
|
+
for ami in amis
|
|
944
|
+
]
|
|
945
|
+
emit(
|
|
946
|
+
structured,
|
|
947
|
+
headers={
|
|
948
|
+
"image_id": "Image ID",
|
|
949
|
+
"name": "Name",
|
|
950
|
+
"creation_date": "Created",
|
|
951
|
+
"architecture": "Arch",
|
|
952
|
+
},
|
|
953
|
+
ctx=ctx,
|
|
954
|
+
)
|
|
539
955
|
return
|
|
540
956
|
|
|
541
957
|
click.secho(f"\n {len(amis)} AMI(s) matching '{ami_filter}' (newest first):\n", bold=True, fg="cyan")
|