aws-bootstrap-g4dn 0.5.0__py3-none-any.whl → 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
aws_bootstrap/cli.py CHANGED
@@ -10,8 +10,13 @@ import click
 
 from .config import LaunchConfig
 from .ec2 import (
+    EBS_MOUNT_POINT,
     CLIError,
+    attach_ebs_volume,
+    create_ebs_volume,
+    delete_ebs_volume,
     ensure_security_group,
+    find_ebs_volumes_for_instance,
     find_tagged_instances,
     get_latest_ami,
     get_spot_price,
@@ -19,13 +24,17 @@ from .ec2 import (
     list_amis,
     list_instance_types,
     terminate_tagged_instances,
+    validate_ebs_volume,
     wait_instance_ready,
 )
 from .ssh import (
     add_ssh_host,
+    cleanup_stale_ssh_hosts,
+    find_stale_ssh_hosts,
     get_ssh_host_details,
     import_key_pair,
     list_ssh_hosts,
+    mount_ebs_volume,
     private_key_path,
     query_gpu_info,
     remove_ssh_host,
@@ -120,6 +129,18 @@ def main():
     help="Python version for the remote venv (e.g. 3.13, 3.14.2). Passed to uv during setup.",
 )
 @click.option("--ssh-port", default=22, show_default=True, type=int, help="SSH port on the remote instance.")
+@click.option(
+    "--ebs-storage",
+    default=None,
+    type=int,
+    help="Create and attach a new EBS data volume (size in GB, gp3). Mounted at /data.",
+)
+@click.option(
+    "--ebs-volume-id",
+    default=None,
+    type=str,
+    help="Attach an existing EBS volume by ID (e.g. vol-0abc123). Mounted at /data.",
+)
 def launch(
     instance_type,
     ami_filter,
@@ -134,8 +155,13 @@ def launch(
     profile,
     python_version,
     ssh_port,
+    ebs_storage,
+    ebs_volume_id,
 ):
     """Launch a GPU-accelerated EC2 instance."""
+    if ebs_storage is not None and ebs_volume_id is not None:
+        raise CLIError("--ebs-storage and --ebs-volume-id are mutually exclusive.")
+
     config = LaunchConfig(
         instance_type=instance_type,
         spot=spot,
@@ -148,6 +174,8 @@
         dry_run=dry_run,
         ssh_port=ssh_port,
         python_version=python_version,
+        ebs_storage=ebs_storage,
+        ebs_volume_id=ebs_volume_id,
     )
     if ami_filter:
         config.ami_filter = ami_filter
@@ -162,18 +190,21 @@
     session = boto3.Session(profile_name=config.profile, region_name=config.region)
     ec2 = session.client("ec2")
 
+    has_ebs = config.ebs_storage is not None or config.ebs_volume_id is not None
+    total_steps = 7 if has_ebs else 6
+
     # Step 1: AMI lookup
-    step(1, 6, "Looking up AMI...")
+    step(1, total_steps, "Looking up AMI...")
     ami = get_latest_ami(ec2, config.ami_filter)
     info(f"Found: {ami['Name']}")
     val("AMI ID", ami["ImageId"])
 
     # Step 2: SSH key pair
-    step(2, 6, "Importing SSH key pair...")
+    step(2, total_steps, "Importing SSH key pair...")
     import_key_pair(ec2, config.key_name, config.key_path)
 
     # Step 3: Security group
-    step(3, 6, "Ensuring security group...")
+    step(3, total_steps, "Ensuring security group...")
     sg_id = ensure_security_group(ec2, config.security_group, config.tag_value, ssh_port=config.ssh_port)
 
     pricing = "spot" if config.spot else "on-demand"
@@ -193,18 +224,22 @@
         val("SSH port", str(config.ssh_port))
         if config.python_version:
             val("Python version", config.python_version)
+        if config.ebs_storage:
+            val("EBS data volume", f"{config.ebs_storage} GB gp3 (new, mounted at {EBS_MOUNT_POINT})")
+        if config.ebs_volume_id:
+            val("EBS data volume", f"{config.ebs_volume_id} (existing, mounted at {EBS_MOUNT_POINT})")
         click.echo()
         click.secho("No resources launched (dry-run mode).", fg="yellow")
         return
 
     # Step 4: Launch instance
-    step(4, 6, f"Launching {config.instance_type} instance ({pricing})...")
+    step(4, total_steps, f"Launching {config.instance_type} instance ({pricing})...")
     instance = launch_instance(ec2, config, ami["ImageId"], sg_id)
     instance_id = instance["InstanceId"]
     val("Instance ID", instance_id)
 
     # Step 5: Wait for ready
-    step(5, 6, "Waiting for instance to be ready...")
+    step(5, total_steps, "Waiting for instance to be ready...")
     instance = wait_instance_ready(ec2, instance_id)
     public_ip = instance.get("PublicIpAddress")
     if not public_ip:
@@ -213,9 +248,39 @@
         return
 
     val("Public IP", public_ip)
+    az = instance["Placement"]["AvailabilityZone"]
+
+    # Step 6 (optional): EBS data volume
+    ebs_volume_attached = None
+    ebs_format = False
+    if has_ebs:
+        step(6, total_steps, "Setting up EBS data volume...")
+        if config.ebs_storage:
+            info(f"Creating {config.ebs_storage} GB gp3 volume in {az}...")
+            ebs_volume_attached = create_ebs_volume(ec2, config.ebs_storage, az, config.tag_value, instance_id)
+            val("Volume ID", ebs_volume_attached)
+            ebs_format = True
+        elif config.ebs_volume_id:
+            info(f"Validating volume {config.ebs_volume_id}...")
+            validate_ebs_volume(ec2, config.ebs_volume_id, az)
+            ebs_volume_attached = config.ebs_volume_id
+            # Tag the existing volume for discovery
+            ec2.create_tags(
+                Resources=[ebs_volume_attached],
+                Tags=[
+                    {"Key": "aws-bootstrap-instance", "Value": instance_id},
+                    {"Key": "created-by", "Value": config.tag_value},
+                ],
+            )
+            ebs_format = False
 
-    # Step 6: SSH and remote setup
-    step(6, 6, "Waiting for SSH access...")
+        info(f"Attaching {ebs_volume_attached} to {instance_id}...")
+        attach_ebs_volume(ec2, ebs_volume_attached, instance_id)
+        success("EBS volume attached.")
+
+    # SSH and remote setup step
+    ssh_step = 7 if has_ebs else 6
+    step(ssh_step, total_steps, "Waiting for SSH access...")
     private_key = private_key_path(config.key_path)
     if not wait_for_ssh(public_ip, config.ssh_user, config.key_path, port=config.ssh_port):
         warn("SSH did not become available within the timeout.")
@@ -238,6 +303,22 @@
     else:
         warn("Remote setup failed. Instance is still running.")
 
+    # Mount EBS volume via SSH (after setup so the instance is fully ready)
+    if ebs_volume_attached:
+        info(f"Mounting EBS volume at {EBS_MOUNT_POINT}...")
+        if mount_ebs_volume(
+            public_ip,
+            config.ssh_user,
+            config.key_path,
+            ebs_volume_attached,
+            mount_point=EBS_MOUNT_POINT,
+            format_volume=ebs_format,
+            port=config.ssh_port,
+        ):
+            success(f"EBS volume mounted at {EBS_MOUNT_POINT}.")
+        else:
+            warn(f"Failed to mount EBS volume at {EBS_MOUNT_POINT}. You may need to mount it manually.")
+
     # Add SSH config alias
     alias = add_ssh_host(
         instance_id=instance_id,
@@ -260,6 +341,12 @@
     val("Instance", config.instance_type)
     val("Pricing", pricing)
     val("SSH alias", alias)
+    if ebs_volume_attached:
+        if config.ebs_storage:
+            ebs_label = f"{ebs_volume_attached} ({config.ebs_storage} GB, {EBS_MOUNT_POINT})"
+        else:
+            ebs_label = f"{ebs_volume_attached} ({EBS_MOUNT_POINT})"
+        val("EBS data volume", ebs_label)
 
     port_flag = f" -p {config.ssh_port}" if config.ssh_port != 22 else ""
 
@@ -371,6 +458,12 @@ def status(region, profile, gpu, instructions):
         else:
             click.echo(" GPU: " + click.style("unavailable", dim=True))
 
+        # EBS data volumes
+        ebs_volumes = find_ebs_volumes_for_instance(ec2, inst["InstanceId"], "aws-bootstrap-g4dn")
+        for vol in ebs_volumes:
+            vol_state = f", {vol['State']}" if vol["State"] != "in-use" else ""
+            val(" EBS", f"{vol['VolumeId']} ({vol['Size']} GB, {EBS_MOUNT_POINT}{vol_state})")
+
         lifecycle = inst["Lifecycle"]
         is_spot = lifecycle == "spot"
 
@@ -429,8 +522,9 @@
 @click.option("--region", default="us-west-2", show_default=True, help="AWS region.")
 @click.option("--profile", default=None, help="AWS profile override.")
 @click.option("--yes", "-y", is_flag=True, default=False, help="Skip confirmation prompt.")
+@click.option("--keep-ebs", is_flag=True, default=False, help="Preserve EBS data volumes instead of deleting them.")
 @click.argument("instance_ids", nargs=-1, metavar="[INSTANCE_ID_OR_ALIAS]...")
-def terminate(region, profile, yes, instance_ids):
+def terminate(region, profile, yes, keep_ebs, instance_ids):
     """Terminate instances created by aws-bootstrap.
 
     Pass specific instance IDs or SSH aliases (e.g. aws-gpu1) to terminate,
@@ -468,6 +562,13 @@ def terminate(region, profile, yes, instance_ids):
         click.secho(" Cancelled.", fg="yellow")
         return
 
+    # Discover EBS volumes before termination (while instances still exist)
+    ebs_by_instance: dict[str, list[dict]] = {}
+    for target in targets:
+        volumes = find_ebs_volumes_for_instance(ec2, target, "aws-bootstrap-g4dn")
+        if volumes:
+            ebs_by_instance[target] = volumes
+
     changes = terminate_tagged_instances(ec2, targets)
     click.echo()
     for change in changes:
@@ -479,10 +580,73 @@ def terminate(region, profile, yes, instance_ids):
         removed_alias = remove_ssh_host(change["InstanceId"])
         if removed_alias:
             info(f"Removed SSH config alias: {removed_alias}")
+
+    # Handle EBS volume cleanup
+    for _iid, volumes in ebs_by_instance.items():
+        for vol in volumes:
+            vid = vol["VolumeId"]
+            if keep_ebs:
+                click.echo()
+                info(f"Preserving EBS volume: {vid} ({vol['Size']} GB)")
+                info(f"Reattach with: aws-bootstrap launch --ebs-volume-id {vid}")
+            else:
+                click.echo()
+                info(f"Waiting for EBS volume {vid} to detach...")
+                try:
+                    waiter = ec2.get_waiter("volume_available")
+                    waiter.wait(VolumeIds=[vid], WaiterConfig={"Delay": 10, "MaxAttempts": 30})
+                    delete_ebs_volume(ec2, vid)
+                    success(f"Deleted EBS volume: {vid}")
+                except Exception as e:
+                    warn(f"Failed to delete EBS volume {vid}: {e}")
+
     click.echo()
     success(f"Terminated {len(changes)} instance(s).")
 
 
+@main.command()
+@click.option("--dry-run", is_flag=True, default=False, help="Show what would be removed without removing.")
+@click.option("--yes", "-y", is_flag=True, default=False, help="Skip confirmation prompt.")
+@click.option("--region", default="us-west-2", show_default=True, help="AWS region.")
+@click.option("--profile", default=None, help="AWS profile override.")
+def cleanup(dry_run, yes, region, profile):
+    """Remove stale SSH config entries for terminated instances."""
+    session = boto3.Session(profile_name=profile, region_name=region)
+    ec2 = session.client("ec2")
+
+    live_instances = find_tagged_instances(ec2, "aws-bootstrap-g4dn")
+    live_ids = {inst["InstanceId"] for inst in live_instances}
+
+    stale = find_stale_ssh_hosts(live_ids)
+    if not stale:
+        click.secho("No stale SSH config entries found.", fg="green")
+        return
+
+    click.secho(f"\n Found {len(stale)} stale SSH config entry(ies):\n", bold=True, fg="cyan")
+    for iid, alias in stale:
+        click.echo(" " + click.style(alias, fg="bright_white") + f" ({iid})")
+
+    if dry_run:
+        click.echo()
+        for iid, alias in stale:
+            info(f"Would remove {alias} ({iid})")
+        return
+
+    if not yes:
+        click.echo()
+        if not click.confirm(f" Remove {len(stale)} stale entry(ies)?"):
+            click.secho(" Cancelled.", fg="yellow")
+            return
+
+    results = cleanup_stale_ssh_hosts(live_ids)
+    click.echo()
+    for r in results:
+        success(f"Removed {r.alias} ({r.instance_id})")
+
+    click.echo()
+    success(f"Cleaned up {len(results)} stale entry(ies).")
+
+
 # ---------------------------------------------------------------------------
 # list command group
 # ---------------------------------------------------------------------------
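
Taken together, the cli.py changes thread EBS data volumes through the whole lifecycle: launch --ebs-storage N creates, attaches, and mounts a fresh gp3 volume; launch --ebs-volume-id vol-... revalidates and reattaches an existing one; status lists tagged data volumes per instance; terminate --keep-ebs preserves them for reuse; and the new cleanup command prunes stale SSH aliases. A minimal sketch of exercising the new flag validation with click's test runner (hypothetical usage, not part of the package; the mutual-exclusion guard runs before any boto3 call, so no AWS credentials are involved):

    from click.testing import CliRunner

    from aws_bootstrap.cli import main

    runner = CliRunner()
    # Passing both EBS flags trips the guard at the top of launch() and
    # exits non-zero before a boto3.Session is ever created.
    result = runner.invoke(
        main,
        ["launch", "--ebs-storage", "100", "--ebs-volume-id", "vol-0abc123"],
    )
    assert result.exit_code != 0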
aws_bootstrap/config.py CHANGED
@@ -24,3 +24,5 @@ class LaunchConfig:
     alias_prefix: str = "aws-gpu"
     ssh_port: int = 22
     python_version: str | None = None
+    ebs_storage: int | None = None
+    ebs_volume_id: str | None = None
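
Both new fields default to None, so existing LaunchConfig call sites are unaffected, and launch() derives its has_ebs flag from them. A hypothetical construction mirroring cli.py (assuming the LaunchConfig fields not shown in this diff keep their existing defaults):

    from aws_bootstrap.config import LaunchConfig

    # Hypothetical values; only instance_type and ebs_storage are set here.
    config = LaunchConfig(instance_type="g4dn.xlarge", ebs_storage=100)
    has_ebs = config.ebs_storage is not None or config.ebs_volume_id is not None
    assert has_ebs and config.ebs_volume_id is None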
aws_bootstrap/ec2.py CHANGED
@@ -9,6 +9,10 @@ import click
 from .config import LaunchConfig
 
 
+EBS_DEVICE_NAME = "/dev/sdf"
+EBS_MOUNT_POINT = "/data"
+
+
 class CLIError(click.ClickException):
     """A ClickException that displays the error message in red."""
 
@@ -339,3 +343,127 @@ def wait_instance_ready(ec2_client, instance_id: str) -> dict:
     desc = ec2_client.describe_instances(InstanceIds=[instance_id])
     instance = desc["Reservations"][0]["Instances"][0]
     return instance
+
+
+# ---------------------------------------------------------------------------
+# EBS data volume operations
+# ---------------------------------------------------------------------------
+
+
+def create_ebs_volume(ec2_client, size_gb: int, availability_zone: str, tag_value: str, instance_id: str) -> str:
+    """Create a gp3 EBS volume and wait for it to become available.
+
+    Returns the volume ID.
+    """
+    response = ec2_client.create_volume(
+        AvailabilityZone=availability_zone,
+        Size=size_gb,
+        VolumeType="gp3",
+        TagSpecifications=[
+            {
+                "ResourceType": "volume",
+                "Tags": [
+                    {"Key": "created-by", "Value": tag_value},
+                    {"Key": "Name", "Value": f"aws-bootstrap-data-{instance_id}"},
+                    {"Key": "aws-bootstrap-instance", "Value": instance_id},
+                ],
+            }
+        ],
+    )
+    volume_id = response["VolumeId"]
+
+    waiter = ec2_client.get_waiter("volume_available")
+    waiter.wait(VolumeIds=[volume_id], WaiterConfig={"Delay": 5, "MaxAttempts": 24})
+    return volume_id
+
+
+def validate_ebs_volume(ec2_client, volume_id: str, availability_zone: str) -> dict:
+    """Validate that an existing EBS volume can be attached.
+
+    Checks that the volume exists, is available (not in-use), and is in the
+    correct availability zone. Returns the volume description dict.
+
+    Raises CLIError for validation failures.
+    """
+    try:
+        response = ec2_client.describe_volumes(VolumeIds=[volume_id])
+    except botocore.exceptions.ClientError as e:
+        if e.response["Error"]["Code"] == "InvalidVolume.NotFound":
+            raise CLIError(f"EBS volume not found: {volume_id}") from None
+        raise
+
+    volumes = response["Volumes"]
+    if not volumes:
+        raise CLIError(f"EBS volume not found: {volume_id}")
+
+    vol = volumes[0]
+
+    if vol["State"] != "available":
+        raise CLIError(
+            f"EBS volume {volume_id} is currently '{vol['State']}' (must be 'available').\n"
+            " Detach it from its current instance first."
+        )
+
+    if vol["AvailabilityZone"] != availability_zone:
+        raise CLIError(
+            f"EBS volume {volume_id} is in {vol['AvailabilityZone']} "
+            f"but the instance is in {availability_zone}.\n"
+            " EBS volumes must be in the same availability zone as the instance."
+        )
+
+    return vol
+
+
+def attach_ebs_volume(ec2_client, volume_id: str, instance_id: str, device_name: str = EBS_DEVICE_NAME) -> None:
+    """Attach an EBS volume to an instance and wait for it to be in-use."""
+    ec2_client.attach_volume(
+        VolumeId=volume_id,
+        InstanceId=instance_id,
+        Device=device_name,
+    )
+    waiter = ec2_client.get_waiter("volume_in_use")
+    waiter.wait(VolumeIds=[volume_id], WaiterConfig={"Delay": 5, "MaxAttempts": 24})
+
+
+def detach_ebs_volume(ec2_client, volume_id: str) -> None:
+    """Detach an EBS volume and wait for it to become available."""
+    ec2_client.detach_volume(VolumeId=volume_id)
+    waiter = ec2_client.get_waiter("volume_available")
+    waiter.wait(VolumeIds=[volume_id], WaiterConfig={"Delay": 5, "MaxAttempts": 24})
+
+
+def delete_ebs_volume(ec2_client, volume_id: str) -> None:
+    """Delete an EBS volume."""
+    ec2_client.delete_volume(VolumeId=volume_id)
+
+
+def find_ebs_volumes_for_instance(ec2_client, instance_id: str, tag_value: str) -> list[dict]:
+    """Find EBS data volumes associated with an instance via tags.
+
+    Returns a list of dicts with VolumeId, Size, Device, and State.
+    Excludes root volumes (only returns volumes tagged by aws-bootstrap).
+    """
+    try:
+        response = ec2_client.describe_volumes(
+            Filters=[
+                {"Name": "tag:aws-bootstrap-instance", "Values": [instance_id]},
+                {"Name": "tag:created-by", "Values": [tag_value]},
+            ]
+        )
+    except botocore.exceptions.ClientError:
+        return []
+
+    volumes = []
+    for vol in response.get("Volumes", []):
+        device = ""
+        if vol.get("Attachments"):
+            device = vol["Attachments"][0].get("Device", "")
+        volumes.append(
+            {
+                "VolumeId": vol["VolumeId"],
+                "Size": vol["Size"],
+                "Device": device,
+                "State": vol["State"],
+            }
+        )
+    return volumes
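
Each helper blocks on the matching EC2 waiter, so the calls chain directly, and discovery goes through the two tags applied at creation time rather than through block-device mappings. A hedged sketch of the full lifecycle these functions implement, using only the signatures introduced above (the instance ID and region are hypothetical; real credentials and a running instance in us-west-2a are assumed):

    import boto3

    from aws_bootstrap.ec2 import (
        attach_ebs_volume,
        create_ebs_volume,
        delete_ebs_volume,
        detach_ebs_volume,
        find_ebs_volumes_for_instance,
    )

    ec2 = boto3.Session(region_name="us-west-2").client("ec2")
    instance_id = "i-0123456789abcdef0"  # hypothetical instance in us-west-2a

    vol_id = create_ebs_volume(ec2, 100, "us-west-2a", "aws-bootstrap-g4dn", instance_id)
    attach_ebs_volume(ec2, vol_id, instance_id)  # returns once the volume is 'in-use'

    # Tag-based discovery, so root volumes never show up.
    for vol in find_ebs_volumes_for_instance(ec2, instance_id, "aws-bootstrap-g4dn"):
        print(vol["VolumeId"], vol["Size"], vol["Device"], vol["State"])

    detach_ebs_volume(ec2, vol_id)  # returns once the volume is 'available'
    delete_ebs_volume(ec2, vol_id)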
@@ -48,8 +48,8 @@ fi
 # 2. Install utilities
 echo ""
 echo "[2/6] Installing utilities..."
-sudo apt-get update -qq
-sudo apt-get install -y -qq htop tmux tree jq
+sudo DEBIAN_FRONTEND=noninteractive apt-get update -qq
+sudo DEBIAN_FRONTEND=noninteractive apt-get install -y -qq htop tmux tree jq ffmpeg
 
 # 3. Set up Python environment with uv
 echo ""
aws_bootstrap/ssh.py CHANGED
@@ -374,6 +374,37 @@ def list_ssh_hosts(config_path: Path | None = None) -> dict[str, str]:
     return result
 
 
+def find_stale_ssh_hosts(live_instance_ids: set[str], config_path: Path | None = None) -> list[tuple[str, str]]:
+    """Identify SSH config entries whose instances no longer exist.
+
+    Returns ``[(instance_id, alias), ...]`` for entries where the instance ID
+    is **not** in *live_instance_ids*, sorted by alias.
+    """
+    hosts = list_ssh_hosts(config_path)
+    stale = [(iid, alias) for iid, alias in hosts.items() if iid not in live_instance_ids]
+    stale.sort(key=lambda t: t[1])
+    return stale
+
+
+def cleanup_stale_ssh_hosts(
+    live_instance_ids: set[str],
+    config_path: Path | None = None,
+    dry_run: bool = False,
+) -> list[CleanupResult]:
+    """Remove SSH config entries for terminated/non-existent instances.
+
+    If *dry_run* is ``True``, entries are identified but not removed.
+    Returns a list of :class:`CleanupResult` objects.
+    """
+    stale = find_stale_ssh_hosts(live_instance_ids, config_path)
+    results: list[CleanupResult] = []
+    for iid, alias in stale:
+        if not dry_run:
+            remove_ssh_host(iid, config_path)
+        results.append(CleanupResult(instance_id=iid, alias=alias, removed=not dry_run))
+    return results
+
+
 _INSTANCE_ID_RE = re.compile(r"^i-[0-9a-f]{8,17}$")
 
 
@@ -402,6 +433,15 @@ def resolve_instance_id(value: str, config_path: Path | None = None) -> str | None:
     return None
 
 
+@dataclass
+class CleanupResult:
+    """Result of cleaning up a single stale SSH config entry."""
+
+    instance_id: str
+    alias: str
+    removed: bool
+
+
 @dataclass
 class SSHHostDetails:
     """Connection details parsed from an SSH config stanza."""
@@ -515,6 +555,87 @@ def query_gpu_info(host: str, user: str, key_path: Path, timeout: int = 10, port
     return None
 
 
+# ---------------------------------------------------------------------------
+# EBS volume mount
+# ---------------------------------------------------------------------------
+
+
+def mount_ebs_volume(
+    host: str,
+    user: str,
+    key_path: Path,
+    volume_id: str,
+    mount_point: str = "/data",
+    format_volume: bool = True,
+    port: int = 22,
+) -> bool:
+    """Mount an EBS volume on the remote instance via SSH.
+
+    Detects the NVMe device by volume ID serial, formats if requested,
+    mounts at *mount_point*, and adds an fstab entry for persistence.
+
+    Returns True on success, False on failure.
+    """
+    ssh_opts = _ssh_opts(key_path)
+    port_opts = ["-p", str(port)] if port != 22 else []
+
+    # The NVMe serial is the volume ID with its hyphen removed
+    # (vol-0abc123 -> vol0abc123)
+    vol_serial = volume_id.replace("-", "")
+
+    format_cmd = ""
+    if format_volume:
+        format_cmd = (
+            'if ! sudo blkid "$DEVICE" > /dev/null 2>&1; then\n'
+            '    echo "Formatting $DEVICE as ext4..."\n'
+            '    sudo mkfs.ext4 "$DEVICE"\n'
+            "fi\n"
+        )
+
+    remote_script = (
+        "set -e\n"
+        "# Detect EBS device by NVMe serial (Nitro instances)\n"
+        f'SERIAL="{vol_serial}"\n'
+        "DEVICE=$(lsblk -o NAME,SERIAL -dpn 2>/dev/null | "
+        "awk -v s=\"$SERIAL\" '$2 == s {print $1}' | head -1)\n"
+        "# Fall back to common device paths\n"
+        'if [ -z "$DEVICE" ]; then\n'
+        "    for dev in /dev/nvme1n1 /dev/xvdf /dev/sdf; do\n"
+        '        if [ -b "$dev" ]; then DEVICE="$dev"; break; fi\n'
+        "    done\n"
+        "fi\n"
+        'if [ -z "$DEVICE" ]; then\n'
+        '    echo "ERROR: Could not find EBS device" >&2\n'
+        "    exit 1\n"
+        "fi\n"
+        'echo "Found EBS device: $DEVICE"\n'
+        f"{format_cmd}"
+        f"sudo mkdir -p {mount_point}\n"
+        f'sudo mount "$DEVICE" {mount_point}\n'
+        f"sudo chown {user}:{user} {mount_point}\n"
+        "# Add fstab entry for reboot persistence\n"
+        'UUID=$(sudo blkid -s UUID -o value "$DEVICE")\n'
+        'if [ -n "$UUID" ]; then\n'
+        '    if ! grep -q "$UUID" /etc/fstab; then\n'
+        f'        echo "UUID=$UUID {mount_point} ext4 defaults,nofail 0 2" | sudo tee -a /etc/fstab > /dev/null\n'
+        "    fi\n"
+        "fi\n"
+        f'echo "Mounted $DEVICE at {mount_point}"'
+    )
+
+    cmd = [
+        "ssh",
+        *ssh_opts,
+        *port_opts,
+        "-o",
+        "ConnectTimeout=10",
+        f"{user}@{host}",
+        remote_script,
+    ]
+
+    result = subprocess.run(cmd, capture_output=False)
+    return result.returncode == 0
+
+
 # ---------------------------------------------------------------------------
 # Internal helpers
 # ---------------------------------------------------------------------------
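
The only non-obvious part of the remote script is device detection: on Nitro instances the kernel exposes the EBS volume ID, hyphen removed, as the NVMe serial, and lsblk -o NAME,SERIAL -dpn prints one "device serial" pair per line. A self-contained Python rendering of the same matching rule the awk pipeline applies (the helper name and the sample lsblk output are fabricated for illustration):

    def nvme_device_for_volume(lsblk_output: str, volume_id: str) -> str | None:
        """Return the device path whose NVMe serial matches the EBS volume ID."""
        serial = volume_id.replace("-", "")  # vol-0abc... -> vol0abc...
        for line in lsblk_output.splitlines():
            parts = line.split()  # "NAME SERIAL" columns from lsblk -o NAME,SERIAL -dpn
            if len(parts) == 2 and parts[1] == serial:
                return parts[0]
        return None

    sample = (
        "/dev/nvme0n1 vol0aaaaaaaaaaaaaaaa\n"
        "/dev/nvme1n1 vol0abc123def4567890"
    )
    assert nvme_device_for_volume(sample, "vol-0abc123def4567890") == "/dev/nvme1n1"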