aws-bootstrap-g4dn 0.7.0__tar.gz → 0.8.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. {aws_bootstrap_g4dn-0.7.0 → aws_bootstrap_g4dn-0.8.0}/CLAUDE.md +1 -1
  2. {aws_bootstrap_g4dn-0.7.0/aws_bootstrap_g4dn.egg-info → aws_bootstrap_g4dn-0.8.0}/PKG-INFO +8 -1
  3. {aws_bootstrap_g4dn-0.7.0 → aws_bootstrap_g4dn-0.8.0}/README.md +7 -0
  4. {aws_bootstrap_g4dn-0.7.0 → aws_bootstrap_g4dn-0.8.0}/aws_bootstrap/cli.py +82 -24
  5. {aws_bootstrap_g4dn-0.7.0 → aws_bootstrap_g4dn-0.8.0}/aws_bootstrap/ec2.py +36 -0
  6. {aws_bootstrap_g4dn-0.7.0 → aws_bootstrap_g4dn-0.8.0}/aws_bootstrap/tests/test_cli.py +97 -0
  7. {aws_bootstrap_g4dn-0.7.0 → aws_bootstrap_g4dn-0.8.0}/aws_bootstrap/tests/test_ebs.py +90 -0
  8. {aws_bootstrap_g4dn-0.7.0 → aws_bootstrap_g4dn-0.8.0/aws_bootstrap_g4dn.egg-info}/PKG-INFO +8 -1
  9. {aws_bootstrap_g4dn-0.7.0 → aws_bootstrap_g4dn-0.8.0}/uv.lock +1 -1
  10. {aws_bootstrap_g4dn-0.7.0 → aws_bootstrap_g4dn-0.8.0}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
  11. {aws_bootstrap_g4dn-0.7.0 → aws_bootstrap_g4dn-0.8.0}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
  12. {aws_bootstrap_g4dn-0.7.0 → aws_bootstrap_g4dn-0.8.0}/.github/workflows/ci.yml +0 -0
  13. {aws_bootstrap_g4dn-0.7.0 → aws_bootstrap_g4dn-0.8.0}/.github/workflows/publish-to-pypi.yml +0 -0
  14. {aws_bootstrap_g4dn-0.7.0 → aws_bootstrap_g4dn-0.8.0}/.gitignore +0 -0
  15. {aws_bootstrap_g4dn-0.7.0 → aws_bootstrap_g4dn-0.8.0}/.pre-commit-config.yaml +0 -0
  16. {aws_bootstrap_g4dn-0.7.0 → aws_bootstrap_g4dn-0.8.0}/CODE_OF_CONDUCT.md +0 -0
  17. {aws_bootstrap_g4dn-0.7.0 → aws_bootstrap_g4dn-0.8.0}/CONTRIBUTING.md +0 -0
  18. {aws_bootstrap_g4dn-0.7.0 → aws_bootstrap_g4dn-0.8.0}/LICENSE +0 -0
  19. {aws_bootstrap_g4dn-0.7.0 → aws_bootstrap_g4dn-0.8.0}/SECURITY.md +0 -0
  20. {aws_bootstrap_g4dn-0.7.0 → aws_bootstrap_g4dn-0.8.0}/aws_bootstrap/__init__.py +0 -0
  21. {aws_bootstrap_g4dn-0.7.0 → aws_bootstrap_g4dn-0.8.0}/aws_bootstrap/config.py +0 -0
  22. {aws_bootstrap_g4dn-0.7.0 → aws_bootstrap_g4dn-0.8.0}/aws_bootstrap/gpu.py +0 -0
  23. {aws_bootstrap_g4dn-0.7.0 → aws_bootstrap_g4dn-0.8.0}/aws_bootstrap/output.py +0 -0
  24. {aws_bootstrap_g4dn-0.7.0 → aws_bootstrap_g4dn-0.8.0}/aws_bootstrap/resources/__init__.py +0 -0
  25. {aws_bootstrap_g4dn-0.7.0 → aws_bootstrap_g4dn-0.8.0}/aws_bootstrap/resources/gpu_benchmark.py +0 -0
  26. {aws_bootstrap_g4dn-0.7.0 → aws_bootstrap_g4dn-0.8.0}/aws_bootstrap/resources/gpu_smoke_test.ipynb +0 -0
  27. {aws_bootstrap_g4dn-0.7.0 → aws_bootstrap_g4dn-0.8.0}/aws_bootstrap/resources/launch.json +0 -0
  28. {aws_bootstrap_g4dn-0.7.0 → aws_bootstrap_g4dn-0.8.0}/aws_bootstrap/resources/remote_setup.sh +0 -0
  29. {aws_bootstrap_g4dn-0.7.0 → aws_bootstrap_g4dn-0.8.0}/aws_bootstrap/resources/requirements.txt +0 -0
  30. {aws_bootstrap_g4dn-0.7.0 → aws_bootstrap_g4dn-0.8.0}/aws_bootstrap/resources/saxpy.cu +0 -0
  31. {aws_bootstrap_g4dn-0.7.0 → aws_bootstrap_g4dn-0.8.0}/aws_bootstrap/resources/tasks.json +0 -0
  32. {aws_bootstrap_g4dn-0.7.0 → aws_bootstrap_g4dn-0.8.0}/aws_bootstrap/ssh.py +0 -0
  33. {aws_bootstrap_g4dn-0.7.0 → aws_bootstrap_g4dn-0.8.0}/aws_bootstrap/tests/__init__.py +0 -0
  34. {aws_bootstrap_g4dn-0.7.0 → aws_bootstrap_g4dn-0.8.0}/aws_bootstrap/tests/test_config.py +0 -0
  35. {aws_bootstrap_g4dn-0.7.0 → aws_bootstrap_g4dn-0.8.0}/aws_bootstrap/tests/test_ec2.py +0 -0
  36. {aws_bootstrap_g4dn-0.7.0 → aws_bootstrap_g4dn-0.8.0}/aws_bootstrap/tests/test_gpu.py +0 -0
  37. {aws_bootstrap_g4dn-0.7.0 → aws_bootstrap_g4dn-0.8.0}/aws_bootstrap/tests/test_output.py +0 -0
  38. {aws_bootstrap_g4dn-0.7.0 → aws_bootstrap_g4dn-0.8.0}/aws_bootstrap/tests/test_ssh_config.py +0 -0
  39. {aws_bootstrap_g4dn-0.7.0 → aws_bootstrap_g4dn-0.8.0}/aws_bootstrap/tests/test_ssh_ebs.py +0 -0
  40. {aws_bootstrap_g4dn-0.7.0 → aws_bootstrap_g4dn-0.8.0}/aws_bootstrap/tests/test_ssh_gpu.py +0 -0
  41. {aws_bootstrap_g4dn-0.7.0 → aws_bootstrap_g4dn-0.8.0}/aws_bootstrap_g4dn.egg-info/SOURCES.txt +0 -0
  42. {aws_bootstrap_g4dn-0.7.0 → aws_bootstrap_g4dn-0.8.0}/aws_bootstrap_g4dn.egg-info/dependency_links.txt +0 -0
  43. {aws_bootstrap_g4dn-0.7.0 → aws_bootstrap_g4dn-0.8.0}/aws_bootstrap_g4dn.egg-info/entry_points.txt +0 -0
  44. {aws_bootstrap_g4dn-0.7.0 → aws_bootstrap_g4dn-0.8.0}/aws_bootstrap_g4dn.egg-info/requires.txt +0 -0
  45. {aws_bootstrap_g4dn-0.7.0 → aws_bootstrap_g4dn-0.8.0}/aws_bootstrap_g4dn.egg-info/top_level.txt +0 -0
  46. {aws_bootstrap_g4dn-0.7.0 → aws_bootstrap_g4dn-0.8.0}/docs/nsight-remote-profiling.md +0 -0
  47. {aws_bootstrap_g4dn-0.7.0 → aws_bootstrap_g4dn-0.8.0}/pyproject.toml +0 -0
  48. {aws_bootstrap_g4dn-0.7.0 → aws_bootstrap_g4dn-0.8.0}/setup.cfg +0 -0
@@ -71,7 +71,7 @@ Entry point: `aws-bootstrap = "aws_bootstrap.cli:main"` (installed via `uv sync`
71
71
  - **`launch`** — provisions an EC2 instance (spot by default, falls back to on-demand on capacity errors); adds SSH config alias (e.g. `aws-gpu1`) to `~/.ssh/config`; `--python-version` controls which Python `uv` installs in the remote venv; `--ssh-port` overrides the default SSH port (22) for security group ingress, connection checks, and SSH config; `--ebs-storage SIZE` creates and attaches a new gp3 EBS data volume (mounted at `/data`); `--ebs-volume-id ID` attaches an existing EBS volume (mutually exclusive with `--ebs-storage`)
72
72
  - **`status`** — lists all non-terminated instances (including `shutting-down`) with type, IP, SSH alias, EBS data volumes, pricing (spot price/hr or on-demand), uptime, and estimated cost for running spot instances; `--gpu` flag queries GPU info via SSH, reporting both CUDA toolkit version (from `nvcc`) and driver-supported max (from `nvidia-smi`); `--instructions` (default: on) prints connection commands (SSH, Jupyter tunnel, VSCode Remote SSH, GPU benchmark) for each running instance; suppress with `--no-instructions`
73
73
  - **`terminate`** — terminates instances by ID or SSH alias (e.g. `aws-gpu1`, resolved via `~/.ssh/config`), or all aws-bootstrap instances in the region if no arguments given; removes SSH config aliases; deletes associated EBS data volumes by default; `--keep-ebs` preserves volumes and prints reattach commands
74
- - **`cleanup`** — removes stale `~/.ssh/config` entries for terminated/non-existent instances; compares managed SSH config blocks against live EC2 instances; `--dry-run` previews removals without modifying config; `--yes` skips the confirmation prompt
74
+ - **`cleanup`** — removes stale `~/.ssh/config` entries for terminated/non-existent instances; compares managed SSH config blocks against live EC2 instances; `--include-ebs` also finds and deletes orphan EBS data volumes (volumes in `available` state whose linked instance no longer exists); `--dry-run` previews removals without modifying config or deleting volumes; `--yes` skips the confirmation prompt
75
75
  - **`list instance-types`** — lists EC2 instance types matching a family prefix (default: `g4dn`), showing vCPUs, memory, and GPU info
76
76
  - **`list amis`** — lists available AMIs matching a name pattern (default: Deep Learning Base OSS Nvidia Driver GPU AMIs), sorted newest-first
77
77
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: aws-bootstrap-g4dn
3
- Version: 0.7.0
3
+ Version: 0.8.0
4
4
  Summary: Bootstrap AWS EC2 GPU instances for hybrid local-remote development
5
5
  Author: Adam Ever-Hadani
6
6
  License-Expression: MIT
@@ -317,6 +317,12 @@ aws-bootstrap cleanup
317
317
  # Preview what would be removed without modifying config
318
318
  aws-bootstrap cleanup --dry-run
319
319
 
320
+ # Also find and delete orphan EBS data volumes
321
+ aws-bootstrap cleanup --include-ebs
322
+
323
+ # Preview orphan volumes without deleting
324
+ aws-bootstrap cleanup --include-ebs --dry-run
325
+
320
326
  # Skip confirmation prompt
321
327
  aws-bootstrap cleanup --yes
322
328
  ```
@@ -350,6 +356,7 @@ Key behaviors:
350
356
  - New volumes are formatted as ext4; existing volumes are mounted as-is
351
357
  - Volumes are tagged for automatic discovery by `status` and `terminate`
352
358
  - `terminate` deletes data volumes by default; use `--keep-ebs` to preserve them
359
+ - **Orphan cleanup** — use `aws-bootstrap cleanup --include-ebs` to find and delete orphan volumes (e.g. from spot interruptions or forgotten `--keep-ebs` volumes). Add `--dry-run` to preview what would be deleted without deleting anything
353
360
  - **Spot-safe** — data volumes survive spot interruptions. If AWS reclaims your instance, the volume detaches automatically and can be reattached to a new instance with `--ebs-volume-id`
354
361
  - EBS volumes must be in the same availability zone as the instance
355
362
  - Mount failures are non-fatal — the instance remains usable
@@ -296,6 +296,12 @@ aws-bootstrap cleanup
296
296
  # Preview what would be removed without modifying config
297
297
  aws-bootstrap cleanup --dry-run
298
298
 
299
+ # Also find and delete orphan EBS data volumes
300
+ aws-bootstrap cleanup --include-ebs
301
+
302
+ # Preview orphan volumes without deleting
303
+ aws-bootstrap cleanup --include-ebs --dry-run
304
+
299
305
  # Skip confirmation prompt
300
306
  aws-bootstrap cleanup --yes
301
307
  ```
@@ -329,6 +335,7 @@ Key behaviors:
329
335
  - New volumes are formatted as ext4; existing volumes are mounted as-is
330
336
  - Volumes are tagged for automatic discovery by `status` and `terminate`
331
337
  - `terminate` deletes data volumes by default; use `--keep-ebs` to preserve them
338
+ - **Orphan cleanup** — use `aws-bootstrap cleanup --include-ebs` to find and delete orphan volumes (e.g. from spot interruptions or forgotten `--keep-ebs` volumes). Add `--dry-run` to preview what would be deleted without deleting anything
332
339
  - **Spot-safe** — data volumes survive spot interruptions. If AWS reclaims your instance, the volume detaches automatically and can be reattached to a new instance with `--ebs-volume-id`
333
340
  - EBS volumes must be in the same availability zone as the instance
334
341
  - Mount failures are non-fatal — the instance remains usable
@@ -17,6 +17,7 @@ from .ec2 import (
17
17
  delete_ebs_volume,
18
18
  ensure_security_group,
19
19
  find_ebs_volumes_for_instance,
20
+ find_orphan_ebs_volumes,
20
21
  find_tagged_instances,
21
22
  get_latest_ami,
22
23
  get_spot_price,
@@ -778,10 +779,11 @@ def terminate(ctx, region, profile, yes, keep_ebs, instance_ids):
778
779
  @main.command()
779
780
  @click.option("--dry-run", is_flag=True, default=False, help="Show what would be removed without removing.")
780
781
  @click.option("--yes", "-y", is_flag=True, default=False, help="Skip confirmation prompt.")
782
+ @click.option("--include-ebs", is_flag=True, default=False, help="Also find and delete orphan EBS data volumes.")
781
783
  @click.option("--region", default="us-west-2", show_default=True, help="AWS region.")
782
784
  @click.option("--profile", default=None, help="AWS profile override.")
783
785
  @click.pass_context
784
- def cleanup(ctx, dry_run, yes, region, profile):
786
+ def cleanup(ctx, dry_run, yes, include_ebs, region, profile):
785
787
  """Remove stale SSH config entries for terminated instances."""
786
788
  session = boto3.Session(profile_name=profile, region_name=region)
787
789
  ec2 = session.client("ec2")
@@ -794,57 +796,113 @@ def cleanup(ctx, dry_run, yes, region, profile):
794
796
  live_ids = {inst["InstanceId"] for inst in live_instances}
795
797
 
796
798
  stale = find_stale_ssh_hosts(live_ids)
797
- if not stale:
799
+
800
+ # Orphan EBS discovery
801
+ orphan_volumes: list[dict] = []
802
+ if include_ebs:
803
+ orphan_volumes = find_orphan_ebs_volumes(ec2, "aws-bootstrap-g4dn", live_ids)
804
+
805
+ if not stale and not orphan_volumes:
798
806
  if is_text(ctx):
799
- click.secho("No stale SSH config entries found.", fg="green")
807
+ msg = "No stale SSH config entries found."
808
+ if include_ebs:
809
+ msg = "No stale SSH config entries or orphan EBS volumes found."
810
+ click.secho(msg, fg="green")
800
811
  else:
801
812
  result_key = "stale" if dry_run else "cleaned"
802
- emit({result_key: []}, ctx=ctx)
813
+ result: dict = {result_key: []}
814
+ if include_ebs:
815
+ ebs_key = "orphan_volumes" if dry_run else "deleted_volumes"
816
+ result[ebs_key] = []
817
+ emit(result, ctx=ctx)
803
818
  return
804
819
 
805
820
  if is_text(ctx):
806
- click.secho(f"\n Found {len(stale)} stale SSH config entry(ies):\n", bold=True, fg="cyan")
807
- for iid, alias in stale:
808
- click.echo(" " + click.style(alias, fg="bright_white") + f" ({iid})")
821
+ if stale:
822
+ click.secho(f"\n Found {len(stale)} stale SSH config entry(ies):\n", bold=True, fg="cyan")
823
+ for iid, alias in stale:
824
+ click.echo(" " + click.style(alias, fg="bright_white") + f" ({iid})")
825
+ if orphan_volumes:
826
+ click.secho(f"\n Found {len(orphan_volumes)} orphan EBS volume(s):\n", bold=True, fg="cyan")
827
+ for vol in orphan_volumes:
828
+ click.echo(
829
+ " "
830
+ + click.style(vol["VolumeId"], fg="bright_white")
831
+ + f" ({vol['Size']} GB, was {vol['InstanceId']})"
832
+ )
809
833
 
810
834
  if dry_run:
811
835
  if is_text(ctx):
812
836
  click.echo()
813
837
  for iid, alias in stale:
814
838
  info(f"Would remove {alias} ({iid})")
839
+ for vol in orphan_volumes:
840
+ info(f"Would delete {vol['VolumeId']} ({vol['Size']} GB)")
815
841
  else:
816
- emit(
817
- {
818
- "stale": [{"instance_id": iid, "alias": alias} for iid, alias in stale],
819
- "dry_run": True,
820
- },
821
- ctx=ctx,
822
- )
842
+ result = {
843
+ "stale": [{"instance_id": iid, "alias": alias} for iid, alias in stale],
844
+ "dry_run": True,
845
+ }
846
+ if include_ebs:
847
+ result["orphan_volumes"] = [
848
+ {
849
+ "volume_id": vol["VolumeId"],
850
+ "size_gb": vol["Size"],
851
+ "instance_id": vol["InstanceId"],
852
+ }
853
+ for vol in orphan_volumes
854
+ ]
855
+ emit(result, ctx=ctx)
823
856
  return
824
857
 
825
858
  if not yes:
826
859
  click.echo()
827
- if not click.confirm(f" Remove {len(stale)} stale entry(ies)?"):
860
+ parts = []
861
+ if stale:
862
+ parts.append(f"{len(stale)} stale SSH entry(ies)")
863
+ if orphan_volumes:
864
+ parts.append(f"{len(orphan_volumes)} orphan EBS volume(s)")
865
+ if not click.confirm(f" Remove {' and '.join(parts)}?"):
828
866
  click.secho(" Cancelled.", fg="yellow")
829
867
  return
830
868
 
831
- results = cleanup_stale_ssh_hosts(live_ids)
869
+ ssh_results = cleanup_stale_ssh_hosts(live_ids) if stale else []
870
+
871
+ # Delete orphan EBS volumes
872
+ deleted_volumes: list[dict] = []
873
+ for vol in orphan_volumes:
874
+ try:
875
+ delete_ebs_volume(ec2, vol["VolumeId"])
876
+ deleted_volumes.append({"volume_id": vol["VolumeId"], "size_gb": vol["Size"], "deleted": True})
877
+ except (botocore.exceptions.ClientError, botocore.exceptions.BotoCoreError) as exc:
878
+ if is_text(ctx):
879
+ warn(f"Failed to delete {vol['VolumeId']}: {exc}")
880
+ deleted_volumes.append({"volume_id": vol["VolumeId"], "size_gb": vol["Size"], "deleted": False})
832
881
 
833
882
  if not is_text(ctx):
834
- emit(
835
- {
836
- "cleaned": [{"instance_id": r.instance_id, "alias": r.alias, "removed": r.removed} for r in results],
837
- },
838
- ctx=ctx,
839
- )
883
+ result = {
884
+ "cleaned": [{"instance_id": r.instance_id, "alias": r.alias, "removed": r.removed} for r in ssh_results],
885
+ }
886
+ if include_ebs:
887
+ result["deleted_volumes"] = deleted_volumes
888
+ emit(result, ctx=ctx)
840
889
  return
841
890
 
842
891
  click.echo()
843
- for r in results:
892
+ for r in ssh_results:
844
893
  success(f"Removed {r.alias} ({r.instance_id})")
894
+ for vol in deleted_volumes:
895
+ if vol["deleted"]:
896
+ success(f"Deleted {vol['volume_id']} ({vol['size_gb']} GB)")
845
897
 
846
898
  click.echo()
847
- success(f"Cleaned up {len(results)} stale entry(ies).")
899
+ parts = []
900
+ if ssh_results:
901
+ parts.append(f"{len(ssh_results)} stale entry(ies)")
902
+ if deleted_volumes:
903
+ ok_count = sum(1 for v in deleted_volumes if v["deleted"])
904
+ parts.append(f"{ok_count} orphan volume(s)")
905
+ success(f"Cleaned up {' and '.join(parts)}.")
848
906
 
849
907
 
850
908
  # ---------------------------------------------------------------------------
@@ -468,3 +468,39 @@ def find_ebs_volumes_for_instance(ec2_client, instance_id: str, tag_value: str)
468
468
  }
469
469
  )
470
470
  return volumes
471
+
472
+
473
+ def find_orphan_ebs_volumes(ec2_client, tag_value: str, live_instance_ids: set[str]) -> list[dict]:
474
+ """Find aws-bootstrap EBS volumes whose linked instance no longer exists.
475
+
476
+ Only returns volumes in ``available`` state (not attached to any instance).
477
+ Volumes that are ``in-use`` are never considered orphans, even if their
478
+ tagged instance ID is not in *live_instance_ids*.
479
+
480
+ Returns a list of dicts with VolumeId, Size, State, and InstanceId
481
+ (the instance ID from the ``aws-bootstrap-instance`` tag).
482
+ """
483
+ try:
484
+ response = ec2_client.describe_volumes(
485
+ Filters=[
486
+ {"Name": "tag:created-by", "Values": [tag_value]},
487
+ {"Name": "status", "Values": ["available"]},
488
+ ]
489
+ )
490
+ except botocore.exceptions.ClientError:
491
+ return []
492
+
493
+ orphans = []
494
+ for vol in response.get("Volumes", []):
495
+ tags = {t["Key"]: t["Value"] for t in vol.get("Tags", [])}
496
+ linked_instance = tags.get("aws-bootstrap-instance", "")
497
+ if linked_instance and linked_instance not in live_instance_ids:
498
+ orphans.append(
499
+ {
500
+ "VolumeId": vol["VolumeId"],
501
+ "Size": vol["Size"],
502
+ "State": vol["State"],
503
+ "InstanceId": linked_instance,
504
+ }
505
+ )
506
+ return orphans
@@ -1258,6 +1258,103 @@ def test_cleanup_with_yes(mock_find, mock_session, mock_stale, mock_cleanup):
1258
1258
  mock_cleanup.assert_called_once()
1259
1259
 
1260
1260
 
1261
+ # ---------------------------------------------------------------------------
1262
+ # cleanup --include-ebs
1263
+ # ---------------------------------------------------------------------------
1264
+
1265
+
1266
+ @patch("aws_bootstrap.cli.find_orphan_ebs_volumes", return_value=[])
1267
+ @patch("aws_bootstrap.cli.find_stale_ssh_hosts", return_value=[])
1268
+ @patch("aws_bootstrap.cli.boto3.Session")
1269
+ @patch("aws_bootstrap.cli.find_tagged_instances", return_value=[])
1270
+ def test_cleanup_include_ebs_no_orphans(mock_find, mock_session, mock_stale, mock_orphan):
1271
+ runner = CliRunner()
1272
+ result = runner.invoke(main, ["cleanup", "--include-ebs"])
1273
+ assert result.exit_code == 0
1274
+ assert "No stale SSH config entries or orphan EBS volumes found." in result.output
1275
+ mock_orphan.assert_called_once()
1276
+
1277
+
1278
+ @patch("aws_bootstrap.cli.find_orphan_ebs_volumes")
1279
+ @patch("aws_bootstrap.cli.find_stale_ssh_hosts", return_value=[])
1280
+ @patch("aws_bootstrap.cli.boto3.Session")
1281
+ @patch("aws_bootstrap.cli.find_tagged_instances", return_value=[])
1282
+ def test_cleanup_include_ebs_dry_run(mock_find, mock_session, mock_stale, mock_orphan):
1283
+ mock_orphan.return_value = [
1284
+ {"VolumeId": "vol-orphan1", "Size": 50, "State": "available", "InstanceId": "i-dead1234"},
1285
+ ]
1286
+ runner = CliRunner()
1287
+ result = runner.invoke(main, ["cleanup", "--include-ebs", "--dry-run"])
1288
+ assert result.exit_code == 0
1289
+ assert "Would delete vol-orphan1" in result.output
1290
+ assert "50 GB" in result.output
1291
+
1292
+
1293
+ @patch("aws_bootstrap.cli.delete_ebs_volume")
1294
+ @patch("aws_bootstrap.cli.find_orphan_ebs_volumes")
1295
+ @patch("aws_bootstrap.cli.find_stale_ssh_hosts", return_value=[])
1296
+ @patch("aws_bootstrap.cli.boto3.Session")
1297
+ @patch("aws_bootstrap.cli.find_tagged_instances", return_value=[])
1298
+ def test_cleanup_include_ebs_delete_with_yes(mock_find, mock_session, mock_stale, mock_orphan, mock_delete):
1299
+ mock_orphan.return_value = [
1300
+ {"VolumeId": "vol-orphan1", "Size": 50, "State": "available", "InstanceId": "i-dead1234"},
1301
+ ]
1302
+ runner = CliRunner()
1303
+ result = runner.invoke(main, ["cleanup", "--include-ebs", "--yes"])
1304
+ assert result.exit_code == 0
1305
+ assert "Deleted vol-orphan1" in result.output
1306
+ mock_delete.assert_called_once_with(mock_session.return_value.client.return_value, "vol-orphan1")
1307
+
1308
+
1309
+ @patch("aws_bootstrap.cli.delete_ebs_volume")
1310
+ @patch("aws_bootstrap.cli.find_orphan_ebs_volumes")
1311
+ @patch("aws_bootstrap.cli.find_stale_ssh_hosts", return_value=[])
1312
+ @patch("aws_bootstrap.cli.boto3.Session")
1313
+ @patch("aws_bootstrap.cli.find_tagged_instances", return_value=[])
1314
+ def test_cleanup_include_ebs_json(mock_find, mock_session, mock_stale, mock_orphan, mock_delete):
1315
+ mock_orphan.return_value = [
1316
+ {"VolumeId": "vol-orphan1", "Size": 50, "State": "available", "InstanceId": "i-dead1234"},
1317
+ ]
1318
+ runner = CliRunner()
1319
+ result = runner.invoke(main, ["-o", "json", "cleanup", "--include-ebs", "--yes"])
1320
+ assert result.exit_code == 0
1321
+ data = json.loads(result.output)
1322
+ assert "deleted_volumes" in data
1323
+ assert len(data["deleted_volumes"]) == 1
1324
+ assert data["deleted_volumes"][0]["volume_id"] == "vol-orphan1"
1325
+ assert data["deleted_volumes"][0]["deleted"] is True
1326
+
1327
+
1328
+ @patch("aws_bootstrap.cli.find_orphan_ebs_volumes")
1329
+ @patch("aws_bootstrap.cli.find_stale_ssh_hosts", return_value=[])
1330
+ @patch("aws_bootstrap.cli.boto3.Session")
1331
+ @patch("aws_bootstrap.cli.find_tagged_instances", return_value=[])
1332
+ def test_cleanup_include_ebs_dry_run_json(mock_find, mock_session, mock_stale, mock_orphan):
1333
+ mock_orphan.return_value = [
1334
+ {"VolumeId": "vol-orphan1", "Size": 50, "State": "available", "InstanceId": "i-dead1234"},
1335
+ ]
1336
+ runner = CliRunner()
1337
+ result = runner.invoke(main, ["-o", "json", "cleanup", "--include-ebs", "--dry-run"])
1338
+ assert result.exit_code == 0
1339
+ data = json.loads(result.output)
1340
+ assert data["dry_run"] is True
1341
+ assert "orphan_volumes" in data
1342
+ assert data["orphan_volumes"][0]["volume_id"] == "vol-orphan1"
1343
+ assert data["orphan_volumes"][0]["size_gb"] == 50
1344
+
1345
+
1346
+ @patch("aws_bootstrap.cli.find_orphan_ebs_volumes", return_value=[])
1347
+ @patch("aws_bootstrap.cli.find_stale_ssh_hosts", return_value=[])
1348
+ @patch("aws_bootstrap.cli.boto3.Session")
1349
+ @patch("aws_bootstrap.cli.find_tagged_instances", return_value=[])
1350
+ def test_cleanup_without_include_ebs_skips_volume_check(mock_find, mock_session, mock_stale, mock_orphan):
1351
+ """Without --include-ebs, orphan volume discovery should not be called."""
1352
+ runner = CliRunner()
1353
+ result = runner.invoke(main, ["cleanup"])
1354
+ assert result.exit_code == 0
1355
+ mock_orphan.assert_not_called()
1356
+
1357
+
1261
1358
  # ---------------------------------------------------------------------------
1262
1359
  # --output structured format tests
1263
1360
  # ---------------------------------------------------------------------------
@@ -14,6 +14,7 @@ from aws_bootstrap.ec2 import (
14
14
  delete_ebs_volume,
15
15
  detach_ebs_volume,
16
16
  find_ebs_volumes_for_instance,
17
+ find_orphan_ebs_volumes,
17
18
  validate_ebs_volume,
18
19
  )
19
20
 
@@ -243,3 +244,92 @@ def test_find_ebs_volumes_client_error_returns_empty():
243
244
  )
244
245
  volumes = find_ebs_volumes_for_instance(ec2, "i-test", "aws-bootstrap-g4dn")
245
246
  assert volumes == []
247
+
248
+
249
+ # ---------------------------------------------------------------------------
250
+ # find_orphan_ebs_volumes
251
+ # ---------------------------------------------------------------------------
252
+
253
+
254
+ def test_find_orphan_ebs_volumes_returns_orphans():
255
+ """Volumes whose linked instance is not live should be returned."""
256
+ ec2 = MagicMock()
257
+ ec2.describe_volumes.return_value = {
258
+ "Volumes": [
259
+ {
260
+ "VolumeId": "vol-orphan1",
261
+ "Size": 50,
262
+ "State": "available",
263
+ "Tags": [
264
+ {"Key": "created-by", "Value": "aws-bootstrap-g4dn"},
265
+ {"Key": "aws-bootstrap-instance", "Value": "i-dead1234"},
266
+ ],
267
+ }
268
+ ]
269
+ }
270
+ orphans = find_orphan_ebs_volumes(ec2, "aws-bootstrap-g4dn", live_instance_ids=set())
271
+ assert len(orphans) == 1
272
+ assert orphans[0]["VolumeId"] == "vol-orphan1"
273
+ assert orphans[0]["InstanceId"] == "i-dead1234"
274
+ assert orphans[0]["Size"] == 50
275
+
276
+ # Verify the API was called with status=available filter
277
+ filters = ec2.describe_volumes.call_args[1]["Filters"]
278
+ filter_names = {f["Name"] for f in filters}
279
+ assert "status" in filter_names
280
+
281
+
282
+ def test_find_orphan_ebs_volumes_excludes_live_instances():
283
+ """Volumes linked to a live instance should NOT be returned."""
284
+ ec2 = MagicMock()
285
+ ec2.describe_volumes.return_value = {
286
+ "Volumes": [
287
+ {
288
+ "VolumeId": "vol-attached",
289
+ "Size": 96,
290
+ "State": "available",
291
+ "Tags": [
292
+ {"Key": "created-by", "Value": "aws-bootstrap-g4dn"},
293
+ {"Key": "aws-bootstrap-instance", "Value": "i-live123"},
294
+ ],
295
+ }
296
+ ]
297
+ }
298
+ orphans = find_orphan_ebs_volumes(ec2, "aws-bootstrap-g4dn", live_instance_ids={"i-live123"})
299
+ assert orphans == []
300
+
301
+
302
+ def test_find_orphan_ebs_volumes_empty():
303
+ """No volumes at all should return empty list."""
304
+ ec2 = MagicMock()
305
+ ec2.describe_volumes.return_value = {"Volumes": []}
306
+ orphans = find_orphan_ebs_volumes(ec2, "aws-bootstrap-g4dn", live_instance_ids=set())
307
+ assert orphans == []
308
+
309
+
310
+ def test_find_orphan_ebs_volumes_skips_no_instance_tag():
311
+ """Volumes without aws-bootstrap-instance tag should be skipped."""
312
+ ec2 = MagicMock()
313
+ ec2.describe_volumes.return_value = {
314
+ "Volumes": [
315
+ {
316
+ "VolumeId": "vol-notag",
317
+ "Size": 10,
318
+ "State": "available",
319
+ "Tags": [{"Key": "created-by", "Value": "aws-bootstrap-g4dn"}],
320
+ }
321
+ ]
322
+ }
323
+ orphans = find_orphan_ebs_volumes(ec2, "aws-bootstrap-g4dn", live_instance_ids=set())
324
+ assert orphans == []
325
+
326
+
327
+ def test_find_orphan_ebs_volumes_client_error():
328
+ """ClientError should return empty list."""
329
+ ec2 = MagicMock()
330
+ ec2.describe_volumes.side_effect = botocore.exceptions.ClientError(
331
+ {"Error": {"Code": "UnauthorizedOperation", "Message": "no access"}},
332
+ "DescribeVolumes",
333
+ )
334
+ orphans = find_orphan_ebs_volumes(ec2, "aws-bootstrap-g4dn", live_instance_ids=set())
335
+ assert orphans == []
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: aws-bootstrap-g4dn
3
- Version: 0.7.0
3
+ Version: 0.8.0
4
4
  Summary: Bootstrap AWS EC2 GPU instances for hybrid local-remote development
5
5
  Author: Adam Ever-Hadani
6
6
  License-Expression: MIT
@@ -317,6 +317,12 @@ aws-bootstrap cleanup
317
317
  # Preview what would be removed without modifying config
318
318
  aws-bootstrap cleanup --dry-run
319
319
 
320
+ # Also find and delete orphan EBS data volumes
321
+ aws-bootstrap cleanup --include-ebs
322
+
323
+ # Preview orphan volumes without deleting
324
+ aws-bootstrap cleanup --include-ebs --dry-run
325
+
320
326
  # Skip confirmation prompt
321
327
  aws-bootstrap cleanup --yes
322
328
  ```
@@ -350,6 +356,7 @@ Key behaviors:
350
356
  - New volumes are formatted as ext4; existing volumes are mounted as-is
351
357
  - Volumes are tagged for automatic discovery by `status` and `terminate`
352
358
  - `terminate` deletes data volumes by default; use `--keep-ebs` to preserve them
359
+ - **Orphan cleanup** — use `aws-bootstrap cleanup --include-ebs` to find and delete orphan volumes (e.g. from spot interruptions or forgotten `--keep-ebs` volumes). Add `--dry-run` to preview what would be deleted without deleting anything
353
360
  - **Spot-safe** — data volumes survive spot interruptions. If AWS reclaims your instance, the volume detaches automatically and can be reattached to a new instance with `--ebs-volume-id`
354
361
  - EBS volumes must be in the same availability zone as the instance
355
362
  - Mount failures are non-fatal — the instance remains usable
@@ -33,7 +33,7 @@ requires-dist = [
33
33
  dev = [
34
34
  { name = "mypy", specifier = ">=1.19.1" },
35
35
  { name = "pytest", specifier = ">=9.0.2" },
36
- { name = "ruff", specifier = ">=0.14" },
36
+ { name = "ruff", specifier = ">=0.15" },
37
37
  { name = "types-pyyaml", specifier = ">=6.0.12.20250915" },
38
38
  { name = "types-tabulate", specifier = ">=0.9.0.20241207" },
39
39
  ]