aws-bootstrap-g4dn 0.5.0__py3-none-any.whl → 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,7 +10,9 @@ from aws_bootstrap.ssh import (
10
10
  _next_alias,
11
11
  _read_ssh_config,
12
12
  add_ssh_host,
13
+ cleanup_stale_ssh_hosts,
13
14
  find_ssh_alias,
15
+ find_stale_ssh_hosts,
14
16
  get_ssh_host_details,
15
17
  list_ssh_hosts,
16
18
  remove_ssh_host,
@@ -407,3 +409,77 @@ def test_resolve_unknown_alias_returns_none(tmp_path):
407
409
  def test_resolve_nonexistent_config_returns_none(tmp_path):
408
410
  cfg = tmp_path / "no_such_file"
409
411
  assert resolve_instance_id("aws-gpu1", config_path=cfg) is None
412
+
413
+
414
+ # ---------------------------------------------------------------------------
415
+ # Cleanup: find_stale_ssh_hosts / cleanup_stale_ssh_hosts
416
+ # ---------------------------------------------------------------------------
417
+
418
+
419
+ def test_find_stale_ssh_hosts_finds_orphans(tmp_path):
420
+ cfg = _config_path(tmp_path)
421
+ add_ssh_host("i-111aaaa1", "1.1.1.1", "ubuntu", KEY_PATH, config_path=cfg)
422
+ add_ssh_host("i-222bbbb2", "2.2.2.2", "ubuntu", KEY_PATH, config_path=cfg)
423
+ stale = find_stale_ssh_hosts({"i-111aaaa1"}, config_path=cfg)
424
+ assert stale == [("i-222bbbb2", "aws-gpu2")]
425
+
426
+
427
+ def test_find_stale_ssh_hosts_none_stale(tmp_path):
428
+ cfg = _config_path(tmp_path)
429
+ add_ssh_host("i-111aaaa1", "1.1.1.1", "ubuntu", KEY_PATH, config_path=cfg)
430
+ add_ssh_host("i-222bbbb2", "2.2.2.2", "ubuntu", KEY_PATH, config_path=cfg)
431
+ stale = find_stale_ssh_hosts({"i-111aaaa1", "i-222bbbb2"}, config_path=cfg)
432
+ assert stale == []
433
+
434
+
435
+ def test_find_stale_ssh_hosts_all_stale(tmp_path):
436
+ cfg = _config_path(tmp_path)
437
+ add_ssh_host("i-111aaaa1", "1.1.1.1", "ubuntu", KEY_PATH, config_path=cfg)
438
+ add_ssh_host("i-222bbbb2", "2.2.2.2", "ubuntu", KEY_PATH, config_path=cfg)
439
+ stale = find_stale_ssh_hosts(set(), config_path=cfg)
440
+ assert len(stale) == 2
441
+ assert ("i-111aaaa1", "aws-gpu1") in stale
442
+ assert ("i-222bbbb2", "aws-gpu2") in stale
443
+
444
+
445
+ def test_find_stale_ssh_hosts_empty_config(tmp_path):
446
+ cfg = _config_path(tmp_path)
447
+ cfg.parent.mkdir(parents=True, exist_ok=True)
448
+ cfg.write_text("")
449
+ stale = find_stale_ssh_hosts(set(), config_path=cfg)
450
+ assert stale == []
451
+
452
+
453
+ def test_cleanup_stale_ssh_hosts_removes(tmp_path):
454
+ cfg = _config_path(tmp_path)
455
+ add_ssh_host("i-111aaaa1", "1.1.1.1", "ubuntu", KEY_PATH, config_path=cfg)
456
+ add_ssh_host("i-222bbbb2", "2.2.2.2", "ubuntu", KEY_PATH, config_path=cfg)
457
+ results = cleanup_stale_ssh_hosts({"i-111aaaa1"}, config_path=cfg)
458
+ assert len(results) == 1
459
+ assert results[0].instance_id == "i-222bbbb2"
460
+ assert results[0].alias == "aws-gpu2"
461
+ assert results[0].removed is True
462
+ # Verify it was actually removed from the config
463
+ content = cfg.read_text()
464
+ assert "i-222bbbb2" not in content
465
+ assert "i-111aaaa1" in content
466
+
467
+
468
+ def test_cleanup_stale_ssh_hosts_dry_run(tmp_path):
469
+ cfg = _config_path(tmp_path)
470
+ add_ssh_host("i-111aaaa1", "1.1.1.1", "ubuntu", KEY_PATH, config_path=cfg)
471
+ add_ssh_host("i-222bbbb2", "2.2.2.2", "ubuntu", KEY_PATH, config_path=cfg)
472
+ results = cleanup_stale_ssh_hosts({"i-111aaaa1"}, config_path=cfg, dry_run=True)
473
+ assert len(results) == 1
474
+ assert results[0].removed is False
475
+ # Verify config is unchanged
476
+ content = cfg.read_text()
477
+ assert "i-222bbbb2" in content
478
+ assert "i-111aaaa1" in content
479
+
480
+
481
+ def test_cleanup_stale_ssh_hosts_no_stale(tmp_path):
482
+ cfg = _config_path(tmp_path)
483
+ add_ssh_host("i-111aaaa1", "1.1.1.1", "ubuntu", KEY_PATH, config_path=cfg)
484
+ results = cleanup_stale_ssh_hosts({"i-111aaaa1"}, config_path=cfg)
485
+ assert results == []
@@ -0,0 +1,76 @@
1
+ """Tests for mount_ebs_volume SSH function."""
2
+
3
+ from __future__ import annotations
4
+ from pathlib import Path
5
+ from unittest.mock import MagicMock, patch
6
+
7
+ from aws_bootstrap.ssh import mount_ebs_volume
8
+
9
+
10
+ KEY_PATH = Path("/home/user/.ssh/id_ed25519.pub")
11
+
12
+
13
+ @patch("aws_bootstrap.ssh.subprocess.run")
14
+ def test_mount_ebs_volume_success_format(mock_run):
15
+ """New volume: SSH command includes mkfs."""
16
+ mock_run.return_value = MagicMock(returncode=0)
17
+
18
+ result = mount_ebs_volume("1.2.3.4", "ubuntu", KEY_PATH, "vol-abc123", format_volume=True)
19
+
20
+ assert result is True
21
+ mock_run.assert_called_once()
22
+ cmd = mock_run.call_args[0][0]
23
+ script = cmd[-1]
24
+ assert "mkfs.ext4" in script
25
+ assert "/data" in script
26
+ assert "volabc123" in script # stripped vol- hyphen
27
+
28
+
29
+ @patch("aws_bootstrap.ssh.subprocess.run")
30
+ def test_mount_ebs_volume_success_no_format(mock_run):
31
+ """Existing volume: SSH command skips mkfs."""
32
+ mock_run.return_value = MagicMock(returncode=0)
33
+
34
+ result = mount_ebs_volume("1.2.3.4", "ubuntu", KEY_PATH, "vol-abc123", format_volume=False)
35
+
36
+ assert result is True
37
+ mock_run.assert_called_once()
38
+ cmd = mock_run.call_args[0][0]
39
+ script = cmd[-1]
40
+ assert "mkfs" not in script
41
+ assert "/data" in script
42
+
43
+
44
+ @patch("aws_bootstrap.ssh.subprocess.run")
45
+ def test_mount_ebs_volume_failure(mock_run):
46
+ """Non-zero exit code returns False."""
47
+ mock_run.return_value = MagicMock(returncode=1)
48
+
49
+ result = mount_ebs_volume("1.2.3.4", "ubuntu", KEY_PATH, "vol-abc123")
50
+
51
+ assert result is False
52
+
53
+
54
+ @patch("aws_bootstrap.ssh.subprocess.run")
55
+ def test_mount_ebs_volume_custom_port(mock_run):
56
+ """Non-default port is passed as -p flag."""
57
+ mock_run.return_value = MagicMock(returncode=0)
58
+
59
+ mount_ebs_volume("1.2.3.4", "ubuntu", KEY_PATH, "vol-abc123", port=2222)
60
+
61
+ cmd = mock_run.call_args[0][0]
62
+ assert "-p" in cmd
63
+ port_idx = cmd.index("-p")
64
+ assert cmd[port_idx + 1] == "2222"
65
+
66
+
67
+ @patch("aws_bootstrap.ssh.subprocess.run")
68
+ def test_mount_ebs_volume_custom_mount_point(mock_run):
69
+ """Custom mount point appears in the SSH script."""
70
+ mock_run.return_value = MagicMock(returncode=0)
71
+
72
+ mount_ebs_volume("1.2.3.4", "ubuntu", KEY_PATH, "vol-abc123", mount_point="/mnt/data")
73
+
74
+ cmd = mock_run.call_args[0][0]
75
+ script = cmd[-1]
76
+ assert "/mnt/data" in script
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: aws-bootstrap-g4dn
3
- Version: 0.5.0
3
+ Version: 0.6.0
4
4
  Summary: Bootstrap AWS EC2 GPU instances for hybrid local-remote development
5
5
  Author: Adam Ever-Hadani
6
6
  License-Expression: MIT
@@ -44,7 +44,8 @@ ssh aws-gpu1 # You're in, venv activated, PyTorch works
44
44
  | 📊 | **GPU benchmark included** | CNN (MNIST) + Transformer benchmarks with FP16/FP32/BF16 precision and tqdm progress |
45
45
  | 📓 | **Jupyter ready** | Lab server auto-starts as a systemd service on port 8888 — just SSH tunnel and open |
46
46
  | 🖥️ | **`status --gpu`** | Shows CUDA toolkit version, driver max, GPU architecture, spot pricing, uptime, and estimated cost |
47
- | 🗑️ | **Clean terminate** | Stops instances, removes SSH aliases, shows shutting-down state until fully gone |
47
+ | 💾 | **EBS data volumes** | Attach persistent storage at `/data` — survives spot interruptions and termination, reattach to new instances |
48
+ | 🗑️ | **Clean terminate** | Stops instances, removes SSH aliases, cleans up EBS volumes (or preserves with `--keep-ebs`) |
48
49
 
49
50
  ### 🎯 Target Workflows
50
51
 
@@ -132,16 +133,24 @@ aws-bootstrap launch --python-version 3.13
132
133
  # Use a non-default SSH port
133
134
  aws-bootstrap launch --ssh-port 2222
134
135
 
136
+ # Attach a persistent EBS data volume (96 GB gp3, mounted at /data)
137
+ aws-bootstrap launch --ebs-storage 96
138
+
139
+ # Reattach an existing EBS volume from a previous instance
140
+ aws-bootstrap launch --ebs-volume-id vol-0abc123def456
141
+
135
142
  # Use a specific AWS profile
136
143
  aws-bootstrap launch --profile my-aws-profile
137
144
  ```
138
145
 
139
146
  After launch, the CLI:
140
147
 
141
- 1. **Adds an SSH alias** (e.g. `aws-gpu1`) to `~/.ssh/config`
142
- 2. **Runs remote setup** — installs utilities, creates a Python venv, installs CUDA-matched PyTorch, sets up Jupyter
143
- 3. **Runs a CUDA smoke test** — verifies `torch.cuda.is_available()` and runs a quick GPU matmul
144
- 4. **Prints connection commands** — SSH, Jupyter tunnel, GPU benchmark, and terminate
148
+ 1. **Creates/attaches EBS volume** (if `--ebs-storage` or `--ebs-volume-id` was specified)
149
+ 2. **Adds an SSH alias** (e.g. `aws-gpu1`) to `~/.ssh/config`
150
+ 3. **Runs remote setup** — installs utilities, creates a Python venv, installs CUDA-matched PyTorch, sets up Jupyter
151
+ 4. **Mounts EBS volume** at `/data` (if applicable — formats new volumes, mounts existing ones as-is)
152
+ 5. **Runs a CUDA smoke test** — verifies `torch.cuda.is_available()` and runs a quick GPU matmul
153
+ 6. **Prints connection commands** — SSH, Jupyter tunnel, GPU benchmark, and terminate
145
154
 
146
155
  ```bash
147
156
  ssh aws-gpu1 # venv auto-activates on login
@@ -154,7 +163,7 @@ The setup script runs automatically on the instance after SSH becomes available:
154
163
  | Step | What |
155
164
  |------|------|
156
165
  | **GPU verify** | Confirms `nvidia-smi` and `nvcc` are working |
157
- | **Utilities** | Installs `htop`, `tmux`, `tree`, `jq` |
166
+ | **Utilities** | Installs `htop`, `tmux`, `tree`, `jq`, `ffmpeg` |
158
167
  | **Python venv** | Creates `~/venv` with `uv`, auto-activates in `~/.bashrc`. Use `--python-version` to pin a specific Python (e.g. `3.13`) |
159
168
  | **CUDA-aware PyTorch** | Detects CUDA toolkit version → installs PyTorch from the matching `cu{TAG}` wheel index |
160
169
  | **CUDA smoke test** | Runs `torch.cuda.is_available()` + GPU matmul to verify the stack |
@@ -261,6 +270,9 @@ aws-bootstrap status --region us-east-1
261
270
  # Terminate all aws-bootstrap instances (with confirmation prompt)
262
271
  aws-bootstrap terminate
263
272
 
273
+ # Terminate but preserve EBS data volumes for reuse
274
+ aws-bootstrap terminate --keep-ebs
275
+
264
276
  # Terminate by SSH alias (resolved via ~/.ssh/config)
265
277
  aws-bootstrap terminate aws-gpu1
266
278
 
@@ -272,6 +284,15 @@ aws-bootstrap terminate aws-gpu1 i-def456
272
284
 
273
285
  # Skip confirmation prompt
274
286
  aws-bootstrap terminate --yes
287
+
288
+ # Remove stale SSH config entries for terminated instances
289
+ aws-bootstrap cleanup
290
+
291
+ # Preview what would be removed without modifying config
292
+ aws-bootstrap cleanup --dry-run
293
+
294
+ # Skip confirmation prompt
295
+ aws-bootstrap cleanup --yes
275
296
  ```
276
297
 
277
298
  `status --gpu` reports both the **installed CUDA toolkit** version (from `nvcc`) and the **maximum CUDA version supported by the driver** (from `nvidia-smi`), so you can see at a glance whether they match:
@@ -282,6 +303,31 @@ CUDA: 12.8 (driver supports up to 13.0)
282
303
 
283
304
  SSH aliases are managed automatically — they're created on `launch`, shown in `status`, and cleaned up on `terminate`. Aliases use sequential numbering (`aws-gpu1`, `aws-gpu2`, etc.) and never reuse numbers from previous instances. You can use aliases anywhere you'd use an instance ID, e.g. `aws-bootstrap terminate aws-gpu1`.
284
305
 
306
+ ## EBS Data Volumes
307
+
308
+ Attach persistent EBS storage to keep datasets and model checkpoints across instance lifecycles. Volumes are mounted at `/data` and persist independently of the instance.
309
+
310
+ ```bash
311
+ # Create a new 96 GB gp3 volume, formatted and mounted at /data
312
+ aws-bootstrap launch --ebs-storage 96
313
+
314
+ # After terminating with --keep-ebs, reattach the same volume to a new instance
315
+ aws-bootstrap terminate --keep-ebs
316
+ # Output: Preserving EBS volume: vol-0abc123...
317
+ # Reattach with: aws-bootstrap launch --ebs-volume-id vol-0abc123...
318
+
319
+ aws-bootstrap launch --ebs-volume-id vol-0abc123def456
320
+ ```
321
+
322
+ Key behaviors:
323
+ - `--ebs-storage` and `--ebs-volume-id` are mutually exclusive
324
+ - New volumes are formatted as ext4; existing volumes are mounted as-is
325
+ - Volumes are tagged for automatic discovery by `status` and `terminate`
326
+ - `terminate` deletes data volumes by default; use `--keep-ebs` to preserve them
327
+ - **Spot-safe** — data volumes survive spot interruptions. If AWS reclaims your instance, the volume detaches automatically and can be reattached to a new instance with `--ebs-volume-id`
328
+ - EBS volumes must be in the same availability zone as the instance
329
+ - Mount failures are non-fatal — the instance remains usable
330
+
285
331
  ## EC2 vCPU Quotas
286
332
 
287
333
  AWS accounts have [service quotas](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-resource-limits.html) that limit how many vCPUs you can run per instance family. New or lightly-used accounts often have a **default quota of 0 vCPUs** for GPU instance families (G and VT), which will cause errors on launch:
@@ -1,27 +1,29 @@
1
1
  aws_bootstrap/__init__.py,sha256=kl_jvrunGyIyizdRqAP6ROb5P1BBrXX5PTq5gq1ipU0,82
2
- aws_bootstrap/cli.py,sha256=N2hT0XEC-4k5Cs3iGfA_xt_onc__NMNmh8fCaV4frgc,21076
3
- aws_bootstrap/config.py,sha256=TeCOYDlijT-KD5SFIzc-VvBhOqcq9YCgen9NK63rka8,895
4
- aws_bootstrap/ec2.py,sha256=LHpzW91ayK45gsWV_B4LanSZIhWggqTsL31qHUceiaA,12274
2
+ aws_bootstrap/cli.py,sha256=n3Ep_7zhBiRSU4ZUeGVqTRb81nzo98mxzQSKdAuiopY,27788
3
+ aws_bootstrap/config.py,sha256=p770XgjfuK1-wVkAEeBdtJSVkc58DKFHgaJlZ-zbGmk,967
4
+ aws_bootstrap/ec2.py,sha256=uNqxWWfPfGCbujQ3eonvqjjxLE76fsEyNchPS6byR6c,16719
5
5
  aws_bootstrap/gpu.py,sha256=WTnHR0s3mQHDlnzqRgqAC6omWz7nT5YtGpcs0Bf88jk,692
6
- aws_bootstrap/ssh.py,sha256=0acHNX7IG6PUvp6T72l9kHTwUs5sVXFAyJXvUfA3qnE,20131
6
+ aws_bootstrap/ssh.py,sha256=xY0Yn5q4aA0Xb3ejNY-KCbooZArXRGpimSnbJiBLI_w,24059
7
7
  aws_bootstrap/resources/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
8
  aws_bootstrap/resources/gpu_benchmark.py,sha256=1eFt_3MXvoLhs9HahrRPhbxvtdjFaXG2Ty3GEg7Gud0,29366
9
9
  aws_bootstrap/resources/gpu_smoke_test.ipynb,sha256=XvAOEIPa5H9ri5mRZqOdknmwOwKNvCME6DzBGuhRYfg,10698
10
10
  aws_bootstrap/resources/launch.json,sha256=ZOcvHLy3-zBOqRTtFzuyn-_2tB64yuEn8PrJOoZ-PgE,1484
11
- aws_bootstrap/resources/remote_setup.sh,sha256=z_YGdzwEHWInkE3dZVbBNa0F_joTeVhnOpCYOj1CK30,8331
11
+ aws_bootstrap/resources/remote_setup.sh,sha256=i9qXzAqYrnDWt6nFrqeghYWVHmbbzP-OS7O_YJB3GYU,8400
12
12
  aws_bootstrap/resources/requirements.txt,sha256=gpYl1MFCfWXiAhbIUgAjuTHONz3MKci25msIyOkMmUk,75
13
13
  aws_bootstrap/resources/saxpy.cu,sha256=1BSESEwGGCx3KWx9ZJ8jiPHQ42KzQN6i2aP0I28bPsA,1178
14
14
  aws_bootstrap/resources/tasks.json,sha256=6U8pB1N8YIWgUCfFet4ne3nYnI92tWv5D5kPiQG3Zlg,1576
15
15
  aws_bootstrap/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
- aws_bootstrap/tests/test_cli.py,sha256=m_4tIX0xYZ8BxDkHPGAWiPAKo4vETaTSKSJbyn3K1Cg,34731
17
- aws_bootstrap/tests/test_config.py,sha256=arvET6KNl4Vqsz0zFrSdhciXGU688bfsvCr3dSpziN0,1050
16
+ aws_bootstrap/tests/test_cli.py,sha256=OOYzopo8gkp6fO1MT10fsqAhIs5pN9cCpJyOzWNXHCg,48638
17
+ aws_bootstrap/tests/test_config.py,sha256=vspSGoben_i7m4Fh6UGSes6Fkr789Y1eaOLe54fRSGc,1524
18
+ aws_bootstrap/tests/test_ebs.py,sha256=B2HrgSmS7yroz6zzRuPxKIXmQGlWesuGqOtybyZmHJQ,7582
18
19
  aws_bootstrap/tests/test_ec2.py,sha256=Jmqsjv973hxXbZWfGgECtm6aa2156Lzji227sYMBuMg,10547
19
20
  aws_bootstrap/tests/test_gpu.py,sha256=rbMuda_sIVbaCzkWXoLv9YIfnWztgRoP7NuVL8XHrUY,3871
20
- aws_bootstrap/tests/test_ssh_config.py,sha256=YYtv82zBBLGioTo58iC31_5jUli1s0eoGV9VRCobOgY,14059
21
+ aws_bootstrap/tests/test_ssh_config.py,sha256=qy3UDdvkTfrALiF-W3m8aKvnQj3BeCrZdLjG75tcVJU,17131
22
+ aws_bootstrap/tests/test_ssh_ebs.py,sha256=ipt0xOzdf3kfkVt42Dgr_z7D6JDIMuRi3DqX0OP8sm0,2342
21
23
  aws_bootstrap/tests/test_ssh_gpu.py,sha256=dRp86Og-8GqiATSff3rxhu83mBZdGgqI4UOnoC00Ln0,1454
22
- aws_bootstrap_g4dn-0.5.0.dist-info/licenses/LICENSE,sha256=Hen77Mt8sazSQJ9DgrmZuAvDwo2vc5JAkR_avuFV-CM,1067
23
- aws_bootstrap_g4dn-0.5.0.dist-info/METADATA,sha256=t8m53ZodJlZyMffeSu3Wk5bMt-Dm_Jl3q_HTbRLQbYE,13728
24
- aws_bootstrap_g4dn-0.5.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
25
- aws_bootstrap_g4dn-0.5.0.dist-info/entry_points.txt,sha256=T8FXfOgmLEvFi8DHaFJ3tCzId9J3_d2Y6qT98OXxCjA,57
26
- aws_bootstrap_g4dn-0.5.0.dist-info/top_level.txt,sha256=mix9gZRs8JUv0OMSB_rwdGcRnTKzsKgHrE5fyAn5zJw,14
27
- aws_bootstrap_g4dn-0.5.0.dist-info/RECORD,,
24
+ aws_bootstrap_g4dn-0.6.0.dist-info/licenses/LICENSE,sha256=Hen77Mt8sazSQJ9DgrmZuAvDwo2vc5JAkR_avuFV-CM,1067
25
+ aws_bootstrap_g4dn-0.6.0.dist-info/METADATA,sha256=Ot9yCJfJup1ZzW-0cq99zT9bbswHnRJ4SWxEsJ-pK58,15859
26
+ aws_bootstrap_g4dn-0.6.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
27
+ aws_bootstrap_g4dn-0.6.0.dist-info/entry_points.txt,sha256=T8FXfOgmLEvFi8DHaFJ3tCzId9J3_d2Y6qT98OXxCjA,57
28
+ aws_bootstrap_g4dn-0.6.0.dist-info/top_level.txt,sha256=mix9gZRs8JUv0OMSB_rwdGcRnTKzsKgHrE5fyAn5zJw,14
29
+ aws_bootstrap_g4dn-0.6.0.dist-info/RECORD,,