aws-bootstrap-g4dn 0.1.0__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {aws_bootstrap_g4dn-0.1.0 → aws_bootstrap_g4dn-0.3.0}/.github/workflows/ci.yml +1 -1
- {aws_bootstrap_g4dn-0.1.0 → aws_bootstrap_g4dn-0.3.0}/.github/workflows/publish-to-pypi.yml +2 -0
- {aws_bootstrap_g4dn-0.1.0 → aws_bootstrap_g4dn-0.3.0}/CLAUDE.md +7 -5
- {aws_bootstrap_g4dn-0.1.0/aws_bootstrap_g4dn.egg-info → aws_bootstrap_g4dn-0.3.0}/PKG-INFO +27 -5
- aws_bootstrap_g4dn-0.1.0/PKG-INFO → aws_bootstrap_g4dn-0.3.0/README.md +22 -19
- {aws_bootstrap_g4dn-0.1.0 → aws_bootstrap_g4dn-0.3.0}/aws_bootstrap/cli.py +109 -12
- {aws_bootstrap_g4dn-0.1.0 → aws_bootstrap_g4dn-0.3.0}/aws_bootstrap/config.py +2 -0
- {aws_bootstrap_g4dn-0.1.0 → aws_bootstrap_g4dn-0.3.0}/aws_bootstrap/ec2.py +3 -3
- aws_bootstrap_g4dn-0.3.0/aws_bootstrap/gpu.py +27 -0
- {aws_bootstrap_g4dn-0.1.0 → aws_bootstrap_g4dn-0.3.0}/aws_bootstrap/resources/remote_setup.sh +7 -1
- {aws_bootstrap_g4dn-0.1.0 → aws_bootstrap_g4dn-0.3.0}/aws_bootstrap/ssh.py +47 -47
- {aws_bootstrap_g4dn-0.1.0 → aws_bootstrap_g4dn-0.3.0}/aws_bootstrap/tests/test_cli.py +315 -7
- aws_bootstrap_g4dn-0.1.0/aws_bootstrap/tests/test_ssh_gpu.py → aws_bootstrap_g4dn-0.3.0/aws_bootstrap/tests/test_gpu.py +3 -43
- {aws_bootstrap_g4dn-0.1.0 → aws_bootstrap_g4dn-0.3.0}/aws_bootstrap/tests/test_ssh_config.py +36 -0
- aws_bootstrap_g4dn-0.3.0/aws_bootstrap/tests/test_ssh_gpu.py +44 -0
- aws_bootstrap_g4dn-0.1.0/README.md → aws_bootstrap_g4dn-0.3.0/aws_bootstrap_g4dn.egg-info/PKG-INFO +41 -3
- {aws_bootstrap_g4dn-0.1.0 → aws_bootstrap_g4dn-0.3.0}/aws_bootstrap_g4dn.egg-info/SOURCES.txt +2 -0
- {aws_bootstrap_g4dn-0.1.0 → aws_bootstrap_g4dn-0.3.0}/pyproject.toml +7 -2
- {aws_bootstrap_g4dn-0.1.0 → aws_bootstrap_g4dn-0.3.0}/uv.lock +35 -1
- {aws_bootstrap_g4dn-0.1.0 → aws_bootstrap_g4dn-0.3.0}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
- {aws_bootstrap_g4dn-0.1.0 → aws_bootstrap_g4dn-0.3.0}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
- {aws_bootstrap_g4dn-0.1.0 → aws_bootstrap_g4dn-0.3.0}/.gitignore +0 -0
- {aws_bootstrap_g4dn-0.1.0 → aws_bootstrap_g4dn-0.3.0}/.pre-commit-config.yaml +0 -0
- {aws_bootstrap_g4dn-0.1.0 → aws_bootstrap_g4dn-0.3.0}/CODE_OF_CONDUCT.md +0 -0
- {aws_bootstrap_g4dn-0.1.0 → aws_bootstrap_g4dn-0.3.0}/CONTRIBUTING.md +0 -0
- {aws_bootstrap_g4dn-0.1.0 → aws_bootstrap_g4dn-0.3.0}/LICENSE +0 -0
- {aws_bootstrap_g4dn-0.1.0 → aws_bootstrap_g4dn-0.3.0}/SECURITY.md +0 -0
- {aws_bootstrap_g4dn-0.1.0 → aws_bootstrap_g4dn-0.3.0}/aws_bootstrap/__init__.py +0 -0
- {aws_bootstrap_g4dn-0.1.0 → aws_bootstrap_g4dn-0.3.0}/aws_bootstrap/resources/__init__.py +0 -0
- {aws_bootstrap_g4dn-0.1.0 → aws_bootstrap_g4dn-0.3.0}/aws_bootstrap/resources/gpu_benchmark.py +0 -0
- {aws_bootstrap_g4dn-0.1.0 → aws_bootstrap_g4dn-0.3.0}/aws_bootstrap/resources/gpu_smoke_test.ipynb +0 -0
- {aws_bootstrap_g4dn-0.1.0 → aws_bootstrap_g4dn-0.3.0}/aws_bootstrap/resources/requirements.txt +0 -0
- {aws_bootstrap_g4dn-0.1.0 → aws_bootstrap_g4dn-0.3.0}/aws_bootstrap/tests/__init__.py +0 -0
- {aws_bootstrap_g4dn-0.1.0 → aws_bootstrap_g4dn-0.3.0}/aws_bootstrap/tests/test_config.py +0 -0
- {aws_bootstrap_g4dn-0.1.0 → aws_bootstrap_g4dn-0.3.0}/aws_bootstrap/tests/test_ec2.py +0 -0
- {aws_bootstrap_g4dn-0.1.0 → aws_bootstrap_g4dn-0.3.0}/aws_bootstrap_g4dn.egg-info/dependency_links.txt +0 -0
- {aws_bootstrap_g4dn-0.1.0 → aws_bootstrap_g4dn-0.3.0}/aws_bootstrap_g4dn.egg-info/entry_points.txt +0 -0
- {aws_bootstrap_g4dn-0.1.0 → aws_bootstrap_g4dn-0.3.0}/aws_bootstrap_g4dn.egg-info/requires.txt +0 -0
- {aws_bootstrap_g4dn-0.1.0 → aws_bootstrap_g4dn-0.3.0}/aws_bootstrap_g4dn.egg-info/top_level.txt +0 -0
- {aws_bootstrap_g4dn-0.1.0 → aws_bootstrap_g4dn-0.3.0}/setup.cfg +0 -0
|
@@ -10,7 +10,7 @@ Target workflows: Jupyter server-client, VSCode Remote SSH, and NVIDIA Nsight re
|
|
|
10
10
|
|
|
11
11
|
## Tech Stack & Requirements
|
|
12
12
|
|
|
13
|
-
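- **Python 3.14+** with **uv** package manager (astral-sh/uv) — used for venv creation, dependency management, and running the project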
|
|
13
|
+
- **Python 3.12+** with **uv** package manager (astral-sh/uv) — used for venv creation, dependency management, and running the project
|
|
14
14
|
- **boto3** — AWS SDK for EC2 provisioning (AMI lookup, security groups, instance launch, waiters)
|
|
15
15
|
- **click** — CLI framework with built-in color support (`click.secho`, `click.style`)
|
|
16
16
|
- **setuptools + setuptools-scm** — build backend with git-tag-based versioning (configured in pyproject.toml)
|
|
@@ -33,7 +33,8 @@ aws_bootstrap/
|
|
|
33
33
|
cli.py # Click CLI entry point (launch, status, terminate commands)
|
|
34
34
|
config.py # LaunchConfig dataclass with defaults
|
|
35
35
|
ec2.py # AMI lookup, security group, instance launch/find/terminate, polling, spot pricing
|
|
36
|
-
|
|
36
|
+
gpu.py # GPU architecture mapping and GpuInfo dataclass
|
|
37
|
+
ssh.py # SSH key pair import, SSH readiness check, remote setup, ~/.ssh/config management, GPU queries
|
|
37
38
|
resources/ # Non-Python artifacts SCP'd to remote instances
|
|
38
39
|
__init__.py
|
|
39
40
|
gpu_benchmark.py # GPU throughput benchmark (CNN + Transformer), copied to ~/gpu_benchmark.py on instance
|
|
@@ -44,6 +45,7 @@ aws_bootstrap/
|
|
|
44
45
|
test_config.py
|
|
45
46
|
test_cli.py
|
|
46
47
|
test_ec2.py
|
|
48
|
+
test_gpu.py
|
|
47
49
|
test_ssh_config.py
|
|
48
50
|
test_ssh_gpu.py
|
|
49
51
|
docs/
|
|
@@ -54,8 +56,8 @@ Entry point: `aws-bootstrap = "aws_bootstrap.cli:main"` (installed via `uv sync`
|
|
|
54
56
|
|
|
55
57
|
## CLI Commands
|
|
56
58
|
|
|
57
|
-
- **`launch`** — provisions an EC2 instance (spot by default, falls back to on-demand on capacity errors); adds SSH config alias (e.g. `aws-gpu1`) to `~/.ssh/config`
|
|
58
|
-
- **`status`** — lists all non-terminated instances (including `shutting-down`) with type, IP, SSH alias, pricing (spot price/hr or on-demand), uptime, and estimated cost for running spot instances; `--gpu` flag queries GPU info via SSH, reporting both CUDA toolkit version (from `nvcc`) and driver-supported max (from `nvidia-smi`)
|
|
59
|
+
- **`launch`** — provisions an EC2 instance (spot by default, falls back to on-demand on capacity errors); adds SSH config alias (e.g. `aws-gpu1`) to `~/.ssh/config`; `--python-version` controls which Python `uv` installs in the remote venv; `--ssh-port` overrides the default SSH port (22) for security group ingress, connection checks, and SSH config
|
|
60
|
+
- **`status`** — lists all non-terminated instances (including `shutting-down`) with type, IP, SSH alias, pricing (spot price/hr or on-demand), uptime, and estimated cost for running spot instances; `--gpu` flag queries GPU info via SSH, reporting both CUDA toolkit version (from `nvcc`) and driver-supported max (from `nvidia-smi`); `--instructions` (default: on) prints connection commands (SSH, Jupyter tunnel, VSCode Remote SSH, GPU benchmark) for each running instance; suppress with `--no-instructions`
|
|
59
61
|
- **`terminate`** — terminates instances by ID or all aws-bootstrap instances in the region; removes SSH config aliases
|
|
60
62
|
- **`list instance-types`** — lists EC2 instance types matching a family prefix (default: `g4dn`), showing vCPUs, memory, and GPU info
|
|
61
63
|
- **`list amis`** — lists available AMIs matching a name pattern (default: Deep Learning Base OSS Nvidia Driver GPU AMIs), sorted newest-first
|
|
@@ -96,7 +98,7 @@ The `KNOWN_CUDA_TAGS` array in `remote_setup.sh` lists the CUDA wheel tags publi
|
|
|
96
98
|
## Remote Setup Details
|
|
97
99
|
|
|
98
100
|
`remote_setup.sh` also:
|
|
99
|
-
- Creates `~/venv` and appends `source ~/venv/bin/activate` to `~/.bashrc` so the venv is auto-activated on SSH login
|
|
101
|
+
- Creates `~/venv` and appends `source ~/venv/bin/activate` to `~/.bashrc` so the venv is auto-activated on SSH login. When `--python-version` is passed to `launch`, the CLI sets `PYTHON_VERSION` as an inline env var on the SSH command; `remote_setup.sh` reads it to run `uv python install` and `uv venv --python` with the requested version
|
|
100
102
|
- Runs a quick CUDA smoke test (`torch.cuda.is_available()` + GPU matmul) after PyTorch installation to verify the GPU stack; prints a WARNING on failure but does not abort
|
|
101
103
|
- Copies `gpu_benchmark.py` to `~/gpu_benchmark.py` and `gpu_smoke_test.ipynb` to `~/gpu_smoke_test.ipynb`
|
|
102
104
|
|
|
@@ -1,13 +1,16 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: aws-bootstrap-g4dn
|
|
3
|
-
Version: 0.1.0
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: Bootstrap AWS EC2 GPU instances for hybrid local-remote development
|
|
5
5
|
Author: Adam Ever-Hadani
|
|
6
6
|
License-Expression: MIT
|
|
7
7
|
Project-URL: Homepage, https://github.com/promptromp/aws-bootstrap-g4dn
|
|
8
8
|
Project-URL: Issues, https://github.com/promptromp/aws-bootstrap-g4dn/issues
|
|
9
9
|
Keywords: aws,ec2,gpu,cuda,deep-learning,spot-instances,cli
|
|
10
|
-
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
13
|
+
Requires-Python: >=3.12
|
|
11
14
|
Description-Content-Type: text/markdown
|
|
12
15
|
License-File: LICENSE
|
|
13
16
|
Requires-Dist: boto3>=1.35
|
|
@@ -55,7 +58,7 @@ ssh aws-gpu1 # You're in, venv activated, PyTorch works
|
|
|
55
58
|
|
|
56
59
|
1. AWS profile configured with relevant permissions (profile name can be passed via `--profile` or read from `AWS_PROFILE` env var)
|
|
57
60
|
2. AWS CLI v2 — see [here](https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html)
|
|
58
|
-
3. Python 3.14+ and [uv](https://github.com/astral-sh/uv)
|
|
61
|
+
3. Python 3.12+ and [uv](https://github.com/astral-sh/uv)
|
|
59
62
|
4. An SSH key pair (see below)
|
|
60
63
|
|
|
61
64
|
## Installation
|
|
@@ -66,6 +69,16 @@ ssh aws-gpu1 # You're in, venv activated, PyTorch works
|
|
|
66
69
|
pip install aws-bootstrap-g4dn
|
|
67
70
|
```
|
|
68
71
|
|
|
72
|
+
### With uvx (no install needed)
|
|
73
|
+
|
|
74
|
+
[uvx](https://docs.astral.sh/uv/guides/tools/) runs the CLI directly in a temporary environment — no global install required:
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
uvx --from aws-bootstrap-g4dn aws-bootstrap launch
|
|
78
|
+
uvx --from aws-bootstrap-g4dn aws-bootstrap status
|
|
79
|
+
uvx --from aws-bootstrap-g4dn aws-bootstrap terminate
|
|
80
|
+
```
|
|
81
|
+
|
|
69
82
|
### From source (development)
|
|
70
83
|
|
|
71
84
|
```bash
|
|
@@ -75,7 +88,7 @@ uv venv
|
|
|
75
88
|
uv sync
|
|
76
89
|
```
|
|
77
90
|
|
|
78
|
-
|
|
91
|
+
All methods install the `aws-bootstrap` CLI.
|
|
79
92
|
|
|
80
93
|
## SSH Key Setup
|
|
81
94
|
|
|
@@ -113,6 +126,12 @@ aws-bootstrap launch --on-demand --instance-type g5.xlarge --region us-east-1
|
|
|
113
126
|
# Launch without running the remote setup script
|
|
114
127
|
aws-bootstrap launch --no-setup
|
|
115
128
|
|
|
129
|
+
# Use a specific Python version in the remote venv
|
|
130
|
+
aws-bootstrap launch --python-version 3.13
|
|
131
|
+
|
|
132
|
+
# Use a non-default SSH port
|
|
133
|
+
aws-bootstrap launch --ssh-port 2222
|
|
134
|
+
|
|
116
135
|
# Use a specific AWS profile
|
|
117
136
|
aws-bootstrap launch --profile my-aws-profile
|
|
118
137
|
```
|
|
@@ -136,7 +155,7 @@ The setup script runs automatically on the instance after SSH becomes available:
|
|
|
136
155
|
|------|------|
|
|
137
156
|
| **GPU verify** | Confirms `nvidia-smi` and `nvcc` are working |
|
|
138
157
|
| **Utilities** | Installs `htop`, `tmux`, `tree`, `jq` |
|
|
139
|
-
| **Python venv** | Creates `~/venv` with `uv`, auto-activates in `~/.bashrc` |
|
|
158
|
+
| **Python venv** | Creates `~/venv` with `uv`, auto-activates in `~/.bashrc`. Use `--python-version` to pin a specific Python (e.g. `3.13`) |
|
|
140
159
|
| **CUDA-aware PyTorch** | Detects CUDA toolkit version → installs PyTorch from the matching `cu{TAG}` wheel index |
|
|
141
160
|
| **CUDA smoke test** | Runs `torch.cuda.is_available()` + GPU matmul to verify the stack |
|
|
142
161
|
| **GPU benchmark** | Copies `gpu_benchmark.py` to `~/gpu_benchmark.py` |
|
|
@@ -210,6 +229,9 @@ aws-bootstrap status
|
|
|
210
229
|
# Include GPU info (CUDA toolkit + driver version, GPU name, architecture) via SSH
|
|
211
230
|
aws-bootstrap status --gpu
|
|
212
231
|
|
|
232
|
+
# Hide connection commands (shown by default for each running instance)
|
|
233
|
+
aws-bootstrap status --no-instructions
|
|
234
|
+
|
|
213
235
|
# List instances in a specific region
|
|
214
236
|
aws-bootstrap status --region us-east-1
|
|
215
237
|
|
|
@@ -1,19 +1,3 @@
|
|
|
1
|
-
Metadata-Version: 2.4
|
|
2
|
-
Name: aws-bootstrap-g4dn
|
|
3
|
-
Version: 0.1.0
|
|
4
|
-
Summary: Bootstrap AWS EC2 GPU instances for hybrid local-remote development
|
|
5
|
-
Author: Adam Ever-Hadani
|
|
6
|
-
License-Expression: MIT
|
|
7
|
-
Project-URL: Homepage, https://github.com/promptromp/aws-bootstrap-g4dn
|
|
8
|
-
Project-URL: Issues, https://github.com/promptromp/aws-bootstrap-g4dn/issues
|
|
9
|
-
Keywords: aws,ec2,gpu,cuda,deep-learning,spot-instances,cli
|
|
10
|
-
Requires-Python: >=3.14
|
|
11
|
-
Description-Content-Type: text/markdown
|
|
12
|
-
License-File: LICENSE
|
|
13
|
-
Requires-Dist: boto3>=1.35
|
|
14
|
-
Requires-Dist: click>=8.1
|
|
15
|
-
Dynamic: license-file
|
|
16
|
-
|
|
17
1
|
# aws-bootstrap-g4dn
|
|
18
2
|
|
|
19
3
|
--------------------------------------------------------------------------------
|
|
@@ -55,7 +39,7 @@ ssh aws-gpu1 # You're in, venv activated, PyTorch works
|
|
|
55
39
|
|
|
56
40
|
1. AWS profile configured with relevant permissions (profile name can be passed via `--profile` or read from `AWS_PROFILE` env var)
|
|
57
41
|
2. AWS CLI v2 — see [here](https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html)
|
|
58
|
-
3. Python 3.14+ and [uv](https://github.com/astral-sh/uv)
|
|
42
|
+
3. Python 3.12+ and [uv](https://github.com/astral-sh/uv)
|
|
59
43
|
4. An SSH key pair (see below)
|
|
60
44
|
|
|
61
45
|
## Installation
|
|
@@ -66,6 +50,16 @@ ssh aws-gpu1 # You're in, venv activated, PyTorch works
|
|
|
66
50
|
pip install aws-bootstrap-g4dn
|
|
67
51
|
```
|
|
68
52
|
|
|
53
|
+
### With uvx (no install needed)
|
|
54
|
+
|
|
55
|
+
[uvx](https://docs.astral.sh/uv/guides/tools/) runs the CLI directly in a temporary environment — no global install required:
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
uvx --from aws-bootstrap-g4dn aws-bootstrap launch
|
|
59
|
+
uvx --from aws-bootstrap-g4dn aws-bootstrap status
|
|
60
|
+
uvx --from aws-bootstrap-g4dn aws-bootstrap terminate
|
|
61
|
+
```
|
|
62
|
+
|
|
69
63
|
### From source (development)
|
|
70
64
|
|
|
71
65
|
```bash
|
|
@@ -75,7 +69,7 @@ uv venv
|
|
|
75
69
|
uv sync
|
|
76
70
|
```
|
|
77
71
|
|
|
78
|
-
|
|
72
|
+
All methods install the `aws-bootstrap` CLI.
|
|
79
73
|
|
|
80
74
|
## SSH Key Setup
|
|
81
75
|
|
|
@@ -113,6 +107,12 @@ aws-bootstrap launch --on-demand --instance-type g5.xlarge --region us-east-1
|
|
|
113
107
|
# Launch without running the remote setup script
|
|
114
108
|
aws-bootstrap launch --no-setup
|
|
115
109
|
|
|
110
|
+
# Use a specific Python version in the remote venv
|
|
111
|
+
aws-bootstrap launch --python-version 3.13
|
|
112
|
+
|
|
113
|
+
# Use a non-default SSH port
|
|
114
|
+
aws-bootstrap launch --ssh-port 2222
|
|
115
|
+
|
|
116
116
|
# Use a specific AWS profile
|
|
117
117
|
aws-bootstrap launch --profile my-aws-profile
|
|
118
118
|
```
|
|
@@ -136,7 +136,7 @@ The setup script runs automatically on the instance after SSH becomes available:
|
|
|
136
136
|
|------|------|
|
|
137
137
|
| **GPU verify** | Confirms `nvidia-smi` and `nvcc` are working |
|
|
138
138
|
| **Utilities** | Installs `htop`, `tmux`, `tree`, `jq` |
|
|
139
|
-
| **Python venv** | Creates `~/venv` with `uv`, auto-activates in `~/.bashrc` |
|
|
139
|
+
| **Python venv** | Creates `~/venv` with `uv`, auto-activates in `~/.bashrc`. Use `--python-version` to pin a specific Python (e.g. `3.13`) |
|
|
140
140
|
| **CUDA-aware PyTorch** | Detects CUDA toolkit version → installs PyTorch from the matching `cu{TAG}` wheel index |
|
|
141
141
|
| **CUDA smoke test** | Runs `torch.cuda.is_available()` + GPU matmul to verify the stack |
|
|
142
142
|
| **GPU benchmark** | Copies `gpu_benchmark.py` to `~/gpu_benchmark.py` |
|
|
@@ -210,6 +210,9 @@ aws-bootstrap status
|
|
|
210
210
|
# Include GPU info (CUDA toolkit + driver version, GPU name, architecture) via SSH
|
|
211
211
|
aws-bootstrap status --gpu
|
|
212
212
|
|
|
213
|
+
# Hide connection commands (shown by default for each running instance)
|
|
214
|
+
aws-bootstrap status --no-instructions
|
|
215
|
+
|
|
213
216
|
# List instances in a specific region
|
|
214
217
|
aws-bootstrap status --region us-east-1
|
|
215
218
|
|
|
@@ -5,6 +5,7 @@ from datetime import UTC, datetime
|
|
|
5
5
|
from pathlib import Path
|
|
6
6
|
|
|
7
7
|
import boto3
|
|
8
|
+
import botocore.exceptions
|
|
8
9
|
import click
|
|
9
10
|
|
|
10
11
|
from .config import LaunchConfig
|
|
@@ -56,7 +57,39 @@ def warn(msg: str) -> None:
|
|
|
56
57
|
click.secho(f" WARNING: {msg}", fg="yellow", err=True)
|
|
57
58
|
|
|
58
59
|
|
|
59
|
-
|
|
60
|
+
class _AWSGroup(click.Group):
|
|
61
|
+
"""Click group that catches common AWS credential/auth errors."""
|
|
62
|
+
|
|
63
|
+
def invoke(self, ctx):
|
|
64
|
+
try:
|
|
65
|
+
return super().invoke(ctx)
|
|
66
|
+
except botocore.exceptions.NoCredentialsError:
|
|
67
|
+
raise CLIError(
|
|
68
|
+
"Unable to locate AWS credentials.\n\n"
|
|
69
|
+
" Make sure you have configured AWS credentials using one of:\n"
|
|
70
|
+
" - Set the AWS_PROFILE environment variable: export AWS_PROFILE=<profile-name>\n"
|
|
71
|
+
" - Pass --profile to the command: aws-bootstrap <command> --profile <profile-name>\n"
|
|
72
|
+
" - Configure a default profile: aws configure\n\n"
|
|
73
|
+
" See: https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-files.html"
|
|
74
|
+
) from None
|
|
75
|
+
except botocore.exceptions.ProfileNotFound as e:
|
|
76
|
+
raise CLIError(f"{e}\n\n List available profiles with: aws configure list-profiles") from None
|
|
77
|
+
except botocore.exceptions.PartialCredentialsError as e:
|
|
78
|
+
raise CLIError(
|
|
79
|
+
f"Incomplete AWS credentials: {e}\n\n Check your AWS configuration with: aws configure list"
|
|
80
|
+
) from None
|
|
81
|
+
except botocore.exceptions.ClientError as e:
|
|
82
|
+
code = e.response["Error"]["Code"]
|
|
83
|
+
if code in ("AuthFailure", "UnauthorizedOperation", "ExpiredTokenException", "ExpiredToken"):
|
|
84
|
+
raise CLIError(
|
|
85
|
+
f"AWS authorization failed: {e.response['Error']['Message']}\n\n"
|
|
86
|
+
" Your credentials may be expired or lack the required permissions.\n"
|
|
87
|
+
" Check your AWS configuration with: aws configure list"
|
|
88
|
+
) from None
|
|
89
|
+
raise
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
@click.group(cls=_AWSGroup)
|
|
60
93
|
@click.version_option(package_name="aws-bootstrap-g4dn")
|
|
61
94
|
def main():
|
|
62
95
|
"""Bootstrap AWS EC2 GPU instances for hybrid local-remote development."""
|
|
@@ -80,6 +113,12 @@ def main():
|
|
|
80
113
|
@click.option("--no-setup", is_flag=True, default=False, help="Skip running the remote setup script.")
|
|
81
114
|
@click.option("--dry-run", is_flag=True, default=False, help="Show what would be done without executing.")
|
|
82
115
|
@click.option("--profile", default=None, help="AWS profile override (defaults to AWS_PROFILE env var).")
|
|
116
|
+
@click.option(
|
|
117
|
+
"--python-version",
|
|
118
|
+
default=None,
|
|
119
|
+
help="Python version for the remote venv (e.g. 3.13, 3.14.2). Passed to uv during setup.",
|
|
120
|
+
)
|
|
121
|
+
@click.option("--ssh-port", default=22, show_default=True, type=int, help="SSH port on the remote instance.")
|
|
83
122
|
def launch(
|
|
84
123
|
instance_type,
|
|
85
124
|
ami_filter,
|
|
@@ -92,6 +131,8 @@ def launch(
|
|
|
92
131
|
no_setup,
|
|
93
132
|
dry_run,
|
|
94
133
|
profile,
|
|
134
|
+
python_version,
|
|
135
|
+
ssh_port,
|
|
95
136
|
):
|
|
96
137
|
"""Launch a GPU-accelerated EC2 instance."""
|
|
97
138
|
config = LaunchConfig(
|
|
@@ -104,6 +145,8 @@ def launch(
|
|
|
104
145
|
volume_size=volume_size,
|
|
105
146
|
run_setup=not no_setup,
|
|
106
147
|
dry_run=dry_run,
|
|
148
|
+
ssh_port=ssh_port,
|
|
149
|
+
python_version=python_version,
|
|
107
150
|
)
|
|
108
151
|
if ami_filter:
|
|
109
152
|
config.ami_filter = ami_filter
|
|
@@ -130,7 +173,7 @@ def launch(
|
|
|
130
173
|
|
|
131
174
|
# Step 3: Security group
|
|
132
175
|
step(3, 6, "Ensuring security group...")
|
|
133
|
-
sg_id = ensure_security_group(ec2, config.security_group, config.tag_value)
|
|
176
|
+
sg_id = ensure_security_group(ec2, config.security_group, config.tag_value, ssh_port=config.ssh_port)
|
|
134
177
|
|
|
135
178
|
pricing = "spot" if config.spot else "on-demand"
|
|
136
179
|
|
|
@@ -145,6 +188,10 @@ def launch(
|
|
|
145
188
|
val("Volume", f"{config.volume_size} GB gp3")
|
|
146
189
|
val("Region", config.region)
|
|
147
190
|
val("Remote setup", "yes" if config.run_setup else "no")
|
|
191
|
+
if config.ssh_port != 22:
|
|
192
|
+
val("SSH port", str(config.ssh_port))
|
|
193
|
+
if config.python_version:
|
|
194
|
+
val("Python version", config.python_version)
|
|
148
195
|
click.echo()
|
|
149
196
|
click.secho("No resources launched (dry-run mode).", fg="yellow")
|
|
150
197
|
return
|
|
@@ -169,9 +216,13 @@ def launch(
|
|
|
169
216
|
# Step 6: SSH and remote setup
|
|
170
217
|
step(6, 6, "Waiting for SSH access...")
|
|
171
218
|
private_key = private_key_path(config.key_path)
|
|
172
|
-
if not wait_for_ssh(public_ip, config.ssh_user, config.key_path):
|
|
219
|
+
if not wait_for_ssh(public_ip, config.ssh_user, config.key_path, port=config.ssh_port):
|
|
173
220
|
warn("SSH did not become available within the timeout.")
|
|
174
|
-
|
|
221
|
+
port_flag = f" -p {config.ssh_port}" if config.ssh_port != 22 else ""
|
|
222
|
+
info(
|
|
223
|
+
f"Instance is running — try connecting manually:"
|
|
224
|
+
f" ssh -i {private_key}{port_flag} {config.ssh_user}@{public_ip}"
|
|
225
|
+
)
|
|
175
226
|
return
|
|
176
227
|
|
|
177
228
|
if config.run_setup:
|
|
@@ -179,7 +230,9 @@ def launch(
|
|
|
179
230
|
warn(f"Setup script not found at {SETUP_SCRIPT}, skipping.")
|
|
180
231
|
else:
|
|
181
232
|
info("Running remote setup...")
|
|
182
|
-
if run_remote_setup(
|
|
233
|
+
if run_remote_setup(
|
|
234
|
+
public_ip, config.ssh_user, config.key_path, SETUP_SCRIPT, config.python_version, port=config.ssh_port
|
|
235
|
+
):
|
|
183
236
|
success("Remote setup completed successfully.")
|
|
184
237
|
else:
|
|
185
238
|
warn("Remote setup failed. Instance is still running.")
|
|
@@ -191,6 +244,7 @@ def launch(
|
|
|
191
244
|
user=config.ssh_user,
|
|
192
245
|
key_path=config.key_path,
|
|
193
246
|
alias_prefix=config.alias_prefix,
|
|
247
|
+
port=config.ssh_port,
|
|
194
248
|
)
|
|
195
249
|
success(f"Added SSH config alias: {alias}")
|
|
196
250
|
|
|
@@ -206,18 +260,27 @@ def launch(
|
|
|
206
260
|
val("Pricing", pricing)
|
|
207
261
|
val("SSH alias", alias)
|
|
208
262
|
|
|
263
|
+
port_flag = f" -p {config.ssh_port}" if config.ssh_port != 22 else ""
|
|
264
|
+
|
|
209
265
|
click.echo()
|
|
210
266
|
click.secho(" SSH:", fg="cyan")
|
|
211
|
-
click.secho(f" ssh {alias}", bold=True)
|
|
212
|
-
info(f"or: ssh -i {private_key} {config.ssh_user}@{public_ip}")
|
|
267
|
+
click.secho(f" ssh{port_flag} {alias}", bold=True)
|
|
268
|
+
info(f"or: ssh -i {private_key}{port_flag} {config.ssh_user}@{public_ip}")
|
|
213
269
|
|
|
214
270
|
click.echo()
|
|
215
271
|
click.secho(" Jupyter (via SSH tunnel):", fg="cyan")
|
|
216
|
-
click.secho(f" ssh -NL 8888:localhost:8888 {alias}", bold=True)
|
|
217
|
-
info(f"or: ssh -i {private_key} -NL 8888:localhost:8888 {config.ssh_user}@{public_ip}")
|
|
272
|
+
click.secho(f" ssh -NL 8888:localhost:8888{port_flag} {alias}", bold=True)
|
|
273
|
+
info(f"or: ssh -i {private_key} -NL 8888:localhost:8888{port_flag} {config.ssh_user}@{public_ip}")
|
|
218
274
|
info("Then open: http://localhost:8888")
|
|
219
275
|
info("Notebook: ~/gpu_smoke_test.ipynb (GPU smoke test)")
|
|
220
276
|
|
|
277
|
+
click.echo()
|
|
278
|
+
click.secho(" VSCode Remote SSH:", fg="cyan")
|
|
279
|
+
click.secho(
|
|
280
|
+
f" code --folder-uri vscode-remote://ssh-remote+{alias}/home/{config.ssh_user}",
|
|
281
|
+
bold=True,
|
|
282
|
+
)
|
|
283
|
+
|
|
221
284
|
click.echo()
|
|
222
285
|
click.secho(" GPU Benchmark:", fg="cyan")
|
|
223
286
|
click.secho(f" ssh {alias} 'python ~/gpu_benchmark.py'", bold=True)
|
|
@@ -233,7 +296,14 @@ def launch(
|
|
|
233
296
|
@click.option("--region", default="us-west-2", show_default=True, help="AWS region.")
|
|
234
297
|
@click.option("--profile", default=None, help="AWS profile override.")
|
|
235
298
|
@click.option("--gpu", is_flag=True, default=False, help="Query GPU info (CUDA, driver) via SSH.")
|
|
236
|
-
|
|
299
|
+
@click.option(
|
|
300
|
+
"--instructions/--no-instructions",
|
|
301
|
+
"-I",
|
|
302
|
+
default=True,
|
|
303
|
+
show_default=True,
|
|
304
|
+
help="Show connection commands (SSH, Jupyter, VSCode) for each running instance.",
|
|
305
|
+
)
|
|
306
|
+
def status(region, profile, gpu, instructions):
|
|
237
307
|
"""Show running instances created by aws-bootstrap."""
|
|
238
308
|
session = boto3.Session(profile_name=profile, region_name=region)
|
|
239
309
|
ec2 = session.client("ec2")
|
|
@@ -272,11 +342,15 @@ def status(region, profile, gpu):
|
|
|
272
342
|
if inst["PublicIp"]:
|
|
273
343
|
val(" IP", inst["PublicIp"])
|
|
274
344
|
|
|
345
|
+
# Look up SSH config details once (used by --gpu and --instructions)
|
|
346
|
+
details = None
|
|
347
|
+
if (gpu or instructions) and state == "running" and inst["PublicIp"]:
|
|
348
|
+
details = get_ssh_host_details(inst["InstanceId"])
|
|
349
|
+
|
|
275
350
|
# GPU info (opt-in, only for running instances with a public IP)
|
|
276
351
|
if gpu and state == "running" and inst["PublicIp"]:
|
|
277
|
-
details = get_ssh_host_details(inst["InstanceId"])
|
|
278
352
|
if details:
|
|
279
|
-
gpu_info = query_gpu_info(details.hostname, details.user, details.identity_file)
|
|
353
|
+
gpu_info = query_gpu_info(details.hostname, details.user, details.identity_file, port=details.port)
|
|
280
354
|
else:
|
|
281
355
|
gpu_info = query_gpu_info(
|
|
282
356
|
inst["PublicIp"],
|
|
@@ -320,6 +394,29 @@ def status(region, profile, gpu):
|
|
|
320
394
|
val(" Est. cost", f"~${est_cost:.4f}")
|
|
321
395
|
|
|
322
396
|
val(" Launched", str(inst["LaunchTime"]))
|
|
397
|
+
|
|
398
|
+
# Connection instructions (on by default; suppress with --no-instructions; only for running instances with a public IP and alias)
|
|
399
|
+
if instructions and state == "running" and inst["PublicIp"] and alias:
|
|
400
|
+
user = details.user if details else "ubuntu"
|
|
401
|
+
port = details.port if details else 22
|
|
402
|
+
port_flag = f" -p {port}" if port != 22 else ""
|
|
403
|
+
|
|
404
|
+
click.echo()
|
|
405
|
+
click.secho(" SSH:", fg="cyan")
|
|
406
|
+
click.secho(f" ssh{port_flag} {alias}", bold=True)
|
|
407
|
+
|
|
408
|
+
click.secho(" Jupyter (via SSH tunnel):", fg="cyan")
|
|
409
|
+
click.secho(f" ssh -NL 8888:localhost:8888{port_flag} {alias}", bold=True)
|
|
410
|
+
|
|
411
|
+
click.secho(" VSCode Remote SSH:", fg="cyan")
|
|
412
|
+
click.secho(
|
|
413
|
+
f" code --folder-uri vscode-remote://ssh-remote+{alias}/home/{user}",
|
|
414
|
+
bold=True,
|
|
415
|
+
)
|
|
416
|
+
|
|
417
|
+
click.secho(" GPU Benchmark:", fg="cyan")
|
|
418
|
+
click.secho(f" ssh {alias} 'python ~/gpu_benchmark.py'", bold=True)
|
|
419
|
+
|
|
323
420
|
click.echo()
|
|
324
421
|
first_id = instances[0]["InstanceId"]
|
|
325
422
|
click.echo(" To terminate: " + click.style(f"aws-bootstrap terminate {first_id}", bold=True))
|
|
@@ -59,7 +59,7 @@ def get_latest_ami(ec2_client, ami_filter: str) -> dict:
|
|
|
59
59
|
return images[0]
|
|
60
60
|
|
|
61
61
|
|
|
62
|
-
def ensure_security_group(ec2_client, name: str, tag_value: str) -> str:
|
|
62
|
+
def ensure_security_group(ec2_client, name: str, tag_value: str, ssh_port: int = 22) -> str:
|
|
63
63
|
"""Find or create a security group with SSH ingress in the default VPC."""
|
|
64
64
|
# Find default VPC
|
|
65
65
|
vpcs = ec2_client.describe_vpcs(Filters=[{"Name": "isDefault", "Values": ["true"]}])
|
|
@@ -103,8 +103,8 @@ def ensure_security_group(ec2_client, name: str, tag_value: str) -> str:
|
|
|
103
103
|
IpPermissions=[
|
|
104
104
|
{
|
|
105
105
|
"IpProtocol": "tcp",
|
|
106
|
-
"FromPort":
|
|
107
|
-
"ToPort":
|
|
106
|
+
"FromPort": ssh_port,
|
|
107
|
+
"ToPort": ssh_port,
|
|
108
108
|
"IpRanges": [{"CidrIp": "0.0.0.0/0", "Description": "SSH access"}],
|
|
109
109
|
}
|
|
110
110
|
],
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
"""GPU architecture mapping and GPU info dataclass."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
_GPU_ARCHITECTURES: dict[str, str] = {
|
|
8
|
+
"7.0": "Volta",
|
|
9
|
+
"7.5": "Turing",
|
|
10
|
+
"8.0": "Ampere",
|
|
11
|
+
"8.6": "Ampere",
|
|
12
|
+
"8.7": "Ampere",
|
|
13
|
+
"8.9": "Ada Lovelace",
|
|
14
|
+
"9.0": "Hopper",
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass
|
|
19
|
+
class GpuInfo:
|
|
20
|
+
"""GPU information retrieved via nvidia-smi and nvcc."""
|
|
21
|
+
|
|
22
|
+
driver_version: str
|
|
23
|
+
cuda_driver_version: str # max CUDA version supported by driver (from nvidia-smi)
|
|
24
|
+
cuda_toolkit_version: str | None # actual CUDA toolkit installed (from nvcc), None if unavailable
|
|
25
|
+
gpu_name: str
|
|
26
|
+
compute_capability: str
|
|
27
|
+
architecture: str
|
{aws_bootstrap_g4dn-0.1.0 → aws_bootstrap_g4dn-0.3.0}/aws_bootstrap/resources/remote_setup.sh
RENAMED
|
@@ -34,7 +34,13 @@ if ! command -v uv &>/dev/null; then
|
|
|
34
34
|
fi
|
|
35
35
|
export PATH="$HOME/.local/bin:$PATH"
|
|
36
36
|
|
|
37
|
-
|
|
37
|
+
if [ -n "${PYTHON_VERSION:-}" ]; then
|
|
38
|
+
echo " Installing Python ${PYTHON_VERSION}..."
|
|
39
|
+
uv python install "$PYTHON_VERSION"
|
|
40
|
+
uv venv --python "$PYTHON_VERSION" ~/venv
|
|
41
|
+
else
|
|
42
|
+
uv venv ~/venv
|
|
43
|
+
fi
|
|
38
44
|
|
|
39
45
|
# --- CUDA-aware PyTorch installation ---
|
|
40
46
|
# Known PyTorch CUDA wheel tags (ascending order).
|