aws-bootstrap-g4dn 0.3.0.tar.gz → 0.5.0.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {aws_bootstrap_g4dn-0.3.0 → aws_bootstrap_g4dn-0.5.0}/.pre-commit-config.yaml +1 -0
- {aws_bootstrap_g4dn-0.3.0 → aws_bootstrap_g4dn-0.5.0}/CLAUDE.md +7 -1
- {aws_bootstrap_g4dn-0.3.0/aws_bootstrap_g4dn.egg-info → aws_bootstrap_g4dn-0.5.0}/PKG-INFO +35 -6
- {aws_bootstrap_g4dn-0.3.0 → aws_bootstrap_g4dn-0.5.0}/README.md +34 -5
- {aws_bootstrap_g4dn-0.3.0 → aws_bootstrap_g4dn-0.5.0}/aws_bootstrap/cli.py +20 -8
- {aws_bootstrap_g4dn-0.3.0 → aws_bootstrap_g4dn-0.5.0}/aws_bootstrap/resources/gpu_benchmark.py +15 -5
- aws_bootstrap_g4dn-0.5.0/aws_bootstrap/resources/launch.json +42 -0
- {aws_bootstrap_g4dn-0.3.0 → aws_bootstrap_g4dn-0.5.0}/aws_bootstrap/resources/remote_setup.sh +83 -5
- aws_bootstrap_g4dn-0.5.0/aws_bootstrap/resources/saxpy.cu +49 -0
- aws_bootstrap_g4dn-0.5.0/aws_bootstrap/resources/tasks.json +48 -0
- {aws_bootstrap_g4dn-0.3.0 → aws_bootstrap_g4dn-0.5.0}/aws_bootstrap/ssh.py +64 -0
- {aws_bootstrap_g4dn-0.3.0 → aws_bootstrap_g4dn-0.5.0}/aws_bootstrap/tests/test_cli.py +53 -1
- {aws_bootstrap_g4dn-0.3.0 → aws_bootstrap_g4dn-0.5.0}/aws_bootstrap/tests/test_ssh_config.py +76 -0
- {aws_bootstrap_g4dn-0.3.0 → aws_bootstrap_g4dn-0.5.0/aws_bootstrap_g4dn.egg-info}/PKG-INFO +35 -6
- {aws_bootstrap_g4dn-0.3.0 → aws_bootstrap_g4dn-0.5.0}/aws_bootstrap_g4dn.egg-info/SOURCES.txt +5 -1
- aws_bootstrap_g4dn-0.5.0/docs/nsight-remote-profiling.md +245 -0
- {aws_bootstrap_g4dn-0.3.0 → aws_bootstrap_g4dn-0.5.0}/pyproject.toml +1 -1
- {aws_bootstrap_g4dn-0.3.0 → aws_bootstrap_g4dn-0.5.0}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
- {aws_bootstrap_g4dn-0.3.0 → aws_bootstrap_g4dn-0.5.0}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
- {aws_bootstrap_g4dn-0.3.0 → aws_bootstrap_g4dn-0.5.0}/.github/workflows/ci.yml +0 -0
- {aws_bootstrap_g4dn-0.3.0 → aws_bootstrap_g4dn-0.5.0}/.github/workflows/publish-to-pypi.yml +0 -0
- {aws_bootstrap_g4dn-0.3.0 → aws_bootstrap_g4dn-0.5.0}/.gitignore +0 -0
- {aws_bootstrap_g4dn-0.3.0 → aws_bootstrap_g4dn-0.5.0}/CODE_OF_CONDUCT.md +0 -0
- {aws_bootstrap_g4dn-0.3.0 → aws_bootstrap_g4dn-0.5.0}/CONTRIBUTING.md +0 -0
- {aws_bootstrap_g4dn-0.3.0 → aws_bootstrap_g4dn-0.5.0}/LICENSE +0 -0
- {aws_bootstrap_g4dn-0.3.0 → aws_bootstrap_g4dn-0.5.0}/SECURITY.md +0 -0
- {aws_bootstrap_g4dn-0.3.0 → aws_bootstrap_g4dn-0.5.0}/aws_bootstrap/__init__.py +0 -0
- {aws_bootstrap_g4dn-0.3.0 → aws_bootstrap_g4dn-0.5.0}/aws_bootstrap/config.py +0 -0
- {aws_bootstrap_g4dn-0.3.0 → aws_bootstrap_g4dn-0.5.0}/aws_bootstrap/ec2.py +0 -0
- {aws_bootstrap_g4dn-0.3.0 → aws_bootstrap_g4dn-0.5.0}/aws_bootstrap/gpu.py +0 -0
- {aws_bootstrap_g4dn-0.3.0 → aws_bootstrap_g4dn-0.5.0}/aws_bootstrap/resources/__init__.py +0 -0
- {aws_bootstrap_g4dn-0.3.0 → aws_bootstrap_g4dn-0.5.0}/aws_bootstrap/resources/gpu_smoke_test.ipynb +0 -0
- {aws_bootstrap_g4dn-0.3.0 → aws_bootstrap_g4dn-0.5.0}/aws_bootstrap/resources/requirements.txt +0 -0
- {aws_bootstrap_g4dn-0.3.0 → aws_bootstrap_g4dn-0.5.0}/aws_bootstrap/tests/__init__.py +0 -0
- {aws_bootstrap_g4dn-0.3.0 → aws_bootstrap_g4dn-0.5.0}/aws_bootstrap/tests/test_config.py +0 -0
- {aws_bootstrap_g4dn-0.3.0 → aws_bootstrap_g4dn-0.5.0}/aws_bootstrap/tests/test_ec2.py +0 -0
- {aws_bootstrap_g4dn-0.3.0 → aws_bootstrap_g4dn-0.5.0}/aws_bootstrap/tests/test_gpu.py +0 -0
- {aws_bootstrap_g4dn-0.3.0 → aws_bootstrap_g4dn-0.5.0}/aws_bootstrap/tests/test_ssh_gpu.py +0 -0
- {aws_bootstrap_g4dn-0.3.0 → aws_bootstrap_g4dn-0.5.0}/aws_bootstrap_g4dn.egg-info/dependency_links.txt +0 -0
- {aws_bootstrap_g4dn-0.3.0 → aws_bootstrap_g4dn-0.5.0}/aws_bootstrap_g4dn.egg-info/entry_points.txt +0 -0
- {aws_bootstrap_g4dn-0.3.0 → aws_bootstrap_g4dn-0.5.0}/aws_bootstrap_g4dn.egg-info/requires.txt +0 -0
- {aws_bootstrap_g4dn-0.3.0 → aws_bootstrap_g4dn-0.5.0}/aws_bootstrap_g4dn.egg-info/top_level.txt +0 -0
- {aws_bootstrap_g4dn-0.3.0 → aws_bootstrap_g4dn-0.5.0}/setup.cfg +0 -0
- {aws_bootstrap_g4dn-0.3.0 → aws_bootstrap_g4dn-0.5.0}/uv.lock +0 -0
{aws_bootstrap_g4dn-0.3.0 → aws_bootstrap_g4dn-0.5.0}/CLAUDE.md
RENAMED
@@ -39,6 +39,9 @@ aws_bootstrap/
         __init__.py
         gpu_benchmark.py      # GPU throughput benchmark (CNN + Transformer), copied to ~/gpu_benchmark.py on instance
         gpu_smoke_test.ipynb  # Interactive Jupyter notebook for GPU verification, copied to ~/gpu_smoke_test.ipynb
+        launch.json           # VSCode CUDA debug config template (deployed to ~/workspace/.vscode/launch.json)
+        saxpy.cu              # Example CUDA SAXPY source (deployed to ~/workspace/saxpy.cu)
+        tasks.json            # VSCode CUDA build tasks template (deployed to ~/workspace/.vscode/tasks.json)
         remote_setup.sh       # Uploaded & run on instance post-boot (GPU verify, Jupyter, etc.)
         requirements.txt      # Python dependencies installed on the remote instance
     tests/                    # Unit tests (pytest)
@@ -49,6 +52,7 @@ aws_bootstrap/
         test_ssh_config.py
         test_ssh_gpu.py
 docs/
+    nsight-remote-profiling.md   # Nsight Compute, Nsight Systems, and Nsight VSCE remote profiling guide
     spot-request-lifecycle.md    # Research notes on spot request cleanup
 ```
 
@@ -58,7 +62,7 @@ Entry point: `aws-bootstrap = "aws_bootstrap.cli:main"` (installed via `uv sync`
 
 - **`launch`** — provisions an EC2 instance (spot by default, falls back to on-demand on capacity errors); adds SSH config alias (e.g. `aws-gpu1`) to `~/.ssh/config`; `--python-version` controls which Python `uv` installs in the remote venv; `--ssh-port` overrides the default SSH port (22) for security group ingress, connection checks, and SSH config
 - **`status`** — lists all non-terminated instances (including `shutting-down`) with type, IP, SSH alias, pricing (spot price/hr or on-demand), uptime, and estimated cost for running spot instances; `--gpu` flag queries GPU info via SSH, reporting both CUDA toolkit version (from `nvcc`) and driver-supported max (from `nvidia-smi`); `--instructions` (default: on) prints connection commands (SSH, Jupyter tunnel, VSCode Remote SSH, GPU benchmark) for each running instance; suppress with `--no-instructions`
-- **`terminate`** — terminates instances by ID or all aws-bootstrap instances in the region; removes SSH config aliases
+- **`terminate`** — terminates instances by ID or SSH alias (e.g. `aws-gpu1`, resolved via `~/.ssh/config`), or all aws-bootstrap instances in the region if no arguments given; removes SSH config aliases
 - **`list instance-types`** — lists EC2 instance types matching a family prefix (default: `g4dn`), showing vCPUs, memory, and GPU info
 - **`list amis`** — lists available AMIs matching a name pattern (default: Deep Learning Base OSS Nvidia Driver GPU AMIs), sorted newest-first
 
@@ -99,8 +103,10 @@ The `KNOWN_CUDA_TAGS` array in `remote_setup.sh` lists the CUDA wheel tags publi
 
 `remote_setup.sh` also:
 - Creates `~/venv` and appends `source ~/venv/bin/activate` to `~/.bashrc` so the venv is auto-activated on SSH login. When `--python-version` is passed to `launch`, the CLI sets `PYTHON_VERSION` as an inline env var on the SSH command; `remote_setup.sh` reads it to run `uv python install` and `uv venv --python` with the requested version
+- Adds NVIDIA Nsight Systems (`nsys`) to PATH if installed under `/opt/nvidia/nsight-systems/` (pre-installed on Deep Learning AMIs but not on PATH by default). Fixes directory permissions, finds the latest version, and prepends its `bin/` to PATH in `~/.bashrc`
 - Runs a quick CUDA smoke test (`torch.cuda.is_available()` + GPU matmul) after PyTorch installation to verify the GPU stack; prints a WARNING on failure but does not abort
 - Copies `gpu_benchmark.py` to `~/gpu_benchmark.py` and `gpu_smoke_test.ipynb` to `~/gpu_smoke_test.ipynb`
+- Sets up `~/workspace/.vscode/` with `launch.json` and `tasks.json` for CUDA debugging. Detects `cuda-gdb` path and GPU SM architecture (via `nvidia-smi --query-gpu=compute_cap`) at deploy time, replacing `__CUDA_GDB_PATH__` and `__GPU_ARCH__` placeholders in the template files via `sed`
 
 ## GPU Benchmark
 
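As a side note on the `--python-version` plumbing described in the CLAUDE.md hunk above: the CLI passes the version as an inline `PYTHON_VERSION` environment variable on the SSH command, and `remote_setup.sh` hands it to `uv`. A minimal sketch of that flow, assuming the script is uploaded to `/tmp/remote_setup.sh` (the exact invocation the CLI builds is not shown in this diff):

```bash
# Hedged sketch, not the tool's verbatim command: run the setup script with an
# inline PYTHON_VERSION, the way `aws-bootstrap launch --python-version 3.12` would.
ssh aws-gpu1 'PYTHON_VERSION=3.12 bash /tmp/remote_setup.sh'

# Inside remote_setup.sh the requested version drives uv roughly like this:
#   uv python install "$PYTHON_VERSION"
#   uv venv --python "$PYTHON_VERSION" ~/venv
```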
{aws_bootstrap_g4dn-0.3.0/aws_bootstrap_g4dn.egg-info → aws_bootstrap_g4dn-0.5.0}/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: aws-bootstrap-g4dn
-Version: 0.3.0
+Version: 0.5.0
 Summary: Bootstrap AWS EC2 GPU instances for hybrid local-remote development
 Author: Adam Ever-Hadani
 License-Expression: MIT
@@ -49,7 +49,7 @@ ssh aws-gpu1 # You're in, venv activated, PyTorch works
 ### 🎯 Target Workflows
 
 1. **Jupyter server-client** — Jupyter runs on the instance, connect from your local browser
-2. **VSCode Remote SSH** — `
+2. **VSCode Remote SSH** — opens `~/workspace` with pre-configured CUDA debug/build tasks and an example `.cu` file
 3. **NVIDIA Nsight remote debugging** — GPU debugging over SSH
 
 ---
@@ -162,6 +162,7 @@ The setup script runs automatically on the instance after SSH becomes available:
 | **GPU smoke test notebook** | Copies `gpu_smoke_test.ipynb` to `~/gpu_smoke_test.ipynb` (open in JupyterLab) |
 | **Jupyter** | Configures and starts JupyterLab as a systemd service on port 8888 |
 | **SSH keepalive** | Configures server-side keepalive to prevent idle disconnects |
+| **VSCode workspace** | Creates `~/workspace/.vscode/` with `launch.json` and `tasks.json` (auto-detected `cuda-gdb` path and GPU arch), plus an example `saxpy.cu` |
 
 ### 📊 GPU Benchmark
 
@@ -200,6 +201,28 @@ ssh -i ~/.ssh/id_ed25519 -NL 8888:localhost:8888 ubuntu@<public-ip>
 
 A **GPU smoke test notebook** (`~/gpu_smoke_test.ipynb`) is pre-installed on every instance. Open it in JupyterLab to interactively verify the CUDA stack, run FP32/FP16 matmuls, train a small CNN on MNIST, and visualise training loss and GPU memory usage.
 
+### 🖥️ VSCode Remote SSH
+
+The remote setup creates a `~/workspace` folder with pre-configured CUDA debug and build tasks:
+
+```
+~/workspace/
+├── .vscode/
+│   ├── launch.json   # CUDA debug configs (cuda-gdb path auto-detected)
+│   └── tasks.json    # nvcc build tasks (GPU arch auto-detected, e.g. sm_75)
+└── saxpy.cu          # Example CUDA source — open and press F5 to debug
+```
+
+Connect directly from your terminal:
+
+```bash
+code --folder-uri vscode-remote://ssh-remote+aws-gpu1/home/ubuntu/workspace
+```
+
+Then install the [Nsight VSCE extension](https://marketplace.visualstudio.com/items?itemName=NVIDIA.nsight-vscode-edition) on the remote when prompted. Open `saxpy.cu`, set a breakpoint, and press F5.
+
+See [Nsight remote profiling guide](docs/nsight-remote-profiling.md) for more details on CUDA debugging and profiling workflows.
+
 ### 📋 Listing Resources
 
 ```bash
@@ -238,8 +261,14 @@ aws-bootstrap status --region us-east-1
 # Terminate all aws-bootstrap instances (with confirmation prompt)
 aws-bootstrap terminate
 
-# Terminate
-aws-bootstrap terminate
+# Terminate by SSH alias (resolved via ~/.ssh/config)
+aws-bootstrap terminate aws-gpu1
+
+# Terminate by instance ID
+aws-bootstrap terminate i-abc123
+
+# Mix aliases and instance IDs
+aws-bootstrap terminate aws-gpu1 i-def456
 
 # Skip confirmation prompt
 aws-bootstrap terminate --yes
@@ -251,7 +280,7 @@ aws-bootstrap terminate --yes
    CUDA: 12.8 (driver supports up to 13.0)
 ```
 
-SSH aliases are managed automatically — they're created on `launch`, shown in `status`, and cleaned up on `terminate`. Aliases use sequential numbering (`aws-gpu1`, `aws-gpu2`, etc.) and never reuse numbers from previous instances.
+SSH aliases are managed automatically — they're created on `launch`, shown in `status`, and cleaned up on `terminate`. Aliases use sequential numbering (`aws-gpu1`, `aws-gpu2`, etc.) and never reuse numbers from previous instances. You can use aliases anywhere you'd use an instance ID, e.g. `aws-bootstrap terminate aws-gpu1`.
 
 ## EC2 vCPU Quotas
 
@@ -322,7 +351,7 @@ aws-bootstrap launch --instance-type t3.medium --ami-filter "ubuntu/images/hvm-s
 | GPU instance pricing | [instances.vantage.sh](https://instances.vantage.sh/aws/ec2/g4dn.xlarge) |
 | Spot instance quotas | [AWS docs](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-spot-limits.html) |
 | Deep Learning AMIs | [AWS docs](https://docs.aws.amazon.com/dlami/latest/devguide/what-is-dlami.html) |
-
+| Nsight remote GPU profiling | [Guide](docs/nsight-remote-profiling.md) — Nsight Compute, Nsight Systems, and Nsight VSCE on EC2 |
 
 Tutorials on setting up a CUDA environment on EC2 GPU instances:
 
{aws_bootstrap_g4dn-0.3.0 → aws_bootstrap_g4dn-0.5.0}/README.md
RENAMED
@@ -30,7 +30,7 @@ ssh aws-gpu1 # You're in, venv activated, PyTorch works
 ### 🎯 Target Workflows
 
 1. **Jupyter server-client** — Jupyter runs on the instance, connect from your local browser
-2. **VSCode Remote SSH** — `
+2. **VSCode Remote SSH** — opens `~/workspace` with pre-configured CUDA debug/build tasks and an example `.cu` file
 3. **NVIDIA Nsight remote debugging** — GPU debugging over SSH
 
 ---
@@ -143,6 +143,7 @@ The setup script runs automatically on the instance after SSH becomes available:
 | **GPU smoke test notebook** | Copies `gpu_smoke_test.ipynb` to `~/gpu_smoke_test.ipynb` (open in JupyterLab) |
 | **Jupyter** | Configures and starts JupyterLab as a systemd service on port 8888 |
 | **SSH keepalive** | Configures server-side keepalive to prevent idle disconnects |
+| **VSCode workspace** | Creates `~/workspace/.vscode/` with `launch.json` and `tasks.json` (auto-detected `cuda-gdb` path and GPU arch), plus an example `saxpy.cu` |
 
 ### 📊 GPU Benchmark
 
@@ -181,6 +182,28 @@ ssh -i ~/.ssh/id_ed25519 -NL 8888:localhost:8888 ubuntu@<public-ip>
 
 A **GPU smoke test notebook** (`~/gpu_smoke_test.ipynb`) is pre-installed on every instance. Open it in JupyterLab to interactively verify the CUDA stack, run FP32/FP16 matmuls, train a small CNN on MNIST, and visualise training loss and GPU memory usage.
 
+### 🖥️ VSCode Remote SSH
+
+The remote setup creates a `~/workspace` folder with pre-configured CUDA debug and build tasks:
+
+```
+~/workspace/
+├── .vscode/
+│   ├── launch.json   # CUDA debug configs (cuda-gdb path auto-detected)
+│   └── tasks.json    # nvcc build tasks (GPU arch auto-detected, e.g. sm_75)
+└── saxpy.cu          # Example CUDA source — open and press F5 to debug
+```
+
+Connect directly from your terminal:
+
+```bash
+code --folder-uri vscode-remote://ssh-remote+aws-gpu1/home/ubuntu/workspace
+```
+
+Then install the [Nsight VSCE extension](https://marketplace.visualstudio.com/items?itemName=NVIDIA.nsight-vscode-edition) on the remote when prompted. Open `saxpy.cu`, set a breakpoint, and press F5.
+
+See [Nsight remote profiling guide](docs/nsight-remote-profiling.md) for more details on CUDA debugging and profiling workflows.
+
 ### 📋 Listing Resources
 
 ```bash
@@ -219,8 +242,14 @@ aws-bootstrap status --region us-east-1
 # Terminate all aws-bootstrap instances (with confirmation prompt)
 aws-bootstrap terminate
 
-# Terminate
-aws-bootstrap terminate
+# Terminate by SSH alias (resolved via ~/.ssh/config)
+aws-bootstrap terminate aws-gpu1
+
+# Terminate by instance ID
+aws-bootstrap terminate i-abc123
+
+# Mix aliases and instance IDs
+aws-bootstrap terminate aws-gpu1 i-def456
 
 # Skip confirmation prompt
 aws-bootstrap terminate --yes
@@ -232,7 +261,7 @@ aws-bootstrap terminate --yes
    CUDA: 12.8 (driver supports up to 13.0)
 ```
 
-SSH aliases are managed automatically — they're created on `launch`, shown in `status`, and cleaned up on `terminate`. Aliases use sequential numbering (`aws-gpu1`, `aws-gpu2`, etc.) and never reuse numbers from previous instances.
+SSH aliases are managed automatically — they're created on `launch`, shown in `status`, and cleaned up on `terminate`. Aliases use sequential numbering (`aws-gpu1`, `aws-gpu2`, etc.) and never reuse numbers from previous instances. You can use aliases anywhere you'd use an instance ID, e.g. `aws-bootstrap terminate aws-gpu1`.
 
 ## EC2 vCPU Quotas
 
@@ -303,7 +332,7 @@ aws-bootstrap launch --instance-type t3.medium --ami-filter "ubuntu/images/hvm-s
 | GPU instance pricing | [instances.vantage.sh](https://instances.vantage.sh/aws/ec2/g4dn.xlarge) |
 | Spot instance quotas | [AWS docs](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-spot-limits.html) |
 | Deep Learning AMIs | [AWS docs](https://docs.aws.amazon.com/dlami/latest/devguide/what-is-dlami.html) |
-
+| Nsight remote GPU profiling | [Guide](docs/nsight-remote-profiling.md) — Nsight Compute, Nsight Systems, and Nsight VSCE on EC2 |
 
 Tutorials on setting up a CUDA environment on EC2 GPU instances:
 
{aws_bootstrap_g4dn-0.3.0 → aws_bootstrap_g4dn-0.5.0}/aws_bootstrap/cli.py
RENAMED
@@ -29,6 +29,7 @@ from .ssh import (
     private_key_path,
     query_gpu_info,
     remove_ssh_host,
+    resolve_instance_id,
     run_remote_setup,
     wait_for_ssh,
 )
@@ -277,7 +278,7 @@ def launch(
     click.echo()
     click.secho("  VSCode Remote SSH:", fg="cyan")
     click.secho(
-        f"    code --folder-uri vscode-remote://ssh-remote+{alias}/home/{config.ssh_user}",
+        f"    code --folder-uri vscode-remote://ssh-remote+{alias}/home/{config.ssh_user}/workspace",
         bold=True,
     )
 
@@ -288,7 +289,7 @@ def launch(
 
     click.echo()
     click.secho("  Terminate:", fg="cyan")
-    click.secho(f"    aws-bootstrap terminate {
+    click.secho(f"    aws-bootstrap terminate {alias} --region {config.region}", bold=True)
     click.echo()
 
 
@@ -410,7 +411,7 @@ def status(region, profile, gpu, instructions):
 
         click.secho("  VSCode Remote SSH:", fg="cyan")
         click.secho(
-            f"    code --folder-uri vscode-remote://ssh-remote+{alias}/home/{user}",
+            f"    code --folder-uri vscode-remote://ssh-remote+{alias}/home/{user}/workspace",
             bold=True,
         )
 
@@ -419,7 +420,8 @@ def status(region, profile, gpu, instructions):
 
     click.echo()
     first_id = instances[0]["InstanceId"]
-
+    first_ref = ssh_hosts.get(first_id, first_id)
+    click.echo("  To terminate: " + click.style(f"aws-bootstrap terminate {first_ref}", bold=True))
     click.echo()
 
 
@@ -427,18 +429,28 @@ def status(region, profile, gpu, instructions):
 @click.option("--region", default="us-west-2", show_default=True, help="AWS region.")
 @click.option("--profile", default=None, help="AWS profile override.")
 @click.option("--yes", "-y", is_flag=True, default=False, help="Skip confirmation prompt.")
-@click.argument("instance_ids", nargs=-1)
+@click.argument("instance_ids", nargs=-1, metavar="[INSTANCE_ID_OR_ALIAS]...")
 def terminate(region, profile, yes, instance_ids):
     """Terminate instances created by aws-bootstrap.
 
-    Pass specific instance IDs
-    aws-bootstrap instances in the region.
+    Pass specific instance IDs or SSH aliases (e.g. aws-gpu1) to terminate,
+    or omit to terminate all aws-bootstrap instances in the region.
     """
     session = boto3.Session(profile_name=profile, region_name=region)
     ec2 = session.client("ec2")
 
     if instance_ids:
-        targets =
+        targets = []
+        for value in instance_ids:
+            resolved = resolve_instance_id(value)
+            if resolved is None:
+                raise CLIError(
+                    f"Could not resolve '{value}' to an instance ID.\n\n"
+                    "  It is not a valid instance ID or a known SSH alias."
+                )
+            if resolved != value:
+                info(f"Resolved alias '{value}' -> {resolved}")
+            targets.append(resolved)
     else:
        instances = find_tagged_instances(ec2, "aws-bootstrap-g4dn")
        if not instances:
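The hunk above wires alias resolution into `terminate`; the `resolve_instance_id` helper itself lives in `aws_bootstrap/ssh.py`, which is not shown in this excerpt. A short usage sketch of the resulting CLI behaviour (the instance ID in the output is illustrative):

```bash
# Terminate by SSH alias; the CLI resolves it against ~/.ssh/config first
aws-bootstrap terminate aws-gpu1 --region us-west-2
# Expected to log something like (the ID is a placeholder):
#   Resolved alias 'aws-gpu1' -> i-0123456789abcdef0
```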
{aws_bootstrap_g4dn-0.3.0 → aws_bootstrap_g4dn-0.5.0}/aws_bootstrap/resources/gpu_benchmark.py
RENAMED
@@ -628,7 +628,9 @@ def configure_precision(device: torch.device, requested: PrecisionMode) -> Preci
     return PrecisionMode.FP32
 
 
-def print_system_info(
+def print_system_info(
+    requested_precision: PrecisionMode, force_cpu: bool = False
+) -> tuple[torch.device, PrecisionMode]:
     """Print system and CUDA information, return device and actual precision mode."""
     print("\n" + "=" * 60)
     print("System Information")
@@ -636,7 +638,7 @@ def print_system_info(requested_precision: PrecisionMode) -> tuple[torch.device,
     print(f"PyTorch version: {torch.__version__}")
     print(f"Python version: {sys.version.split()[0]}")
 
-    if torch.cuda.is_available():
+    if torch.cuda.is_available() and not force_cpu:
         device = torch.device("cuda")
         print("CUDA available: Yes")
         print(f"CUDA version: {torch.version.cuda}")
@@ -666,8 +668,11 @@ def print_system_info(requested_precision: PrecisionMode) -> tuple[torch.device,
     else:
         device = torch.device("cpu")
         actual_precision = PrecisionMode.FP32
-
-
+        if force_cpu:
+            print("CPU-only mode requested (--cpu flag)")
+        else:
+            print("CUDA available: No (running on CPU)")
+        print("Running on CPU for benchmarking")
 
     print("=" * 60)
     return device, actual_precision
@@ -724,10 +729,15 @@ def main() -> None:
         action="store_true",
         help="Run CUDA/cuBLAS diagnostic tests before benchmarking",
     )
+    parser.add_argument(
+        "--cpu",
+        action="store_true",
+        help="Force CPU-only execution (for CPU vs GPU comparison)",
+    )
     args = parser.parse_args()
 
     requested_precision = PrecisionMode(args.precision)
-    device, actual_precision = print_system_info(requested_precision)
+    device, actual_precision = print_system_info(requested_precision, force_cpu=args.cpu)
 
     # Run diagnostics if requested
     if args.diagnose:
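With the new `--cpu` flag, a CPU-vs-GPU comparison run of the benchmark looks roughly like this (the script is copied to `~/gpu_benchmark.py` on the instance, and the remote venv is auto-activated on SSH login):

```bash
# Default run: uses CUDA when torch.cuda.is_available()
python ~/gpu_benchmark.py

# CPU baseline for comparison: forces CPU even when a GPU is present
python ~/gpu_benchmark.py --cpu
```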
aws_bootstrap_g4dn-0.5.0/aws_bootstrap/resources/launch.json
ADDED
@@ -0,0 +1,42 @@
+{
+  // CUDA debug configurations for VSCode
+  // Deployed to: ~/workspace/.vscode/launch.json
+  //
+  // Usage: Open any .cu file, press F5 to build and debug
+  "version": "0.2.0",
+  "configurations": [
+    {
+      "name": "CUDA: Build and Debug Active File",
+      "type": "cuda-gdb",
+      "request": "launch",
+      "program": "${fileDirname}/${fileBasenameNoExtension}",
+      "args": [],
+      "cwd": "${fileDirname}",
+      "miDebuggerPath": "__CUDA_GDB_PATH__",
+      "stopAtEntry": false,
+      "preLaunchTask": "nvcc: build active file (debug)"
+    },
+    {
+      "name": "CUDA: Build and Debug (stop at main)",
+      "type": "cuda-gdb",
+      "request": "launch",
+      "program": "${fileDirname}/${fileBasenameNoExtension}",
+      "args": [],
+      "cwd": "${fileDirname}",
+      "miDebuggerPath": "__CUDA_GDB_PATH__",
+      "stopAtEntry": true,
+      "preLaunchTask": "nvcc: build active file (debug)"
+    },
+    {
+      "name": "CUDA: Run Active File (no debug)",
+      "type": "cuda-gdb",
+      "request": "launch",
+      "program": "${fileDirname}/${fileBasenameNoExtension}",
+      "args": [],
+      "cwd": "${fileDirname}",
+      "miDebuggerPath": "__CUDA_GDB_PATH__",
+      "stopAtEntry": false,
+      "preLaunchTask": "nvcc: build active file (release)"
+    }
+  ]
+}
{aws_bootstrap_g4dn-0.3.0 → aws_bootstrap_g4dn-0.5.0}/aws_bootstrap/resources/remote_setup.sh
RENAMED
@@ -7,7 +7,7 @@ echo "=== aws-bootstrap-g4dn remote setup ==="
 
 # 1. Verify GPU
 echo ""
-echo "[1/
+echo "[1/6] Verifying GPU and CUDA..."
 if command -v nvidia-smi &>/dev/null; then
     nvidia-smi --query-gpu=name,driver_version,memory.total --format=csv,noheader
 else
@@ -20,15 +20,40 @@ else
     echo "WARNING: nvcc not found (CUDA toolkit may not be installed)"
 fi
 
+# Make Nsight Systems (nsys) available on PATH if installed under /opt/nvidia
+if ! command -v nsys &>/dev/null; then
+    NSIGHT_DIR="/opt/nvidia/nsight-systems"
+    if [ -d "$NSIGHT_DIR" ]; then
+        # Fix permissions — the parent dir is often root-only (drwx------)
+        sudo chmod o+rx "$NSIGHT_DIR"
+        # Find the latest version directory (lexicographic sort)
+        NSYS_VERSION=$(ls -1 "$NSIGHT_DIR" | sort -V | tail -1)
+        if [ -n "$NSYS_VERSION" ] && [ -x "$NSIGHT_DIR/$NSYS_VERSION/bin/nsys" ]; then
+            NSYS_BIN="$NSIGHT_DIR/$NSYS_VERSION/bin"
+            if ! grep -q "nsight-systems" ~/.bashrc 2>/dev/null; then
+                echo "export PATH=\"$NSYS_BIN:\$PATH\"" >> ~/.bashrc
+            fi
+            export PATH="$NSYS_BIN:$PATH"
+            echo " Nsight Systems $NSYS_VERSION added to PATH ($NSYS_BIN)"
+        else
+            echo " WARNING: Nsight Systems directory found but no nsys binary"
+        fi
+    else
+        echo " Nsight Systems not found at $NSIGHT_DIR"
+    fi
+else
+    echo " nsys already on PATH: $(command -v nsys)"
+fi
+
 # 2. Install utilities
 echo ""
-echo "[2/
+echo "[2/6] Installing utilities..."
 sudo apt-get update -qq
 sudo apt-get install -y -qq htop tmux tree jq
 
 # 3. Set up Python environment with uv
 echo ""
-echo "[3/
+echo "[3/6] Setting up Python environment with uv..."
 if ! command -v uv &>/dev/null; then
     curl -LsSf https://astral.sh/uv/install.sh | sh
 fi
@@ -153,7 +178,7 @@ echo " Jupyter config written to $JUPYTER_CONFIG_DIR/jupyter_lab_config.py"
 
 # 4. Jupyter systemd service
 echo ""
-echo "[4/
+echo "[4/6] Setting up Jupyter systemd service..."
 LOGIN_USER=$(whoami)
 
 sudo tee /etc/systemd/system/jupyter.service > /dev/null << SVCEOF
@@ -180,7 +205,7 @@ echo " Jupyter service started (port 8888)"
 
 # 5. SSH keepalive
 echo ""
-echo "[5/
+echo "[5/6] Configuring SSH keepalive..."
 if ! grep -q "ClientAliveInterval" /etc/ssh/sshd_config; then
     echo "ClientAliveInterval 60" | sudo tee -a /etc/ssh/sshd_config > /dev/null
     echo "ClientAliveCountMax 10" | sudo tee -a /etc/ssh/sshd_config > /dev/null
@@ -190,5 +215,58 @@ else
     echo " SSH keepalive already configured"
 fi
 
+# 6. VSCode workspace setup
+echo ""
+echo "[6/6] Setting up VSCode workspace..."
+mkdir -p ~/workspace/.vscode
+
+# Detect cuda-gdb path
+CUDA_GDB_PATH=""
+if command -v cuda-gdb &>/dev/null; then
+    CUDA_GDB_PATH=$(command -v cuda-gdb)
+elif [ -x /usr/local/cuda/bin/cuda-gdb ]; then
+    CUDA_GDB_PATH="/usr/local/cuda/bin/cuda-gdb"
+else
+    # Try glob for versioned CUDA installs
+    for p in /usr/local/cuda-*/bin/cuda-gdb; do
+        if [ -x "$p" ]; then
+            CUDA_GDB_PATH="$p"
+        fi
+    done
+fi
+if [ -z "$CUDA_GDB_PATH" ]; then
+    echo " WARNING: cuda-gdb not found — using placeholder in launch.json"
+    CUDA_GDB_PATH="cuda-gdb"
+else
+    echo " cuda-gdb: $CUDA_GDB_PATH"
+fi
+
+# Detect GPU SM architecture
+GPU_ARCH=""
+if command -v nvidia-smi &>/dev/null; then
+    COMPUTE_CAP=$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader 2>/dev/null | head -1 | tr -d '[:space:]')
+    if [ -n "$COMPUTE_CAP" ]; then
+        GPU_ARCH="sm_$(echo "$COMPUTE_CAP" | tr -d '.')"
+    fi
+fi
+if [ -z "$GPU_ARCH" ]; then
+    echo " WARNING: Could not detect GPU arch — defaulting to sm_75"
+    GPU_ARCH="sm_75"
+else
+    echo " GPU arch: $GPU_ARCH"
+fi
+
+# Copy example CUDA source into workspace
+cp /tmp/saxpy.cu ~/workspace/saxpy.cu
+echo " Deployed saxpy.cu"
+
+# Deploy launch.json with cuda-gdb path
+sed "s|__CUDA_GDB_PATH__|${CUDA_GDB_PATH}|g" /tmp/launch.json > ~/workspace/.vscode/launch.json
+echo " Deployed launch.json"
+
+# Deploy tasks.json with GPU architecture
+sed "s|__GPU_ARCH__|${GPU_ARCH}|g" /tmp/tasks.json > ~/workspace/.vscode/tasks.json
+echo " Deployed tasks.json"
+
 echo ""
 echo "=== Remote setup complete ==="
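The detection steps above can be sanity-checked by hand on the instance, using only commands the script itself relies on; the values in the comments are what a g4dn (T4, compute capability 7.5) is expected to report:

```bash
# Compute capability as reported by the driver; 7.5 maps to -arch=sm_75
nvidia-smi --query-gpu=compute_cap --format=csv,noheader

# Confirm the placeholders were substituted in the deployed templates
grep -- '-arch=' ~/workspace/.vscode/tasks.json
grep miDebuggerPath ~/workspace/.vscode/launch.json

# nsys should resolve in new login shells after the ~/.bashrc change
command -v nsys
```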
aws_bootstrap_g4dn-0.5.0/aws_bootstrap/resources/saxpy.cu
ADDED
@@ -0,0 +1,49 @@
+/**
+ * SAXPY Example, CUDA Style
+ * Source: https://developer.nvidia.com/blog/easy-introduction-cuda-c-and-c/
+ *
+ * This is included as an example CUDA C++ source file to try out the VS Code launch configuration we include on the host machine.
+ *
+ */
+#include <stdio.h>
+
+__global__
+void saxpy(int n, float a, float *x, float *y)
+{
+  int i = blockIdx.x*blockDim.x + threadIdx.x;
+  if (i < n) y[i] = a*x[i] + y[i];
+}
+
+int main(void)
+{
+  int N = 1<<20;
+  float *x, *y, *d_x, *d_y;
+  x = (float*)malloc(N*sizeof(float));
+  y = (float*)malloc(N*sizeof(float));
+
+  cudaMalloc(&d_x, N*sizeof(float));
+  cudaMalloc(&d_y, N*sizeof(float));
+
+  for (int i = 0; i < N; i++) {
+    x[i] = 1.0f;
+    y[i] = 2.0f;
+  }
+
+  cudaMemcpy(d_x, x, N*sizeof(float), cudaMemcpyHostToDevice);
+  cudaMemcpy(d_y, y, N*sizeof(float), cudaMemcpyHostToDevice);
+
+  // Perform SAXPY on 1M elements
+  saxpy<<<(N+255)/256, 256>>>(N, 2.0f, d_x, d_y);
+
+  cudaMemcpy(y, d_y, N*sizeof(float), cudaMemcpyDeviceToHost);
+
+  float maxError = 0.0f;
+  for (int i = 0; i < N; i++)
+    maxError = max(maxError, abs(y[i]-4.0f));
+  printf("Max error: %f\n", maxError);
+
+  cudaFree(d_x);
+  cudaFree(d_y);
+  free(x);
+  free(y);
+}
aws_bootstrap_g4dn-0.5.0/aws_bootstrap/resources/tasks.json
ADDED
@@ -0,0 +1,48 @@
+{
+  // CUDA build tasks for VSCode
+  // Deployed to: ~/workspace/.vscode/tasks.json
+  "version": "2.0.0",
+  "tasks": [
+    {
+      "label": "nvcc: build active file (debug)",
+      "type": "shell",
+      "command": "nvcc",
+      "args": [
+        "-g",                  // Host debug symbols
+        "-G",                  // Device (GPU) debug symbols
+        "-O0",                 // No optimization
+        "-arch=__GPU_ARCH__",  // GPU arch (auto-detected)
+        "-o",
+        "${fileDirname}/${fileBasenameNoExtension}",
+        "${file}"
+      ],
+      "options": {
+        "cwd": "${fileDirname}"
+      },
+      "problemMatcher": ["$nvcc"],
+      "group": {
+        "kind": "build",
+        "isDefault": true
+      },
+      "detail": "Compile active .cu file with debug symbols (-g -G)"
+    },
+    {
+      "label": "nvcc: build active file (release)",
+      "type": "shell",
+      "command": "nvcc",
+      "args": [
+        "-O3",
+        "-arch=__GPU_ARCH__",
+        "-o",
+        "${fileDirname}/${fileBasenameNoExtension}",
+        "${file}"
+      ],
+      "options": {
+        "cwd": "${fileDirname}"
+      },
+      "problemMatcher": ["$nvcc"],
+      "group": "build",
+      "detail": "Compile active .cu file optimized (no debug)"
+    }
+  ]
+}
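The two tasks above are what the `launch.json` configurations trigger via `preLaunchTask`. Outside VSCode, the debug task is roughly equivalent to the following manual session with the bundled example; `sm_75` and a `cuda-gdb` on PATH are assumptions matching the defaults detected on a g4dn:

```bash
cd ~/workspace

# Manual equivalent of "nvcc: build active file (debug)"
nvcc -g -G -O0 -arch=sm_75 -o saxpy saxpy.cu

# Debug on the GPU with cuda-gdb (what launch.json's miDebuggerPath points at)
cuda-gdb ./saxpy
#   (cuda-gdb) break saxpy
#   (cuda-gdb) run

# Or just run it; a correct run prints "Max error: 0.000000"
./saxpy
```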