aws-bootstrap-g4dn 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
aws_bootstrap/cli.py CHANGED
@@ -113,6 +113,12 @@ def main():
113
113
  @click.option("--no-setup", is_flag=True, default=False, help="Skip running the remote setup script.")
114
114
  @click.option("--dry-run", is_flag=True, default=False, help="Show what would be done without executing.")
115
115
  @click.option("--profile", default=None, help="AWS profile override (defaults to AWS_PROFILE env var).")
116
+ @click.option(
117
+ "--python-version",
118
+ default=None,
119
+ help="Python version for the remote venv (e.g. 3.13, 3.14.2). Passed to uv during setup.",
120
+ )
121
+ @click.option("--ssh-port", default=22, show_default=True, type=int, help="SSH port on the remote instance.")
116
122
  def launch(
117
123
  instance_type,
118
124
  ami_filter,
@@ -125,6 +131,8 @@ def launch(
125
131
  no_setup,
126
132
  dry_run,
127
133
  profile,
134
+ python_version,
135
+ ssh_port,
128
136
  ):
129
137
  """Launch a GPU-accelerated EC2 instance."""
130
138
  config = LaunchConfig(
@@ -137,6 +145,8 @@ def launch(
137
145
  volume_size=volume_size,
138
146
  run_setup=not no_setup,
139
147
  dry_run=dry_run,
148
+ ssh_port=ssh_port,
149
+ python_version=python_version,
140
150
  )
141
151
  if ami_filter:
142
152
  config.ami_filter = ami_filter
@@ -163,7 +173,7 @@ def launch(
163
173
 
164
174
  # Step 3: Security group
165
175
  step(3, 6, "Ensuring security group...")
166
- sg_id = ensure_security_group(ec2, config.security_group, config.tag_value)
176
+ sg_id = ensure_security_group(ec2, config.security_group, config.tag_value, ssh_port=config.ssh_port)
167
177
 
168
178
  pricing = "spot" if config.spot else "on-demand"
169
179
 
@@ -178,6 +188,10 @@ def launch(
178
188
  val("Volume", f"{config.volume_size} GB gp3")
179
189
  val("Region", config.region)
180
190
  val("Remote setup", "yes" if config.run_setup else "no")
191
+ if config.ssh_port != 22:
192
+ val("SSH port", str(config.ssh_port))
193
+ if config.python_version:
194
+ val("Python version", config.python_version)
181
195
  click.echo()
182
196
  click.secho("No resources launched (dry-run mode).", fg="yellow")
183
197
  return
@@ -202,9 +216,13 @@ def launch(
202
216
  # Step 6: SSH and remote setup
203
217
  step(6, 6, "Waiting for SSH access...")
204
218
  private_key = private_key_path(config.key_path)
205
- if not wait_for_ssh(public_ip, config.ssh_user, config.key_path):
219
+ if not wait_for_ssh(public_ip, config.ssh_user, config.key_path, port=config.ssh_port):
206
220
  warn("SSH did not become available within the timeout.")
207
- info(f"Instance is running try connecting manually: ssh -i {private_key} {config.ssh_user}@{public_ip}")
221
+ port_flag = f" -p {config.ssh_port}" if config.ssh_port != 22 else ""
222
+ info(
223
+ f"Instance is running — try connecting manually:"
224
+ f" ssh -i {private_key}{port_flag} {config.ssh_user}@{public_ip}"
225
+ )
208
226
  return
209
227
 
210
228
  if config.run_setup:
@@ -212,7 +230,9 @@ def launch(
212
230
  warn(f"Setup script not found at {SETUP_SCRIPT}, skipping.")
213
231
  else:
214
232
  info("Running remote setup...")
215
- if run_remote_setup(public_ip, config.ssh_user, config.key_path, SETUP_SCRIPT):
233
+ if run_remote_setup(
234
+ public_ip, config.ssh_user, config.key_path, SETUP_SCRIPT, config.python_version, port=config.ssh_port
235
+ ):
216
236
  success("Remote setup completed successfully.")
217
237
  else:
218
238
  warn("Remote setup failed. Instance is still running.")
@@ -224,6 +244,7 @@ def launch(
224
244
  user=config.ssh_user,
225
245
  key_path=config.key_path,
226
246
  alias_prefix=config.alias_prefix,
247
+ port=config.ssh_port,
227
248
  )
228
249
  success(f"Added SSH config alias: {alias}")
229
250
 
@@ -239,18 +260,27 @@ def launch(
239
260
  val("Pricing", pricing)
240
261
  val("SSH alias", alias)
241
262
 
263
+ port_flag = f" -p {config.ssh_port}" if config.ssh_port != 22 else ""
264
+
242
265
  click.echo()
243
266
  click.secho(" SSH:", fg="cyan")
244
- click.secho(f" ssh {alias}", bold=True)
245
- info(f"or: ssh -i {private_key} {config.ssh_user}@{public_ip}")
267
+ click.secho(f" ssh{port_flag} {alias}", bold=True)
268
+ info(f"or: ssh -i {private_key}{port_flag} {config.ssh_user}@{public_ip}")
246
269
 
247
270
  click.echo()
248
271
  click.secho(" Jupyter (via SSH tunnel):", fg="cyan")
249
- click.secho(f" ssh -NL 8888:localhost:8888 {alias}", bold=True)
250
- info(f"or: ssh -i {private_key} -NL 8888:localhost:8888 {config.ssh_user}@{public_ip}")
272
+ click.secho(f" ssh -NL 8888:localhost:8888{port_flag} {alias}", bold=True)
273
+ info(f"or: ssh -i {private_key} -NL 8888:localhost:8888{port_flag} {config.ssh_user}@{public_ip}")
251
274
  info("Then open: http://localhost:8888")
252
275
  info("Notebook: ~/gpu_smoke_test.ipynb (GPU smoke test)")
253
276
 
277
+ click.echo()
278
+ click.secho(" VSCode Remote SSH:", fg="cyan")
279
+ click.secho(
280
+ f" code --folder-uri vscode-remote://ssh-remote+{alias}/home/{config.ssh_user}",
281
+ bold=True,
282
+ )
283
+
254
284
  click.echo()
255
285
  click.secho(" GPU Benchmark:", fg="cyan")
256
286
  click.secho(f" ssh {alias} 'python ~/gpu_benchmark.py'", bold=True)
@@ -266,7 +296,14 @@ def launch(
266
296
  @click.option("--region", default="us-west-2", show_default=True, help="AWS region.")
267
297
  @click.option("--profile", default=None, help="AWS profile override.")
268
298
  @click.option("--gpu", is_flag=True, default=False, help="Query GPU info (CUDA, driver) via SSH.")
269
- def status(region, profile, gpu):
299
+ @click.option(
300
+ "--instructions/--no-instructions",
301
+ "-I",
302
+ default=True,
303
+ show_default=True,
304
+ help="Show connection commands (SSH, Jupyter, VSCode) for each running instance.",
305
+ )
306
+ def status(region, profile, gpu, instructions):
270
307
  """Show running instances created by aws-bootstrap."""
271
308
  session = boto3.Session(profile_name=profile, region_name=region)
272
309
  ec2 = session.client("ec2")
@@ -305,11 +342,15 @@ def status(region, profile, gpu):
305
342
  if inst["PublicIp"]:
306
343
  val(" IP", inst["PublicIp"])
307
344
 
345
+ # Look up SSH config details once (used by --gpu and --instructions)
346
+ details = None
347
+ if (gpu or instructions) and state == "running" and inst["PublicIp"]:
348
+ details = get_ssh_host_details(inst["InstanceId"])
349
+
308
350
  # GPU info (opt-in, only for running instances with a public IP)
309
351
  if gpu and state == "running" and inst["PublicIp"]:
310
- details = get_ssh_host_details(inst["InstanceId"])
311
352
  if details:
312
- gpu_info = query_gpu_info(details.hostname, details.user, details.identity_file)
353
+ gpu_info = query_gpu_info(details.hostname, details.user, details.identity_file, port=details.port)
313
354
  else:
314
355
  gpu_info = query_gpu_info(
315
356
  inst["PublicIp"],
@@ -353,6 +394,29 @@ def status(region, profile, gpu):
353
394
  val(" Est. cost", f"~${est_cost:.4f}")
354
395
 
355
396
  val(" Launched", str(inst["LaunchTime"]))
397
+
398
+ # Connection instructions (on by default, disable with --no-instructions; only for running instances with a public IP and alias)
399
+ if instructions and state == "running" and inst["PublicIp"] and alias:
400
+ user = details.user if details else "ubuntu"
401
+ port = details.port if details else 22
402
+ port_flag = f" -p {port}" if port != 22 else ""
403
+
404
+ click.echo()
405
+ click.secho(" SSH:", fg="cyan")
406
+ click.secho(f" ssh{port_flag} {alias}", bold=True)
407
+
408
+ click.secho(" Jupyter (via SSH tunnel):", fg="cyan")
409
+ click.secho(f" ssh -NL 8888:localhost:8888{port_flag} {alias}", bold=True)
410
+
411
+ click.secho(" VSCode Remote SSH:", fg="cyan")
412
+ click.secho(
413
+ f" code --folder-uri vscode-remote://ssh-remote+{alias}/home/{user}",
414
+ bold=True,
415
+ )
416
+
417
+ click.secho(" GPU Benchmark:", fg="cyan")
418
+ click.secho(f" ssh {alias} 'python ~/gpu_benchmark.py'", bold=True)
419
+
356
420
  click.echo()
357
421
  first_id = instances[0]["InstanceId"]
358
422
  click.echo(" To terminate: " + click.style(f"aws-bootstrap terminate {first_id}", bold=True))
aws_bootstrap/config.py CHANGED
@@ -22,3 +22,5 @@ class LaunchConfig:
22
22
  ssh_user: str = "ubuntu"
23
23
  tag_value: str = "aws-bootstrap-g4dn"
24
24
  alias_prefix: str = "aws-gpu"
25
+ ssh_port: int = 22
26
+ python_version: str | None = None
aws_bootstrap/ec2.py CHANGED
@@ -59,7 +59,7 @@ def get_latest_ami(ec2_client, ami_filter: str) -> dict:
59
59
  return images[0]
60
60
 
61
61
 
62
- def ensure_security_group(ec2_client, name: str, tag_value: str) -> str:
62
+ def ensure_security_group(ec2_client, name: str, tag_value: str, ssh_port: int = 22) -> str:
63
63
  """Find or create a security group with SSH ingress in the default VPC."""
64
64
  # Find default VPC
65
65
  vpcs = ec2_client.describe_vpcs(Filters=[{"Name": "isDefault", "Values": ["true"]}])
@@ -103,8 +103,8 @@ def ensure_security_group(ec2_client, name: str, tag_value: str) -> str:
103
103
  IpPermissions=[
104
104
  {
105
105
  "IpProtocol": "tcp",
106
- "FromPort": 22,
107
- "ToPort": 22,
106
+ "FromPort": ssh_port,
107
+ "ToPort": ssh_port,
108
108
  "IpRanges": [{"CidrIp": "0.0.0.0/0", "Description": "SSH access"}],
109
109
  }
110
110
  ],
aws_bootstrap/gpu.py ADDED
@@ -0,0 +1,27 @@
1
+ """GPU architecture mapping and GPU info dataclass."""
2
+
3
+ from __future__ import annotations
4
+ from dataclasses import dataclass
5
+
6
+
7
+ _GPU_ARCHITECTURES: dict[str, str] = {
8
+ "7.0": "Volta",
9
+ "7.5": "Turing",
10
+ "8.0": "Ampere",
11
+ "8.6": "Ampere",
12
+ "8.7": "Ampere",
13
+ "8.9": "Ada Lovelace",
14
+ "9.0": "Hopper",
15
+ }
16
+
17
+
18
+ @dataclass
19
+ class GpuInfo:
20
+ """GPU information retrieved via nvidia-smi and nvcc."""
21
+
22
+ driver_version: str
23
+ cuda_driver_version: str # max CUDA version supported by driver (from nvidia-smi)
24
+ cuda_toolkit_version: str | None # actual CUDA toolkit installed (from nvcc), None if unavailable
25
+ gpu_name: str
26
+ compute_capability: str
27
+ architecture: str
@@ -34,7 +34,13 @@ if ! command -v uv &>/dev/null; then
34
34
  fi
35
35
  export PATH="$HOME/.local/bin:$PATH"
36
36
 
37
- uv venv ~/venv
37
+ if [ -n "${PYTHON_VERSION:-}" ]; then
38
+ echo " Installing Python ${PYTHON_VERSION}..."
39
+ uv python install "$PYTHON_VERSION"
40
+ uv venv --python "$PYTHON_VERSION" ~/venv
41
+ else
42
+ uv venv ~/venv
43
+ fi
38
44
 
39
45
  # --- CUDA-aware PyTorch installation ---
40
46
  # Known PyTorch CUDA wheel tags (ascending order).
aws_bootstrap/ssh.py CHANGED
@@ -12,6 +12,8 @@ from pathlib import Path
12
12
 
13
13
  import click
14
14
 
15
+ from .gpu import _GPU_ARCHITECTURES, GpuInfo
16
+
15
17
 
16
18
  # ---------------------------------------------------------------------------
17
19
  # SSH config markers
@@ -72,17 +74,18 @@ def import_key_pair(ec2_client, key_name: str, key_path: Path) -> str:
72
74
  return key_name
73
75
 
74
76
 
75
- def wait_for_ssh(host: str, user: str, key_path: Path, retries: int = 30, delay: int = 10) -> bool:
77
+ def wait_for_ssh(host: str, user: str, key_path: Path, retries: int = 30, delay: int = 10, port: int = 22) -> bool:
76
78
  """Wait for SSH to become available on the instance.
77
79
 
78
- Tries a TCP connection to port 22 first, then an actual SSH command.
80
+ Tries a TCP connection to the SSH port first, then an actual SSH command.
79
81
  """
80
82
  base_opts = _ssh_opts(key_path)
83
+ port_opts = ["-p", str(port)] if port != 22 else []
81
84
 
82
85
  for attempt in range(1, retries + 1):
83
- # First check if port 22 is open
86
+ # First check if the SSH port is open
84
87
  try:
85
- sock = socket.create_connection((host, 22), timeout=5)
88
+ sock = socket.create_connection((host, port), timeout=5)
86
89
  sock.close()
87
90
  except (TimeoutError, ConnectionRefusedError, OSError):
88
91
  click.echo(" SSH not ready " + click.style(f"(attempt {attempt}/{retries})", dim=True) + ", waiting...")
@@ -90,11 +93,18 @@ def wait_for_ssh(host: str, user: str, key_path: Path, retries: int = 30, delay:
90
93
  continue
91
94
 
92
95
  # Port is open, try actual SSH
93
- result = subprocess.run(
94
- ["ssh", *base_opts, "-o", "ConnectTimeout=10", "-o", "BatchMode=yes", f"{user}@{host}", "echo ok"],
95
- capture_output=True,
96
- text=True,
97
- )
96
+ cmd = [
97
+ "ssh",
98
+ *base_opts,
99
+ *port_opts,
100
+ "-o",
101
+ "ConnectTimeout=10",
102
+ "-o",
103
+ "BatchMode=yes",
104
+ f"{user}@{host}",
105
+ "echo ok",
106
+ ]
107
+ result = subprocess.run(cmd, capture_output=True, text=True)
98
108
  if result.returncode == 0:
99
109
  click.secho(" SSH connection established.", fg="green")
100
110
  return True
@@ -105,15 +115,19 @@ def wait_for_ssh(host: str, user: str, key_path: Path, retries: int = 30, delay:
105
115
  return False
106
116
 
107
117
 
108
- def run_remote_setup(host: str, user: str, key_path: Path, script_path: Path) -> bool:
118
+ def run_remote_setup(
119
+ host: str, user: str, key_path: Path, script_path: Path, python_version: str | None = None, port: int = 22
120
+ ) -> bool:
109
121
  """SCP the setup script and its supporting files (requirements.txt, gpu_benchmark.py, gpu_smoke_test.ipynb) to the instance and execute the script."""
110
122
  ssh_opts = _ssh_opts(key_path)
123
+ scp_port_opts = ["-P", str(port)] if port != 22 else []
124
+ ssh_port_opts = ["-p", str(port)] if port != 22 else []
111
125
  requirements_path = script_path.parent / "requirements.txt"
112
126
 
113
127
  # SCP the requirements file
114
128
  click.echo(" Uploading requirements.txt...")
115
129
  req_result = subprocess.run(
116
- ["scp", *ssh_opts, str(requirements_path), f"{user}@{host}:/tmp/requirements.txt"],
130
+ ["scp", *ssh_opts, *scp_port_opts, str(requirements_path), f"{user}@{host}:/tmp/requirements.txt"],
117
131
  capture_output=True,
118
132
  text=True,
119
133
  )
@@ -125,7 +139,7 @@ def run_remote_setup(host: str, user: str, key_path: Path, script_path: Path) ->
125
139
  benchmark_path = script_path.parent / "gpu_benchmark.py"
126
140
  click.echo(" Uploading gpu_benchmark.py...")
127
141
  bench_result = subprocess.run(
128
- ["scp", *ssh_opts, str(benchmark_path), f"{user}@{host}:/tmp/gpu_benchmark.py"],
142
+ ["scp", *ssh_opts, *scp_port_opts, str(benchmark_path), f"{user}@{host}:/tmp/gpu_benchmark.py"],
129
143
  capture_output=True,
130
144
  text=True,
131
145
  )
@@ -137,7 +151,7 @@ def run_remote_setup(host: str, user: str, key_path: Path, script_path: Path) ->
137
151
  notebook_path = script_path.parent / "gpu_smoke_test.ipynb"
138
152
  click.echo(" Uploading gpu_smoke_test.ipynb...")
139
153
  nb_result = subprocess.run(
140
- ["scp", *ssh_opts, str(notebook_path), f"{user}@{host}:/tmp/gpu_smoke_test.ipynb"],
154
+ ["scp", *ssh_opts, *scp_port_opts, str(notebook_path), f"{user}@{host}:/tmp/gpu_smoke_test.ipynb"],
141
155
  capture_output=True,
142
156
  text=True,
143
157
  )
@@ -148,7 +162,7 @@ def run_remote_setup(host: str, user: str, key_path: Path, script_path: Path) ->
148
162
  # SCP the script
149
163
  click.echo(" Uploading remote_setup.sh...")
150
164
  scp_result = subprocess.run(
151
- ["scp", *ssh_opts, str(script_path), f"{user}@{host}:/tmp/remote_setup.sh"],
165
+ ["scp", *ssh_opts, *scp_port_opts, str(script_path), f"{user}@{host}:/tmp/remote_setup.sh"],
152
166
  capture_output=True,
153
167
  text=True,
154
168
  )
@@ -156,10 +170,14 @@ def run_remote_setup(host: str, user: str, key_path: Path, script_path: Path) ->
156
170
  click.secho(f" SCP failed: {scp_result.stderr}", fg="red", err=True)
157
171
  return False
158
172
 
159
- # Execute the script
173
+ # Execute the script, passing PYTHON_VERSION as an inline env var if specified
160
174
  click.echo(" Running remote_setup.sh on instance...")
175
+ remote_cmd = "chmod +x /tmp/remote_setup.sh && "
176
+ if python_version:
177
+ remote_cmd += f"PYTHON_VERSION={python_version} "
178
+ remote_cmd += "/tmp/remote_setup.sh"
161
179
  ssh_result = subprocess.run(
162
- ["ssh", *ssh_opts, f"{user}@{host}", "chmod +x /tmp/remote_setup.sh && /tmp/remote_setup.sh"],
180
+ ["ssh", *ssh_opts, *ssh_port_opts, f"{user}@{host}", remote_cmd],
163
181
  capture_output=False,
164
182
  )
165
183
  return ssh_result.returncode == 0
@@ -222,15 +240,17 @@ def _next_alias(content: str, prefix: str = "aws-gpu") -> str:
222
240
  return f"{prefix}{max_n + 1}"
223
241
 
224
242
 
225
- def _build_stanza(instance_id: str, alias: str, hostname: str, user: str, key_path: Path) -> str:
243
+ def _build_stanza(instance_id: str, alias: str, hostname: str, user: str, key_path: Path, port: int = 22) -> str:
226
244
  """Build a complete SSH config stanza with markers."""
227
245
  priv_key = private_key_path(key_path)
246
+ port_line = f" Port {port}\n" if port != 22 else ""
228
247
  return (
229
248
  f"{_BEGIN_MARKER.format(instance_id=instance_id)}\n"
230
249
  f"Host {alias}\n"
231
250
  f" HostName {hostname}\n"
232
251
  f" User {user}\n"
233
252
  f" IdentityFile {priv_key}\n"
253
+ f"{port_line}"
234
254
  f" StrictHostKeyChecking no\n"
235
255
  f" UserKnownHostsFile /dev/null\n"
236
256
  f"{_END_MARKER.format(instance_id=instance_id)}\n"
@@ -244,6 +264,7 @@ def add_ssh_host(
244
264
  key_path: Path,
245
265
  config_path: Path | None = None,
246
266
  alias_prefix: str = "aws-gpu",
267
+ port: int = 22,
247
268
  ) -> str:
248
269
  """Add (or update) an SSH host stanza for *instance_id*.
249
270
 
@@ -257,7 +278,7 @@ def add_ssh_host(
257
278
  content = _remove_block(content, instance_id)
258
279
 
259
280
  alias = existing_alias or _next_alias(content, alias_prefix)
260
- stanza = _build_stanza(instance_id, alias, hostname, user, key_path)
281
+ stanza = _build_stanza(instance_id, alias, hostname, user, key_path, port=port)
261
282
 
262
283
  # Ensure a blank line before our block if file has content
263
284
  if content and not content.endswith("\n\n") and not content.endswith("\n"):
@@ -317,21 +338,6 @@ def list_ssh_hosts(config_path: Path | None = None) -> dict[str, str]:
317
338
  return result
318
339
 
319
340
 
320
- # ---------------------------------------------------------------------------
321
- # GPU info via SSH
322
- # ---------------------------------------------------------------------------
323
-
324
- _GPU_ARCHITECTURES: dict[str, str] = {
325
- "7.0": "Volta",
326
- "7.5": "Turing",
327
- "8.0": "Ampere",
328
- "8.6": "Ampere",
329
- "8.7": "Ampere",
330
- "8.9": "Ada Lovelace",
331
- "9.0": "Hopper",
332
- }
333
-
334
-
335
341
  @dataclass
336
342
  class SSHHostDetails:
337
343
  """Connection details parsed from an SSH config stanza."""
@@ -339,18 +345,7 @@ class SSHHostDetails:
339
345
  hostname: str
340
346
  user: str
341
347
  identity_file: Path
342
-
343
-
344
- @dataclass
345
- class GpuInfo:
346
- """GPU information retrieved via nvidia-smi and nvcc."""
347
-
348
- driver_version: str
349
- cuda_driver_version: str # max CUDA version supported by driver (from nvidia-smi)
350
- cuda_toolkit_version: str | None # actual CUDA toolkit installed (from nvcc), None if unavailable
351
- gpu_name: str
352
- compute_capability: str
353
- architecture: str
348
+ port: int = 22
354
349
 
355
350
 
356
351
  def get_ssh_host_details(instance_id: str, config_path: Path | None = None) -> SSHHostDetails | None:
@@ -371,6 +366,7 @@ def get_ssh_host_details(instance_id: str, config_path: Path | None = None) -> S
371
366
  hostname: str | None = None
372
367
  user: str | None = None
373
368
  identity_file: str | None = None
369
+ port: int = 22
374
370
 
375
371
  for line in content.splitlines():
376
372
  if line == begin_marker:
@@ -378,7 +374,7 @@ def get_ssh_host_details(instance_id: str, config_path: Path | None = None) -> S
378
374
  continue
379
375
  if line == end_marker and in_block:
380
376
  if hostname and user and identity_file:
381
- return SSHHostDetails(hostname=hostname, user=user, identity_file=Path(identity_file))
377
+ return SSHHostDetails(hostname=hostname, user=user, identity_file=Path(identity_file), port=port)
382
378
  return None
383
379
  if in_block:
384
380
  stripped = line.strip()
@@ -388,17 +384,20 @@ def get_ssh_host_details(instance_id: str, config_path: Path | None = None) -> S
388
384
  user = stripped.removeprefix("User ").strip()
389
385
  elif stripped.startswith("IdentityFile "):
390
386
  identity_file = stripped.removeprefix("IdentityFile ").strip()
387
+ elif stripped.startswith("Port "):
388
+ port = int(stripped.removeprefix("Port ").strip())
391
389
 
392
390
  return None
393
391
 
394
392
 
395
- def query_gpu_info(host: str, user: str, key_path: Path, timeout: int = 10) -> GpuInfo | None:
393
+ def query_gpu_info(host: str, user: str, key_path: Path, timeout: int = 10, port: int = 22) -> GpuInfo | None:
396
394
  """SSH into a host and query GPU info via ``nvidia-smi``.
397
395
 
398
396
  Returns ``GpuInfo`` on success, or ``None`` if the SSH connection fails,
399
397
  ``nvidia-smi`` is unavailable, or the output is malformed.
400
398
  """
401
399
  ssh_opts = _ssh_opts(key_path)
400
+ port_opts = ["-p", str(port)] if port != 22 else []
402
401
  remote_cmd = (
403
402
  "nvidia-smi --query-gpu=driver_version,name,compute_cap --format=csv,noheader,nounits"
404
403
  " && nvidia-smi | grep -oP 'CUDA Version: \\K[\\d.]+'"
@@ -407,6 +406,7 @@ def query_gpu_info(host: str, user: str, key_path: Path, timeout: int = 10) -> G
407
406
  cmd = [
408
407
  "ssh",
409
408
  *ssh_opts,
409
+ *port_opts,
410
410
  "-o",
411
411
  f"ConnectTimeout={timeout}",
412
412
  "-o",
@@ -9,7 +9,8 @@ import botocore.exceptions
9
9
  from click.testing import CliRunner
10
10
 
11
11
  from aws_bootstrap.cli import main
12
- from aws_bootstrap.ssh import GpuInfo, SSHHostDetails
12
+ from aws_bootstrap.gpu import GpuInfo
13
+ from aws_bootstrap.ssh import SSHHostDetails
13
14
 
14
15
 
15
16
  def test_help():
@@ -73,11 +74,12 @@ def test_status_no_instances(mock_find, mock_session):
73
74
  assert "No active" in result.output
74
75
 
75
76
 
77
+ @patch("aws_bootstrap.cli.get_ssh_host_details", return_value=None)
76
78
  @patch("aws_bootstrap.cli.list_ssh_hosts", return_value={})
77
79
  @patch("aws_bootstrap.cli.boto3.Session")
78
80
  @patch("aws_bootstrap.cli.get_spot_price")
79
81
  @patch("aws_bootstrap.cli.find_tagged_instances")
80
- def test_status_shows_instances(mock_find, mock_spot_price, mock_session, mock_ssh_hosts):
82
+ def test_status_shows_instances(mock_find, mock_spot_price, mock_session, mock_ssh_hosts, mock_details):
81
83
  mock_find.return_value = [
82
84
  {
83
85
  "InstanceId": "i-abc123",
@@ -101,11 +103,12 @@ def test_status_shows_instances(mock_find, mock_spot_price, mock_session, mock_s
101
103
  assert "Est. cost" in result.output
102
104
 
103
105
 
106
+ @patch("aws_bootstrap.cli.get_ssh_host_details", return_value=None)
104
107
  @patch("aws_bootstrap.cli.list_ssh_hosts", return_value={})
105
108
  @patch("aws_bootstrap.cli.boto3.Session")
106
109
  @patch("aws_bootstrap.cli.get_spot_price")
107
110
  @patch("aws_bootstrap.cli.find_tagged_instances")
108
- def test_status_on_demand_no_cost(mock_find, mock_spot_price, mock_session, mock_ssh_hosts):
111
+ def test_status_on_demand_no_cost(mock_find, mock_spot_price, mock_session, mock_ssh_hosts, mock_details):
109
112
  mock_find.return_value = [
110
113
  {
111
114
  "InstanceId": "i-ondemand",
@@ -351,11 +354,12 @@ def test_terminate_removes_ssh_config(mock_terminate, mock_find, mock_session, m
351
354
  mock_remove_ssh.assert_called_once_with("i-abc123")
352
355
 
353
356
 
357
+ @patch("aws_bootstrap.cli.get_ssh_host_details", return_value=None)
354
358
  @patch("aws_bootstrap.cli.list_ssh_hosts")
355
359
  @patch("aws_bootstrap.cli.boto3.Session")
356
360
  @patch("aws_bootstrap.cli.get_spot_price")
357
361
  @patch("aws_bootstrap.cli.find_tagged_instances")
358
- def test_status_shows_alias(mock_find, mock_spot_price, mock_session, mock_ssh_hosts):
362
+ def test_status_shows_alias(mock_find, mock_spot_price, mock_session, mock_ssh_hosts, mock_details):
359
363
  mock_find.return_value = [
360
364
  {
361
365
  "InstanceId": "i-abc123",
@@ -376,11 +380,12 @@ def test_status_shows_alias(mock_find, mock_spot_price, mock_session, mock_ssh_h
376
380
  assert "aws-gpu1" in result.output
377
381
 
378
382
 
383
+ @patch("aws_bootstrap.cli.get_ssh_host_details", return_value=None)
379
384
  @patch("aws_bootstrap.cli.list_ssh_hosts", return_value={})
380
385
  @patch("aws_bootstrap.cli.boto3.Session")
381
386
  @patch("aws_bootstrap.cli.get_spot_price")
382
387
  @patch("aws_bootstrap.cli.find_tagged_instances")
383
- def test_status_no_alias_graceful(mock_find, mock_spot_price, mock_session, mock_ssh_hosts):
388
+ def test_status_no_alias_graceful(mock_find, mock_spot_price, mock_session, mock_ssh_hosts, mock_details):
384
389
  mock_find.return_value = [
385
390
  {
386
391
  "InstanceId": "i-old999",
@@ -520,13 +525,98 @@ def test_status_gpu_skips_non_running(mock_find, mock_session, mock_ssh_hosts, m
520
525
  @patch("aws_bootstrap.cli.boto3.Session")
521
526
  @patch("aws_bootstrap.cli.get_spot_price", return_value=0.15)
522
527
  @patch("aws_bootstrap.cli.find_tagged_instances")
523
- def test_status_without_gpu_flag_no_ssh(mock_find, mock_spot, mock_session, mock_ssh_hosts, mock_details, mock_gpu):
528
+ def test_status_without_gpu_flag_no_gpu_query(
529
+ mock_find, mock_spot, mock_session, mock_ssh_hosts, mock_details, mock_gpu
530
+ ):
524
531
  mock_find.return_value = [_RUNNING_INSTANCE]
525
532
  runner = CliRunner()
526
533
  result = runner.invoke(main, ["status"])
527
534
  assert result.exit_code == 0
528
535
  mock_gpu.assert_not_called()
529
- mock_details.assert_not_called()
536
+
537
+
538
+ # ---------------------------------------------------------------------------
539
+ # --instructions / --no-instructions / -I flag tests
540
+ # ---------------------------------------------------------------------------
541
+
542
+
543
+ def test_status_help_shows_instructions_flag():
544
+ runner = CliRunner()
545
+ result = runner.invoke(main, ["status", "--help"])
546
+ assert result.exit_code == 0
547
+ assert "--instructions" in result.output
548
+ assert "--no-instructions" in result.output
549
+ assert "-I" in result.output
550
+
551
+
552
+ @patch("aws_bootstrap.cli.get_ssh_host_details")
553
+ @patch("aws_bootstrap.cli.list_ssh_hosts", return_value={"i-abc123": "aws-gpu1"})
554
+ @patch("aws_bootstrap.cli.boto3.Session")
555
+ @patch("aws_bootstrap.cli.get_spot_price", return_value=0.15)
556
+ @patch("aws_bootstrap.cli.find_tagged_instances")
557
+ def test_status_instructions_shown_by_default(mock_find, mock_spot, mock_session, mock_ssh_hosts, mock_details):
558
+ """Instructions are shown by default (no flag needed)."""
559
+ mock_find.return_value = [_RUNNING_INSTANCE]
560
+ mock_details.return_value = SSHHostDetails(
561
+ hostname="1.2.3.4", user="ubuntu", identity_file=Path("/home/user/.ssh/id_ed25519")
562
+ )
563
+ runner = CliRunner()
564
+ result = runner.invoke(main, ["status"])
565
+ assert result.exit_code == 0
566
+ assert "ssh aws-gpu1" in result.output
567
+ assert "ssh -NL 8888:localhost:8888 aws-gpu1" in result.output
568
+ assert "vscode-remote://ssh-remote+aws-gpu1/home/ubuntu" in result.output
569
+ assert "python ~/gpu_benchmark.py" in result.output
570
+
571
+
572
+ @patch("aws_bootstrap.cli.get_ssh_host_details")
573
+ @patch("aws_bootstrap.cli.list_ssh_hosts", return_value={"i-abc123": "aws-gpu1"})
574
+ @patch("aws_bootstrap.cli.boto3.Session")
575
+ @patch("aws_bootstrap.cli.get_spot_price", return_value=0.15)
576
+ @patch("aws_bootstrap.cli.find_tagged_instances")
577
+ def test_status_no_instructions_suppresses_commands(mock_find, mock_spot, mock_session, mock_ssh_hosts, mock_details):
578
+ """--no-instructions suppresses connection commands."""
579
+ mock_find.return_value = [_RUNNING_INSTANCE]
580
+ mock_details.return_value = SSHHostDetails(
581
+ hostname="1.2.3.4", user="ubuntu", identity_file=Path("/home/user/.ssh/id_ed25519")
582
+ )
583
+ runner = CliRunner()
584
+ result = runner.invoke(main, ["status", "--no-instructions"])
585
+ assert result.exit_code == 0
586
+ assert "vscode-remote" not in result.output
587
+ assert "Jupyter" not in result.output
588
+
589
+
590
+ @patch("aws_bootstrap.cli.get_ssh_host_details")
591
+ @patch("aws_bootstrap.cli.list_ssh_hosts", return_value={})
592
+ @patch("aws_bootstrap.cli.boto3.Session")
593
+ @patch("aws_bootstrap.cli.get_spot_price", return_value=0.15)
594
+ @patch("aws_bootstrap.cli.find_tagged_instances")
595
+ def test_status_instructions_no_alias_skips(mock_find, mock_spot, mock_session, mock_ssh_hosts, mock_details):
596
+ """Instances without an SSH alias don't get connection instructions."""
597
+ mock_find.return_value = [_RUNNING_INSTANCE]
598
+ runner = CliRunner()
599
+ result = runner.invoke(main, ["status"])
600
+ assert result.exit_code == 0
601
+ assert "ssh aws-gpu" not in result.output
602
+ assert "vscode-remote" not in result.output
603
+
604
+
605
+ @patch("aws_bootstrap.cli.get_ssh_host_details")
606
+ @patch("aws_bootstrap.cli.list_ssh_hosts", return_value={"i-abc123": "aws-gpu1"})
607
+ @patch("aws_bootstrap.cli.boto3.Session")
608
+ @patch("aws_bootstrap.cli.get_spot_price", return_value=0.15)
609
+ @patch("aws_bootstrap.cli.find_tagged_instances")
610
+ def test_status_instructions_non_default_port(mock_find, mock_spot, mock_session, mock_ssh_hosts, mock_details):
611
+ mock_find.return_value = [_RUNNING_INSTANCE]
612
+ mock_details.return_value = SSHHostDetails(
613
+ hostname="1.2.3.4", user="ubuntu", identity_file=Path("/home/user/.ssh/id_ed25519"), port=2222
614
+ )
615
+ runner = CliRunner()
616
+ result = runner.invoke(main, ["status"])
617
+ assert result.exit_code == 0
618
+ assert "ssh -p 2222 aws-gpu1" in result.output
619
+ assert "ssh -NL 8888:localhost:8888 -p 2222 aws-gpu1" in result.output
530
620
 
531
621
 
532
622
  # ---------------------------------------------------------------------------
@@ -636,3 +726,111 @@ def test_no_credentials_caught_on_list(mock_session, mock_list):
636
726
  result = runner.invoke(main, ["list", "instance-types"])
637
727
  assert result.exit_code != 0
638
728
  assert "Unable to locate AWS credentials" in result.output
729
+
730
+
731
+ # ---------------------------------------------------------------------------
732
+ # --python-version tests
733
+ # ---------------------------------------------------------------------------
734
+
735
+
736
+ @patch("aws_bootstrap.cli.add_ssh_host", return_value="aws-gpu1")
737
+ @patch("aws_bootstrap.cli.run_remote_setup", return_value=True)
738
+ @patch("aws_bootstrap.cli.wait_for_ssh", return_value=True)
739
+ @patch("aws_bootstrap.cli.wait_instance_ready")
740
+ @patch("aws_bootstrap.cli.launch_instance")
741
+ @patch("aws_bootstrap.cli.ensure_security_group", return_value="sg-123")
742
+ @patch("aws_bootstrap.cli.import_key_pair", return_value="aws-bootstrap-key")
743
+ @patch("aws_bootstrap.cli.get_latest_ami")
744
+ @patch("aws_bootstrap.cli.boto3.Session")
745
+ def test_launch_python_version_passed_to_setup(
746
+ mock_session, mock_ami, mock_import, mock_sg, mock_launch, mock_wait, mock_ssh, mock_setup, mock_add_ssh, tmp_path
747
+ ):
748
+ mock_ami.return_value = {"ImageId": "ami-123", "Name": "TestAMI"}
749
+ mock_launch.return_value = {"InstanceId": "i-test123"}
750
+ mock_wait.return_value = {"PublicIpAddress": "1.2.3.4"}
751
+
752
+ key_path = tmp_path / "id_ed25519.pub"
753
+ key_path.write_text("ssh-ed25519 AAAA test@host")
754
+
755
+ runner = CliRunner()
756
+ result = runner.invoke(main, ["launch", "--key-path", str(key_path), "--python-version", "3.13"])
757
+ assert result.exit_code == 0
758
+ mock_setup.assert_called_once()
759
+ assert mock_setup.call_args[0][4] == "3.13"
760
+
761
+
762
+ @patch("aws_bootstrap.cli.boto3.Session")
763
+ @patch("aws_bootstrap.cli.get_latest_ami")
764
+ @patch("aws_bootstrap.cli.import_key_pair", return_value="aws-bootstrap-key")
765
+ @patch("aws_bootstrap.cli.ensure_security_group", return_value="sg-123")
766
+ def test_launch_dry_run_shows_python_version(mock_sg, mock_import, mock_ami, mock_session, tmp_path):
767
+ mock_ami.return_value = {"ImageId": "ami-123", "Name": "TestAMI"}
768
+
769
+ key_path = tmp_path / "id_ed25519.pub"
770
+ key_path.write_text("ssh-ed25519 AAAA test@host")
771
+
772
+ runner = CliRunner()
773
+ result = runner.invoke(main, ["launch", "--key-path", str(key_path), "--dry-run", "--python-version", "3.14.2"])
774
+ assert result.exit_code == 0
775
+ assert "3.14.2" in result.output
776
+ assert "Python version" in result.output
777
+
778
+
779
+ @patch("aws_bootstrap.cli.boto3.Session")
780
+ @patch("aws_bootstrap.cli.get_latest_ami")
781
+ @patch("aws_bootstrap.cli.import_key_pair", return_value="aws-bootstrap-key")
782
+ @patch("aws_bootstrap.cli.ensure_security_group", return_value="sg-123")
783
+ def test_launch_dry_run_omits_python_version_when_unset(mock_sg, mock_import, mock_ami, mock_session, tmp_path):
784
+ mock_ami.return_value = {"ImageId": "ami-123", "Name": "TestAMI"}
785
+
786
+ key_path = tmp_path / "id_ed25519.pub"
787
+ key_path.write_text("ssh-ed25519 AAAA test@host")
788
+
789
+ runner = CliRunner()
790
+ result = runner.invoke(main, ["launch", "--key-path", str(key_path), "--dry-run"])
791
+ assert result.exit_code == 0
792
+ assert "Python version" not in result.output
793
+
794
+
795
+ # ---------------------------------------------------------------------------
796
+ # --ssh-port tests
797
+ # ---------------------------------------------------------------------------
798
+
799
+
800
+ def test_launch_help_shows_ssh_port():
801
+ runner = CliRunner()
802
+ result = runner.invoke(main, ["launch", "--help"])
803
+ assert result.exit_code == 0
804
+ assert "--ssh-port" in result.output
805
+
806
+
807
+ @patch("aws_bootstrap.cli.boto3.Session")
808
+ @patch("aws_bootstrap.cli.get_latest_ami")
809
+ @patch("aws_bootstrap.cli.import_key_pair", return_value="aws-bootstrap-key")
810
+ @patch("aws_bootstrap.cli.ensure_security_group", return_value="sg-123")
811
+ def test_launch_dry_run_shows_ssh_port_when_non_default(mock_sg, mock_import, mock_ami, mock_session, tmp_path):
812
+ mock_ami.return_value = {"ImageId": "ami-123", "Name": "TestAMI"}
813
+
814
+ key_path = tmp_path / "id_ed25519.pub"
815
+ key_path.write_text("ssh-ed25519 AAAA test@host")
816
+
817
+ runner = CliRunner()
818
+ result = runner.invoke(main, ["launch", "--key-path", str(key_path), "--dry-run", "--ssh-port", "2222"])
819
+ assert result.exit_code == 0
820
+ assert "2222" in result.output
821
+
822
+
823
+ @patch("aws_bootstrap.cli.boto3.Session")
824
+ @patch("aws_bootstrap.cli.get_latest_ami")
825
+ @patch("aws_bootstrap.cli.import_key_pair", return_value="aws-bootstrap-key")
826
+ @patch("aws_bootstrap.cli.ensure_security_group", return_value="sg-123")
827
+ def test_launch_dry_run_omits_ssh_port_when_default(mock_sg, mock_import, mock_ami, mock_session, tmp_path):
828
+ mock_ami.return_value = {"ImageId": "ami-123", "Name": "TestAMI"}
829
+
830
+ key_path = tmp_path / "id_ed25519.pub"
831
+ key_path.write_text("ssh-ed25519 AAAA test@host")
832
+
833
+ runner = CliRunner()
834
+ result = runner.invoke(main, ["launch", "--key-path", str(key_path), "--dry-run"])
835
+ assert result.exit_code == 0
836
+ assert "SSH port" not in result.output
@@ -0,0 +1,98 @@
1
+ """Tests for GPU info queries via SSH (query_gpu_info, GPU architecture mapping)."""
2
+
3
+ from __future__ import annotations
4
+ import subprocess
5
+ from pathlib import Path
6
+ from unittest.mock import patch
7
+
8
+ from aws_bootstrap.gpu import _GPU_ARCHITECTURES, GpuInfo
9
+ from aws_bootstrap.ssh import query_gpu_info
10
+
11
+
12
+ # ---------------------------------------------------------------------------
13
+ # query_gpu_info
14
+ # ---------------------------------------------------------------------------
15
+
16
+ NVIDIA_SMI_OUTPUT = "560.35.03, Tesla T4, 7.5\n12.8\n12.6\n"
17
+
18
+
19
+ @patch("aws_bootstrap.ssh.subprocess.run")
20
+ def test_query_gpu_info_success(mock_run):
21
+ """Successful nvidia-smi + nvcc output returns a valid GpuInfo."""
22
+ mock_run.return_value = subprocess.CompletedProcess(args=[], returncode=0, stdout=NVIDIA_SMI_OUTPUT, stderr="")
23
+
24
+ info = query_gpu_info("1.2.3.4", "ubuntu", Path("/home/user/.ssh/id_ed25519"))
25
+ assert info is not None
26
+ assert isinstance(info, GpuInfo)
27
+ assert info.driver_version == "560.35.03"
28
+ assert info.cuda_driver_version == "12.8"
29
+ assert info.cuda_toolkit_version == "12.6"
30
+ assert info.gpu_name == "Tesla T4"
31
+ assert info.compute_capability == "7.5"
32
+ assert info.architecture == "Turing"
33
+
34
+
35
+ @patch("aws_bootstrap.ssh.subprocess.run")
36
+ def test_query_gpu_info_no_nvcc(mock_run):
37
+ """When nvcc is unavailable, cuda_toolkit_version is None."""
38
+ output = "560.35.03, Tesla T4, 7.5\n12.8\nN/A\n"
39
+ mock_run.return_value = subprocess.CompletedProcess(args=[], returncode=0, stdout=output, stderr="")
40
+
41
+ info = query_gpu_info("1.2.3.4", "ubuntu", Path("/home/user/.ssh/id_ed25519"))
42
+ assert info is not None
43
+ assert info.cuda_driver_version == "12.8"
44
+ assert info.cuda_toolkit_version is None
45
+
46
+
47
+ @patch("aws_bootstrap.ssh.subprocess.run")
48
+ def test_query_gpu_info_ssh_failure(mock_run):
49
+ """Non-zero exit code returns None."""
50
+ mock_run.return_value = subprocess.CompletedProcess(args=[], returncode=255, stdout="", stderr="Connection refused")
51
+
52
+ info = query_gpu_info("1.2.3.4", "ubuntu", Path("/home/user/.ssh/id_ed25519"))
53
+ assert info is None
54
+
55
+
56
+ @patch("aws_bootstrap.ssh.subprocess.run", side_effect=subprocess.TimeoutExpired(cmd="ssh", timeout=15))
57
+ def test_query_gpu_info_timeout(mock_run):
58
+ """TimeoutExpired returns None."""
59
+ info = query_gpu_info("1.2.3.4", "ubuntu", Path("/home/user/.ssh/id_ed25519"))
60
+ assert info is None
61
+
62
+
63
+ @patch("aws_bootstrap.ssh.subprocess.run")
64
+ def test_query_gpu_info_malformed_output(mock_run):
65
+ """Garbage output returns None."""
66
+ mock_run.return_value = subprocess.CompletedProcess(
67
+ args=[], returncode=0, stdout="not valid gpu output\n", stderr=""
68
+ )
69
+
70
+ info = query_gpu_info("1.2.3.4", "ubuntu", Path("/home/user/.ssh/id_ed25519"))
71
+ assert info is None
72
+
73
+
74
+ # ---------------------------------------------------------------------------
75
+ # GPU architecture mapping
76
+ # ---------------------------------------------------------------------------
77
+
78
+
79
+ def test_gpu_architecture_mapping():
80
+ """Known compute capabilities map to correct architecture names."""
81
+ assert _GPU_ARCHITECTURES["7.5"] == "Turing"
82
+ assert _GPU_ARCHITECTURES["8.0"] == "Ampere"
83
+ assert _GPU_ARCHITECTURES["8.6"] == "Ampere"
84
+ assert _GPU_ARCHITECTURES["8.9"] == "Ada Lovelace"
85
+ assert _GPU_ARCHITECTURES["9.0"] == "Hopper"
86
+ assert _GPU_ARCHITECTURES["7.0"] == "Volta"
87
+
88
+
89
+ @patch("aws_bootstrap.ssh.subprocess.run")
90
+ def test_query_gpu_info_unknown_architecture(mock_run):
91
+ """Unknown compute capability produces a fallback architecture string."""
92
+ mock_run.return_value = subprocess.CompletedProcess(
93
+ args=[], returncode=0, stdout="550.00.00, Future GPU, 10.0\n13.0\n13.0\n", stderr=""
94
+ )
95
+
96
+ info = query_gpu_info("1.2.3.4", "ubuntu", Path("/home/user/.ssh/id_ed25519"))
97
+ assert info is not None
98
+ assert info.architecture == "Unknown (10.0)"
@@ -10,6 +10,7 @@ from aws_bootstrap.ssh import (
10
10
  _read_ssh_config,
11
11
  add_ssh_host,
12
12
  find_ssh_alias,
13
+ get_ssh_host_details,
13
14
  list_ssh_hosts,
14
15
  remove_ssh_host,
15
16
  )
@@ -295,3 +296,38 @@ def test_list_hosts_nonexistent_file(tmp_path):
295
296
  def test_remove_nonexistent_file(tmp_path):
296
297
  cfg = tmp_path / "no_such_file"
297
298
  assert remove_ssh_host("i-abc123", config_path=cfg) is None
299
+
300
+
301
+ # ---------------------------------------------------------------------------
302
+ # Port in stanza / details
303
+ # ---------------------------------------------------------------------------
304
+
305
+
306
+ def test_stanza_includes_port_when_non_default(tmp_path):
307
+ cfg = _config_path(tmp_path)
308
+ add_ssh_host("i-abc123", "1.2.3.4", "ubuntu", KEY_PATH, config_path=cfg, port=2222)
309
+ content = cfg.read_text()
310
+ assert "Port 2222" in content
311
+
312
+
313
+ def test_stanza_omits_port_when_default(tmp_path):
314
+ cfg = _config_path(tmp_path)
315
+ add_ssh_host("i-abc123", "1.2.3.4", "ubuntu", KEY_PATH, config_path=cfg)
316
+ content = cfg.read_text()
317
+ assert "Port" not in content
318
+
319
+
320
+ def test_get_ssh_host_details_parses_port(tmp_path):
321
+ cfg = _config_path(tmp_path)
322
+ add_ssh_host("i-abc123", "1.2.3.4", "ubuntu", KEY_PATH, config_path=cfg, port=2222)
323
+ details = get_ssh_host_details("i-abc123", config_path=cfg)
324
+ assert details is not None
325
+ assert details.port == 2222
326
+
327
+
328
+ def test_get_ssh_host_details_default_port(tmp_path):
329
+ cfg = _config_path(tmp_path)
330
+ add_ssh_host("i-abc123", "1.2.3.4", "ubuntu", KEY_PATH, config_path=cfg)
331
+ details = get_ssh_host_details("i-abc123", config_path=cfg)
332
+ assert details is not None
333
+ assert details.port == 22
@@ -1,16 +1,11 @@
1
- """Tests for GPU info queries via SSH (get_ssh_host_details, query_gpu_info)."""
1
+ """Tests for get_ssh_host_details (SSH config parsing)."""
2
2
 
3
3
  from __future__ import annotations
4
- import subprocess
5
4
  from pathlib import Path
6
- from unittest.mock import patch
7
5
 
8
6
  from aws_bootstrap.ssh import (
9
- _GPU_ARCHITECTURES,
10
- GpuInfo,
11
7
  add_ssh_host,
12
8
  get_ssh_host_details,
13
- query_gpu_info,
14
9
  )
15
10
 
16
11
 
@@ -47,92 +42,3 @@ def test_get_ssh_host_details_nonexistent_file(tmp_path):
47
42
  """Returns None when the SSH config file doesn't exist."""
48
43
  cfg = tmp_path / "no_such_file"
49
44
  assert get_ssh_host_details("i-abc123", config_path=cfg) is None
50
-
51
-
52
- # ---------------------------------------------------------------------------
53
- # query_gpu_info
54
- # ---------------------------------------------------------------------------
55
-
56
- NVIDIA_SMI_OUTPUT = "560.35.03, Tesla T4, 7.5\n12.8\n12.6\n"
57
-
58
-
59
- @patch("aws_bootstrap.ssh.subprocess.run")
60
- def test_query_gpu_info_success(mock_run):
61
- """Successful nvidia-smi + nvcc output returns a valid GpuInfo."""
62
- mock_run.return_value = subprocess.CompletedProcess(args=[], returncode=0, stdout=NVIDIA_SMI_OUTPUT, stderr="")
63
-
64
- info = query_gpu_info("1.2.3.4", "ubuntu", Path("/home/user/.ssh/id_ed25519"))
65
- assert info is not None
66
- assert isinstance(info, GpuInfo)
67
- assert info.driver_version == "560.35.03"
68
- assert info.cuda_driver_version == "12.8"
69
- assert info.cuda_toolkit_version == "12.6"
70
- assert info.gpu_name == "Tesla T4"
71
- assert info.compute_capability == "7.5"
72
- assert info.architecture == "Turing"
73
-
74
-
75
- @patch("aws_bootstrap.ssh.subprocess.run")
76
- def test_query_gpu_info_no_nvcc(mock_run):
77
- """When nvcc is unavailable, cuda_toolkit_version is None."""
78
- output = "560.35.03, Tesla T4, 7.5\n12.8\nN/A\n"
79
- mock_run.return_value = subprocess.CompletedProcess(args=[], returncode=0, stdout=output, stderr="")
80
-
81
- info = query_gpu_info("1.2.3.4", "ubuntu", Path("/home/user/.ssh/id_ed25519"))
82
- assert info is not None
83
- assert info.cuda_driver_version == "12.8"
84
- assert info.cuda_toolkit_version is None
85
-
86
-
87
- @patch("aws_bootstrap.ssh.subprocess.run")
88
- def test_query_gpu_info_ssh_failure(mock_run):
89
- """Non-zero exit code returns None."""
90
- mock_run.return_value = subprocess.CompletedProcess(args=[], returncode=255, stdout="", stderr="Connection refused")
91
-
92
- info = query_gpu_info("1.2.3.4", "ubuntu", Path("/home/user/.ssh/id_ed25519"))
93
- assert info is None
94
-
95
-
96
- @patch("aws_bootstrap.ssh.subprocess.run", side_effect=subprocess.TimeoutExpired(cmd="ssh", timeout=15))
97
- def test_query_gpu_info_timeout(mock_run):
98
- """TimeoutExpired returns None."""
99
- info = query_gpu_info("1.2.3.4", "ubuntu", Path("/home/user/.ssh/id_ed25519"))
100
- assert info is None
101
-
102
-
103
- @patch("aws_bootstrap.ssh.subprocess.run")
104
- def test_query_gpu_info_malformed_output(mock_run):
105
- """Garbage output returns None."""
106
- mock_run.return_value = subprocess.CompletedProcess(
107
- args=[], returncode=0, stdout="not valid gpu output\n", stderr=""
108
- )
109
-
110
- info = query_gpu_info("1.2.3.4", "ubuntu", Path("/home/user/.ssh/id_ed25519"))
111
- assert info is None
112
-
113
-
114
- # ---------------------------------------------------------------------------
115
- # GPU architecture mapping
116
- # ---------------------------------------------------------------------------
117
-
118
-
119
- def test_gpu_architecture_mapping():
120
- """Known compute capabilities map to correct architecture names."""
121
- assert _GPU_ARCHITECTURES["7.5"] == "Turing"
122
- assert _GPU_ARCHITECTURES["8.0"] == "Ampere"
123
- assert _GPU_ARCHITECTURES["8.6"] == "Ampere"
124
- assert _GPU_ARCHITECTURES["8.9"] == "Ada Lovelace"
125
- assert _GPU_ARCHITECTURES["9.0"] == "Hopper"
126
- assert _GPU_ARCHITECTURES["7.0"] == "Volta"
127
-
128
-
129
- @patch("aws_bootstrap.ssh.subprocess.run")
130
- def test_query_gpu_info_unknown_architecture(mock_run):
131
- """Unknown compute capability produces a fallback architecture string."""
132
- mock_run.return_value = subprocess.CompletedProcess(
133
- args=[], returncode=0, stdout="550.00.00, Future GPU, 10.0\n13.0\n13.0\n", stderr=""
134
- )
135
-
136
- info = query_gpu_info("1.2.3.4", "ubuntu", Path("/home/user/.ssh/id_ed25519"))
137
- assert info is not None
138
- assert info.architecture == "Unknown (10.0)"
@@ -1,13 +1,16 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: aws-bootstrap-g4dn
3
- Version: 0.2.0
3
+ Version: 0.3.0
4
4
  Summary: Bootstrap AWS EC2 GPU instances for hybrid local-remote development
5
5
  Author: Adam Ever-Hadani
6
6
  License-Expression: MIT
7
7
  Project-URL: Homepage, https://github.com/promptromp/aws-bootstrap-g4dn
8
8
  Project-URL: Issues, https://github.com/promptromp/aws-bootstrap-g4dn/issues
9
9
  Keywords: aws,ec2,gpu,cuda,deep-learning,spot-instances,cli
10
- Requires-Python: >=3.14
10
+ Classifier: Programming Language :: Python :: 3.12
11
+ Classifier: Programming Language :: Python :: 3.13
12
+ Classifier: Programming Language :: Python :: 3.14
13
+ Requires-Python: >=3.12
11
14
  Description-Content-Type: text/markdown
12
15
  License-File: LICENSE
13
16
  Requires-Dist: boto3>=1.35
@@ -55,7 +58,7 @@ ssh aws-gpu1 # You're in, venv activated, PyTorch works
55
58
 
56
59
  1. AWS profile configured with relevant permissions (profile name can be passed via `--profile` or read from `AWS_PROFILE` env var)
57
60
  2. AWS CLI v2 — see [here](https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html)
58
- 3. Python 3.14+ and [uv](https://github.com/astral-sh/uv)
61
+ 3. Python 3.12+ and [uv](https://github.com/astral-sh/uv)
59
62
  4. An SSH key pair (see below)
60
63
 
61
64
  ## Installation
@@ -123,6 +126,12 @@ aws-bootstrap launch --on-demand --instance-type g5.xlarge --region us-east-1
123
126
  # Launch without running the remote setup script
124
127
  aws-bootstrap launch --no-setup
125
128
 
129
+ # Use a specific Python version in the remote venv
130
+ aws-bootstrap launch --python-version 3.13
131
+
132
+ # Use a non-default SSH port
133
+ aws-bootstrap launch --ssh-port 2222
134
+
126
135
  # Use a specific AWS profile
127
136
  aws-bootstrap launch --profile my-aws-profile
128
137
  ```
@@ -146,7 +155,7 @@ The setup script runs automatically on the instance after SSH becomes available:
146
155
  |------|------|
147
156
  | **GPU verify** | Confirms `nvidia-smi` and `nvcc` are working |
148
157
  | **Utilities** | Installs `htop`, `tmux`, `tree`, `jq` |
149
- | **Python venv** | Creates `~/venv` with `uv`, auto-activates in `~/.bashrc` |
158
+ | **Python venv** | Creates `~/venv` with `uv`, auto-activates in `~/.bashrc`. Use `--python-version` to pin a specific Python (e.g. `3.13`) |
150
159
  | **CUDA-aware PyTorch** | Detects CUDA toolkit version → installs PyTorch from the matching `cu{TAG}` wheel index |
151
160
  | **CUDA smoke test** | Runs `torch.cuda.is_available()` + GPU matmul to verify the stack |
152
161
  | **GPU benchmark** | Copies `gpu_benchmark.py` to `~/gpu_benchmark.py` |
@@ -220,6 +229,9 @@ aws-bootstrap status
220
229
  # Include GPU info (CUDA toolkit + driver version, GPU name, architecture) via SSH
221
230
  aws-bootstrap status --gpu
222
231
 
232
+ # Hide connection commands (shown by default for each running instance)
233
+ aws-bootstrap status --no-instructions
234
+
223
235
  # List instances in a specific region
224
236
  aws-bootstrap status --region us-east-1
225
237
 
@@ -0,0 +1,24 @@
1
+ aws_bootstrap/__init__.py,sha256=kl_jvrunGyIyizdRqAP6ROb5P1BBrXX5PTq5gq1ipU0,82
2
+ aws_bootstrap/cli.py,sha256=H7Lud1PWk0O5zKGf1StARCEahrMErickuHXsWk42j3A,20481
3
+ aws_bootstrap/config.py,sha256=TeCOYDlijT-KD5SFIzc-VvBhOqcq9YCgen9NK63rka8,895
4
+ aws_bootstrap/ec2.py,sha256=LHpzW91ayK45gsWV_B4LanSZIhWggqTsL31qHUceiaA,12274
5
+ aws_bootstrap/gpu.py,sha256=WTnHR0s3mQHDlnzqRgqAC6omWz7nT5YtGpcs0Bf88jk,692
6
+ aws_bootstrap/ssh.py,sha256=RK5Ahiwpol9-4MUvurKyNa1JorQW9VkkNtSSfPzryrU,17851
7
+ aws_bootstrap/resources/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
+ aws_bootstrap/resources/gpu_benchmark.py,sha256=2uoss2bZGhg7c3D7Hg1-EJlOVDtzAH4co1ahSvF_lVU,29080
9
+ aws_bootstrap/resources/gpu_smoke_test.ipynb,sha256=XvAOEIPa5H9ri5mRZqOdknmwOwKNvCME6DzBGuhRYfg,10698
10
+ aws_bootstrap/resources/remote_setup.sh,sha256=n1joNO-6EizLsz2BPOPruFhe90kEQ9Np2SBhYXnOJRs,5648
11
+ aws_bootstrap/resources/requirements.txt,sha256=gpYl1MFCfWXiAhbIUgAjuTHONz3MKci25msIyOkMmUk,75
12
+ aws_bootstrap/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
+ aws_bootstrap/tests/test_cli.py,sha256=vyoVVqSakC7Y2BCEFpyf2ghTUvT-QWBQC9-yvEFz3gw,32554
14
+ aws_bootstrap/tests/test_config.py,sha256=arvET6KNl4Vqsz0zFrSdhciXGU688bfsvCr3dSpziN0,1050
15
+ aws_bootstrap/tests/test_ec2.py,sha256=Jmqsjv973hxXbZWfGgECtm6aa2156Lzji227sYMBuMg,10547
16
+ aws_bootstrap/tests/test_gpu.py,sha256=rbMuda_sIVbaCzkWXoLv9YIfnWztgRoP7NuVL8XHrUY,3871
17
+ aws_bootstrap/tests/test_ssh_config.py,sha256=iQDd3hJ8to-2-QHW26Brtglfl0q0P6sCE6U_itxoNyY,11609
18
+ aws_bootstrap/tests/test_ssh_gpu.py,sha256=dRp86Og-8GqiATSff3rxhu83mBZdGgqI4UOnoC00Ln0,1454
19
+ aws_bootstrap_g4dn-0.3.0.dist-info/licenses/LICENSE,sha256=Hen77Mt8sazSQJ9DgrmZuAvDwo2vc5JAkR_avuFV-CM,1067
20
+ aws_bootstrap_g4dn-0.3.0.dist-info/METADATA,sha256=tfsBYTSqVQf8A46P22qwdFsb_ur-Ge57hQfuDaj0mgE,12417
21
+ aws_bootstrap_g4dn-0.3.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
22
+ aws_bootstrap_g4dn-0.3.0.dist-info/entry_points.txt,sha256=T8FXfOgmLEvFi8DHaFJ3tCzId9J3_d2Y6qT98OXxCjA,57
23
+ aws_bootstrap_g4dn-0.3.0.dist-info/top_level.txt,sha256=mix9gZRs8JUv0OMSB_rwdGcRnTKzsKgHrE5fyAn5zJw,14
24
+ aws_bootstrap_g4dn-0.3.0.dist-info/RECORD,,
@@ -1,22 +0,0 @@
1
- aws_bootstrap/__init__.py,sha256=kl_jvrunGyIyizdRqAP6ROb5P1BBrXX5PTq5gq1ipU0,82
2
- aws_bootstrap/cli.py,sha256=3PWGU4djqCvABNpLvYTk473Nmmmrad3JQ3iQtg5YmnE,17917
3
- aws_bootstrap/config.py,sha256=bOADtpujEacED0pu9m7D781UFlMhZrmtHQ7eqI6ySjk,834
4
- aws_bootstrap/ec2.py,sha256=-yEyGMCycY4ccsmbgqHnLK2FRFWX2kr7nLfYWXSKeaY,12242
5
- aws_bootstrap/ssh.py,sha256=-8F0PAkl7CCY1b9n46ZhWJ6faIMlSvA26BleeIp-rMA,17533
6
- aws_bootstrap/resources/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
- aws_bootstrap/resources/gpu_benchmark.py,sha256=2uoss2bZGhg7c3D7Hg1-EJlOVDtzAH4co1ahSvF_lVU,29080
8
- aws_bootstrap/resources/gpu_smoke_test.ipynb,sha256=XvAOEIPa5H9ri5mRZqOdknmwOwKNvCME6DzBGuhRYfg,10698
9
- aws_bootstrap/resources/remote_setup.sh,sha256=FzpXEw-LvlXROi-PmO72yEyDWWi-3Tul6D7-vFDubXQ,5460
10
- aws_bootstrap/resources/requirements.txt,sha256=gpYl1MFCfWXiAhbIUgAjuTHONz3MKci25msIyOkMmUk,75
11
- aws_bootstrap/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
- aws_bootstrap/tests/test_cli.py,sha256=xGCC07aPZMc5pExo__qz7X1Tm2v9Z1Xn4K99JocESas,23627
13
- aws_bootstrap/tests/test_config.py,sha256=arvET6KNl4Vqsz0zFrSdhciXGU688bfsvCr3dSpziN0,1050
14
- aws_bootstrap/tests/test_ec2.py,sha256=Jmqsjv973hxXbZWfGgECtm6aa2156Lzji227sYMBuMg,10547
15
- aws_bootstrap/tests/test_ssh_config.py,sha256=Rt3e7B22d8kK0PzFgXB4gwpF4HvIseiqzcpouCwMo5M,10333
16
- aws_bootstrap/tests/test_ssh_gpu.py,sha256=W6GQzILCy_qPrvWQlCC8Ris-vuTzTGiyNXEyMzwD1kM,5154
17
- aws_bootstrap_g4dn-0.2.0.dist-info/licenses/LICENSE,sha256=Hen77Mt8sazSQJ9DgrmZuAvDwo2vc5JAkR_avuFV-CM,1067
18
- aws_bootstrap_g4dn-0.2.0.dist-info/METADATA,sha256=XqtBIr0EdnRvsy83usvZ5n-B9WNpfLCGoVaVR9_1eaI,11927
19
- aws_bootstrap_g4dn-0.2.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
20
- aws_bootstrap_g4dn-0.2.0.dist-info/entry_points.txt,sha256=T8FXfOgmLEvFi8DHaFJ3tCzId9J3_d2Y6qT98OXxCjA,57
21
- aws_bootstrap_g4dn-0.2.0.dist-info/top_level.txt,sha256=mix9gZRs8JUv0OMSB_rwdGcRnTKzsKgHrE5fyAn5zJw,14
22
- aws_bootstrap_g4dn-0.2.0.dist-info/RECORD,,