aws-bootstrap-g4dn 0.2.0__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
aws_bootstrap/cli.py CHANGED
@@ -113,6 +113,12 @@ def main():
113
113
  @click.option("--no-setup", is_flag=True, default=False, help="Skip running the remote setup script.")
114
114
  @click.option("--dry-run", is_flag=True, default=False, help="Show what would be done without executing.")
115
115
  @click.option("--profile", default=None, help="AWS profile override (defaults to AWS_PROFILE env var).")
116
+ @click.option(
117
+ "--python-version",
118
+ default=None,
119
+ help="Python version for the remote venv (e.g. 3.13, 3.14.2). Passed to uv during setup.",
120
+ )
121
+ @click.option("--ssh-port", default=22, show_default=True, type=int, help="SSH port on the remote instance.")
116
122
  def launch(
117
123
  instance_type,
118
124
  ami_filter,
@@ -125,6 +131,8 @@ def launch(
125
131
  no_setup,
126
132
  dry_run,
127
133
  profile,
134
+ python_version,
135
+ ssh_port,
128
136
  ):
129
137
  """Launch a GPU-accelerated EC2 instance."""
130
138
  config = LaunchConfig(
@@ -137,6 +145,8 @@ def launch(
137
145
  volume_size=volume_size,
138
146
  run_setup=not no_setup,
139
147
  dry_run=dry_run,
148
+ ssh_port=ssh_port,
149
+ python_version=python_version,
140
150
  )
141
151
  if ami_filter:
142
152
  config.ami_filter = ami_filter
@@ -163,7 +173,7 @@ def launch(
163
173
 
164
174
  # Step 3: Security group
165
175
  step(3, 6, "Ensuring security group...")
166
- sg_id = ensure_security_group(ec2, config.security_group, config.tag_value)
176
+ sg_id = ensure_security_group(ec2, config.security_group, config.tag_value, ssh_port=config.ssh_port)
167
177
 
168
178
  pricing = "spot" if config.spot else "on-demand"
169
179
 
@@ -178,6 +188,10 @@ def launch(
178
188
  val("Volume", f"{config.volume_size} GB gp3")
179
189
  val("Region", config.region)
180
190
  val("Remote setup", "yes" if config.run_setup else "no")
191
+ if config.ssh_port != 22:
192
+ val("SSH port", str(config.ssh_port))
193
+ if config.python_version:
194
+ val("Python version", config.python_version)
181
195
  click.echo()
182
196
  click.secho("No resources launched (dry-run mode).", fg="yellow")
183
197
  return
@@ -202,9 +216,13 @@ def launch(
202
216
  # Step 6: SSH and remote setup
203
217
  step(6, 6, "Waiting for SSH access...")
204
218
  private_key = private_key_path(config.key_path)
205
- if not wait_for_ssh(public_ip, config.ssh_user, config.key_path):
219
+ if not wait_for_ssh(public_ip, config.ssh_user, config.key_path, port=config.ssh_port):
206
220
  warn("SSH did not become available within the timeout.")
207
- info(f"Instance is running try connecting manually: ssh -i {private_key} {config.ssh_user}@{public_ip}")
221
+ port_flag = f" -p {config.ssh_port}" if config.ssh_port != 22 else ""
222
+ info(
223
+ f"Instance is running — try connecting manually:"
224
+ f" ssh -i {private_key}{port_flag} {config.ssh_user}@{public_ip}"
225
+ )
208
226
  return
209
227
 
210
228
  if config.run_setup:
@@ -212,7 +230,9 @@ def launch(
212
230
  warn(f"Setup script not found at {SETUP_SCRIPT}, skipping.")
213
231
  else:
214
232
  info("Running remote setup...")
215
- if run_remote_setup(public_ip, config.ssh_user, config.key_path, SETUP_SCRIPT):
233
+ if run_remote_setup(
234
+ public_ip, config.ssh_user, config.key_path, SETUP_SCRIPT, config.python_version, port=config.ssh_port
235
+ ):
216
236
  success("Remote setup completed successfully.")
217
237
  else:
218
238
  warn("Remote setup failed. Instance is still running.")
@@ -224,6 +244,7 @@ def launch(
224
244
  user=config.ssh_user,
225
245
  key_path=config.key_path,
226
246
  alias_prefix=config.alias_prefix,
247
+ port=config.ssh_port,
227
248
  )
228
249
  success(f"Added SSH config alias: {alias}")
229
250
 
@@ -239,18 +260,27 @@ def launch(
239
260
  val("Pricing", pricing)
240
261
  val("SSH alias", alias)
241
262
 
263
+ port_flag = f" -p {config.ssh_port}" if config.ssh_port != 22 else ""
264
+
242
265
  click.echo()
243
266
  click.secho(" SSH:", fg="cyan")
244
- click.secho(f" ssh {alias}", bold=True)
245
- info(f"or: ssh -i {private_key} {config.ssh_user}@{public_ip}")
267
+ click.secho(f" ssh{port_flag} {alias}", bold=True)
268
+ info(f"or: ssh -i {private_key}{port_flag} {config.ssh_user}@{public_ip}")
246
269
 
247
270
  click.echo()
248
271
  click.secho(" Jupyter (via SSH tunnel):", fg="cyan")
249
- click.secho(f" ssh -NL 8888:localhost:8888 {alias}", bold=True)
250
- info(f"or: ssh -i {private_key} -NL 8888:localhost:8888 {config.ssh_user}@{public_ip}")
272
+ click.secho(f" ssh -NL 8888:localhost:8888{port_flag} {alias}", bold=True)
273
+ info(f"or: ssh -i {private_key} -NL 8888:localhost:8888{port_flag} {config.ssh_user}@{public_ip}")
251
274
  info("Then open: http://localhost:8888")
252
275
  info("Notebook: ~/gpu_smoke_test.ipynb (GPU smoke test)")
253
276
 
277
+ click.echo()
278
+ click.secho(" VSCode Remote SSH:", fg="cyan")
279
+ click.secho(
280
+ f" code --folder-uri vscode-remote://ssh-remote+{alias}/home/{config.ssh_user}/workspace",
281
+ bold=True,
282
+ )
283
+
254
284
  click.echo()
255
285
  click.secho(" GPU Benchmark:", fg="cyan")
256
286
  click.secho(f" ssh {alias} 'python ~/gpu_benchmark.py'", bold=True)
@@ -266,7 +296,14 @@ def launch(
266
296
  @click.option("--region", default="us-west-2", show_default=True, help="AWS region.")
267
297
  @click.option("--profile", default=None, help="AWS profile override.")
268
298
  @click.option("--gpu", is_flag=True, default=False, help="Query GPU info (CUDA, driver) via SSH.")
269
- def status(region, profile, gpu):
299
+ @click.option(
300
+ "--instructions/--no-instructions",
301
+ "-I",
302
+ default=True,
303
+ show_default=True,
304
+ help="Show connection commands (SSH, Jupyter, VSCode) for each running instance.",
305
+ )
306
+ def status(region, profile, gpu, instructions):
270
307
  """Show running instances created by aws-bootstrap."""
271
308
  session = boto3.Session(profile_name=profile, region_name=region)
272
309
  ec2 = session.client("ec2")
@@ -305,11 +342,15 @@ def status(region, profile, gpu):
305
342
  if inst["PublicIp"]:
306
343
  val(" IP", inst["PublicIp"])
307
344
 
345
+ # Look up SSH config details once (used by --gpu and --instructions)
346
+ details = None
347
+ if (gpu or instructions) and state == "running" and inst["PublicIp"]:
348
+ details = get_ssh_host_details(inst["InstanceId"])
349
+
308
350
  # GPU info (opt-in, only for running instances with a public IP)
309
351
  if gpu and state == "running" and inst["PublicIp"]:
310
- details = get_ssh_host_details(inst["InstanceId"])
311
352
  if details:
312
- gpu_info = query_gpu_info(details.hostname, details.user, details.identity_file)
353
+ gpu_info = query_gpu_info(details.hostname, details.user, details.identity_file, port=details.port)
313
354
  else:
314
355
  gpu_info = query_gpu_info(
315
356
  inst["PublicIp"],
@@ -353,6 +394,29 @@ def status(region, profile, gpu):
353
394
  val(" Est. cost", f"~${est_cost:.4f}")
354
395
 
355
396
  val(" Launched", str(inst["LaunchTime"]))
397
+
398
+ # Connection instructions (on by default, disable with --no-instructions; only for running instances with a public IP and alias)
399
+ if instructions and state == "running" and inst["PublicIp"] and alias:
400
+ user = details.user if details else "ubuntu"
401
+ port = details.port if details else 22
402
+ port_flag = f" -p {port}" if port != 22 else ""
403
+
404
+ click.echo()
405
+ click.secho(" SSH:", fg="cyan")
406
+ click.secho(f" ssh{port_flag} {alias}", bold=True)
407
+
408
+ click.secho(" Jupyter (via SSH tunnel):", fg="cyan")
409
+ click.secho(f" ssh -NL 8888:localhost:8888{port_flag} {alias}", bold=True)
410
+
411
+ click.secho(" VSCode Remote SSH:", fg="cyan")
412
+ click.secho(
413
+ f" code --folder-uri vscode-remote://ssh-remote+{alias}/home/{user}/workspace",
414
+ bold=True,
415
+ )
416
+
417
+ click.secho(" GPU Benchmark:", fg="cyan")
418
+ click.secho(f" ssh {alias} 'python ~/gpu_benchmark.py'", bold=True)
419
+
356
420
  click.echo()
357
421
  first_id = instances[0]["InstanceId"]
358
422
  click.echo(" To terminate: " + click.style(f"aws-bootstrap terminate {first_id}", bold=True))
aws_bootstrap/config.py CHANGED
@@ -22,3 +22,5 @@ class LaunchConfig:
22
22
  ssh_user: str = "ubuntu"
23
23
  tag_value: str = "aws-bootstrap-g4dn"
24
24
  alias_prefix: str = "aws-gpu"
25
+ ssh_port: int = 22
26
+ python_version: str | None = None
aws_bootstrap/ec2.py CHANGED
@@ -59,7 +59,7 @@ def get_latest_ami(ec2_client, ami_filter: str) -> dict:
59
59
  return images[0]
60
60
 
61
61
 
62
- def ensure_security_group(ec2_client, name: str, tag_value: str) -> str:
62
+ def ensure_security_group(ec2_client, name: str, tag_value: str, ssh_port: int = 22) -> str:
63
63
  """Find or create a security group with SSH ingress in the default VPC."""
64
64
  # Find default VPC
65
65
  vpcs = ec2_client.describe_vpcs(Filters=[{"Name": "isDefault", "Values": ["true"]}])
@@ -103,8 +103,8 @@ def ensure_security_group(ec2_client, name: str, tag_value: str) -> str:
103
103
  IpPermissions=[
104
104
  {
105
105
  "IpProtocol": "tcp",
106
- "FromPort": 22,
107
- "ToPort": 22,
106
+ "FromPort": ssh_port,
107
+ "ToPort": ssh_port,
108
108
  "IpRanges": [{"CidrIp": "0.0.0.0/0", "Description": "SSH access"}],
109
109
  }
110
110
  ],
aws_bootstrap/gpu.py ADDED
@@ -0,0 +1,27 @@
1
+ """GPU architecture mapping and GPU info dataclass."""
2
+
3
+ from __future__ import annotations
4
+ from dataclasses import dataclass
5
+
6
+
7
+ _GPU_ARCHITECTURES: dict[str, str] = {
8
+ "7.0": "Volta",
9
+ "7.5": "Turing",
10
+ "8.0": "Ampere",
11
+ "8.6": "Ampere",
12
+ "8.7": "Ampere",
13
+ "8.9": "Ada Lovelace",
14
+ "9.0": "Hopper",
15
+ }
16
+
17
+
18
+ @dataclass
19
+ class GpuInfo:
20
+ """GPU information retrieved via nvidia-smi and nvcc."""
21
+
22
+ driver_version: str
23
+ cuda_driver_version: str # max CUDA version supported by driver (from nvidia-smi)
24
+ cuda_toolkit_version: str | None # actual CUDA toolkit installed (from nvcc), None if unavailable
25
+ gpu_name: str
26
+ compute_capability: str
27
+ architecture: str
@@ -628,7 +628,9 @@ def configure_precision(device: torch.device, requested: PrecisionMode) -> Preci
628
628
  return PrecisionMode.FP32
629
629
 
630
630
 
631
- def print_system_info(requested_precision: PrecisionMode) -> tuple[torch.device, PrecisionMode]:
631
+ def print_system_info(
632
+ requested_precision: PrecisionMode, force_cpu: bool = False
633
+ ) -> tuple[torch.device, PrecisionMode]:
632
634
  """Print system and CUDA information, return device and actual precision mode."""
633
635
  print("\n" + "=" * 60)
634
636
  print("System Information")
@@ -636,7 +638,7 @@ def print_system_info(requested_precision: PrecisionMode) -> tuple[torch.device,
636
638
  print(f"PyTorch version: {torch.__version__}")
637
639
  print(f"Python version: {sys.version.split()[0]}")
638
640
 
639
- if torch.cuda.is_available():
641
+ if torch.cuda.is_available() and not force_cpu:
640
642
  device = torch.device("cuda")
641
643
  print("CUDA available: Yes")
642
644
  print(f"CUDA version: {torch.version.cuda}")
@@ -666,8 +668,11 @@ def print_system_info(requested_precision: PrecisionMode) -> tuple[torch.device,
666
668
  else:
667
669
  device = torch.device("cpu")
668
670
  actual_precision = PrecisionMode.FP32
669
- print("CUDA available: No (running on CPU)")
670
- print("WARNING: GPU benchmark results will not be representative!")
671
+ if force_cpu:
672
+ print("CPU-only mode requested (--cpu flag)")
673
+ else:
674
+ print("CUDA available: No (running on CPU)")
675
+ print("Running on CPU for benchmarking")
671
676
 
672
677
  print("=" * 60)
673
678
  return device, actual_precision
@@ -724,10 +729,15 @@ def main() -> None:
724
729
  action="store_true",
725
730
  help="Run CUDA/cuBLAS diagnostic tests before benchmarking",
726
731
  )
732
+ parser.add_argument(
733
+ "--cpu",
734
+ action="store_true",
735
+ help="Force CPU-only execution (for CPU vs GPU comparison)",
736
+ )
727
737
  args = parser.parse_args()
728
738
 
729
739
  requested_precision = PrecisionMode(args.precision)
730
- device, actual_precision = print_system_info(requested_precision)
740
+ device, actual_precision = print_system_info(requested_precision, force_cpu=args.cpu)
731
741
 
732
742
  # Run diagnostics if requested
733
743
  if args.diagnose:
@@ -0,0 +1,42 @@
1
+ {
2
+ // CUDA debug configurations for VSCode
3
+ // Deployed to: ~/workspace/.vscode/launch.json
4
+ //
5
+ // Usage: Open any .cu file, press F5 to build and debug
6
+ "version": "0.2.0",
7
+ "configurations": [
8
+ {
9
+ "name": "CUDA: Build and Debug Active File",
10
+ "type": "cuda-gdb",
11
+ "request": "launch",
12
+ "program": "${fileDirname}/${fileBasenameNoExtension}",
13
+ "args": [],
14
+ "cwd": "${fileDirname}",
15
+ "miDebuggerPath": "__CUDA_GDB_PATH__",
16
+ "stopAtEntry": false,
17
+ "preLaunchTask": "nvcc: build active file (debug)"
18
+ },
19
+ {
20
+ "name": "CUDA: Build and Debug (stop at main)",
21
+ "type": "cuda-gdb",
22
+ "request": "launch",
23
+ "program": "${fileDirname}/${fileBasenameNoExtension}",
24
+ "args": [],
25
+ "cwd": "${fileDirname}",
26
+ "miDebuggerPath": "__CUDA_GDB_PATH__",
27
+ "stopAtEntry": true,
28
+ "preLaunchTask": "nvcc: build active file (debug)"
29
+ },
30
+ {
31
+ "name": "CUDA: Run Active File (no debug)",
32
+ "type": "cuda-gdb",
33
+ "request": "launch",
34
+ "program": "${fileDirname}/${fileBasenameNoExtension}",
35
+ "args": [],
36
+ "cwd": "${fileDirname}",
37
+ "miDebuggerPath": "__CUDA_GDB_PATH__",
38
+ "stopAtEntry": false,
39
+ "preLaunchTask": "nvcc: build active file (release)"
40
+ }
41
+ ]
42
+ }
@@ -7,7 +7,7 @@ echo "=== aws-bootstrap-g4dn remote setup ==="
7
7
 
8
8
  # 1. Verify GPU
9
9
  echo ""
10
- echo "[1/5] Verifying GPU and CUDA..."
10
+ echo "[1/6] Verifying GPU and CUDA..."
11
11
  if command -v nvidia-smi &>/dev/null; then
12
12
  nvidia-smi --query-gpu=name,driver_version,memory.total --format=csv,noheader
13
13
  else
@@ -20,21 +20,52 @@ else
20
20
  echo "WARNING: nvcc not found (CUDA toolkit may not be installed)"
21
21
  fi
22
22
 
23
+ # Make Nsight Systems (nsys) available on PATH if installed under /opt/nvidia
24
+ if ! command -v nsys &>/dev/null; then
25
+ NSIGHT_DIR="/opt/nvidia/nsight-systems"
26
+ if [ -d "$NSIGHT_DIR" ]; then
27
+ # Fix permissions — the parent dir is often root-only (drwx------)
28
+ sudo chmod o+rx "$NSIGHT_DIR"
29
+ # Find the latest version directory (version-aware sort via `sort -V`)
30
+ NSYS_VERSION=$(ls -1 "$NSIGHT_DIR" | sort -V | tail -1)
31
+ if [ -n "$NSYS_VERSION" ] && [ -x "$NSIGHT_DIR/$NSYS_VERSION/bin/nsys" ]; then
32
+ NSYS_BIN="$NSIGHT_DIR/$NSYS_VERSION/bin"
33
+ if ! grep -q "nsight-systems" ~/.bashrc 2>/dev/null; then
34
+ echo "export PATH=\"$NSYS_BIN:\$PATH\"" >> ~/.bashrc
35
+ fi
36
+ export PATH="$NSYS_BIN:$PATH"
37
+ echo " Nsight Systems $NSYS_VERSION added to PATH ($NSYS_BIN)"
38
+ else
39
+ echo " WARNING: Nsight Systems directory found but no nsys binary"
40
+ fi
41
+ else
42
+ echo " Nsight Systems not found at $NSIGHT_DIR"
43
+ fi
44
+ else
45
+ echo " nsys already on PATH: $(command -v nsys)"
46
+ fi
47
+
23
48
  # 2. Install utilities
24
49
  echo ""
25
- echo "[2/5] Installing utilities..."
50
+ echo "[2/6] Installing utilities..."
26
51
  sudo apt-get update -qq
27
52
  sudo apt-get install -y -qq htop tmux tree jq
28
53
 
29
54
  # 3. Set up Python environment with uv
30
55
  echo ""
31
- echo "[3/5] Setting up Python environment with uv..."
56
+ echo "[3/6] Setting up Python environment with uv..."
32
57
  if ! command -v uv &>/dev/null; then
33
58
  curl -LsSf https://astral.sh/uv/install.sh | sh
34
59
  fi
35
60
  export PATH="$HOME/.local/bin:$PATH"
36
61
 
37
- uv venv ~/venv
62
+ if [ -n "${PYTHON_VERSION:-}" ]; then
63
+ echo " Installing Python ${PYTHON_VERSION}..."
64
+ uv python install "$PYTHON_VERSION"
65
+ uv venv --python "$PYTHON_VERSION" ~/venv
66
+ else
67
+ uv venv ~/venv
68
+ fi
38
69
 
39
70
  # --- CUDA-aware PyTorch installation ---
40
71
  # Known PyTorch CUDA wheel tags (ascending order).
@@ -147,7 +178,7 @@ echo " Jupyter config written to $JUPYTER_CONFIG_DIR/jupyter_lab_config.py"
147
178
 
148
179
  # 4. Jupyter systemd service
149
180
  echo ""
150
- echo "[4/5] Setting up Jupyter systemd service..."
181
+ echo "[4/6] Setting up Jupyter systemd service..."
151
182
  LOGIN_USER=$(whoami)
152
183
 
153
184
  sudo tee /etc/systemd/system/jupyter.service > /dev/null << SVCEOF
@@ -174,7 +205,7 @@ echo " Jupyter service started (port 8888)"
174
205
 
175
206
  # 5. SSH keepalive
176
207
  echo ""
177
- echo "[5/5] Configuring SSH keepalive..."
208
+ echo "[5/6] Configuring SSH keepalive..."
178
209
  if ! grep -q "ClientAliveInterval" /etc/ssh/sshd_config; then
179
210
  echo "ClientAliveInterval 60" | sudo tee -a /etc/ssh/sshd_config > /dev/null
180
211
  echo "ClientAliveCountMax 10" | sudo tee -a /etc/ssh/sshd_config > /dev/null
@@ -184,5 +215,58 @@ else
184
215
  echo " SSH keepalive already configured"
185
216
  fi
186
217
 
218
+ # 6. VSCode workspace setup
219
+ echo ""
220
+ echo "[6/6] Setting up VSCode workspace..."
221
+ mkdir -p ~/workspace/.vscode
222
+
223
+ # Detect cuda-gdb path
224
+ CUDA_GDB_PATH=""
225
+ if command -v cuda-gdb &>/dev/null; then
226
+ CUDA_GDB_PATH=$(command -v cuda-gdb)
227
+ elif [ -x /usr/local/cuda/bin/cuda-gdb ]; then
228
+ CUDA_GDB_PATH="/usr/local/cuda/bin/cuda-gdb"
229
+ else
230
+ # Try glob for versioned CUDA installs
231
+ for p in /usr/local/cuda-*/bin/cuda-gdb; do
232
+ if [ -x "$p" ]; then
233
+ CUDA_GDB_PATH="$p"
234
+ fi
235
+ done
236
+ fi
237
+ if [ -z "$CUDA_GDB_PATH" ]; then
238
+ echo " WARNING: cuda-gdb not found — using placeholder in launch.json"
239
+ CUDA_GDB_PATH="cuda-gdb"
240
+ else
241
+ echo " cuda-gdb: $CUDA_GDB_PATH"
242
+ fi
243
+
244
+ # Detect GPU SM architecture
245
+ GPU_ARCH=""
246
+ if command -v nvidia-smi &>/dev/null; then
247
+ COMPUTE_CAP=$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader 2>/dev/null | head -1 | tr -d '[:space:]')
248
+ if [ -n "$COMPUTE_CAP" ]; then
249
+ GPU_ARCH="sm_$(echo "$COMPUTE_CAP" | tr -d '.')"
250
+ fi
251
+ fi
252
+ if [ -z "$GPU_ARCH" ]; then
253
+ echo " WARNING: Could not detect GPU arch — defaulting to sm_75"
254
+ GPU_ARCH="sm_75"
255
+ else
256
+ echo " GPU arch: $GPU_ARCH"
257
+ fi
258
+
259
+ # Copy example CUDA source into workspace
260
+ cp /tmp/saxpy.cu ~/workspace/saxpy.cu
261
+ echo " Deployed saxpy.cu"
262
+
263
+ # Deploy launch.json with cuda-gdb path
264
+ sed "s|__CUDA_GDB_PATH__|${CUDA_GDB_PATH}|g" /tmp/launch.json > ~/workspace/.vscode/launch.json
265
+ echo " Deployed launch.json"
266
+
267
+ # Deploy tasks.json with GPU architecture
268
+ sed "s|__GPU_ARCH__|${GPU_ARCH}|g" /tmp/tasks.json > ~/workspace/.vscode/tasks.json
269
+ echo " Deployed tasks.json"
270
+
187
271
  echo ""
188
272
  echo "=== Remote setup complete ==="
@@ -0,0 +1,49 @@
1
+ /**
2
+ * SAXPY Example, CUDA Style
3
+ * Source: https://developer.nvidia.com/blog/easy-introduction-cuda-c-and-c/
4
+ *
5
+ * This is included as an example CUDA C++ source file to try out the VS Code launch configuration we include on the host machine.
6
+ *
7
+ */
8
+ #include <stdio.h>
9
+
10
+ __global__
11
+ void saxpy(int n, float a, float *x, float *y)
12
+ {
13
+ int i = blockIdx.x*blockDim.x + threadIdx.x;
14
+ if (i < n) y[i] = a*x[i] + y[i];
15
+ }
16
+
17
+ int main(void)
18
+ {
19
+ int N = 1<<20;
20
+ float *x, *y, *d_x, *d_y;
21
+ x = (float*)malloc(N*sizeof(float));
22
+ y = (float*)malloc(N*sizeof(float));
23
+
24
+ cudaMalloc(&d_x, N*sizeof(float));
25
+ cudaMalloc(&d_y, N*sizeof(float));
26
+
27
+ for (int i = 0; i < N; i++) {
28
+ x[i] = 1.0f;
29
+ y[i] = 2.0f;
30
+ }
31
+
32
+ cudaMemcpy(d_x, x, N*sizeof(float), cudaMemcpyHostToDevice);
33
+ cudaMemcpy(d_y, y, N*sizeof(float), cudaMemcpyHostToDevice);
34
+
35
+ // Perform SAXPY on 1M elements
36
+ saxpy<<<(N+255)/256, 256>>>(N, 2.0f, d_x, d_y);
37
+
38
+ cudaMemcpy(y, d_y, N*sizeof(float), cudaMemcpyDeviceToHost);
39
+
40
+ float maxError = 0.0f;
41
+ for (int i = 0; i < N; i++)
42
+ maxError = max(maxError, abs(y[i]-4.0f));
43
+ printf("Max error: %f\n", maxError);
44
+
45
+ cudaFree(d_x);
46
+ cudaFree(d_y);
47
+ free(x);
48
+ free(y);
49
+ }
@@ -0,0 +1,48 @@
1
+ {
2
+ // CUDA build tasks for VSCode
3
+ // Deployed to: ~/workspace/.vscode/tasks.json
4
+ "version": "2.0.0",
5
+ "tasks": [
6
+ {
7
+ "label": "nvcc: build active file (debug)",
8
+ "type": "shell",
9
+ "command": "nvcc",
10
+ "args": [
11
+ "-g", // Host debug symbols
12
+ "-G", // Device (GPU) debug symbols
13
+ "-O0", // No optimization
14
+ "-arch=__GPU_ARCH__", // GPU arch (auto-detected)
15
+ "-o",
16
+ "${fileDirname}/${fileBasenameNoExtension}",
17
+ "${file}"
18
+ ],
19
+ "options": {
20
+ "cwd": "${fileDirname}"
21
+ },
22
+ "problemMatcher": ["$nvcc"],
23
+ "group": {
24
+ "kind": "build",
25
+ "isDefault": true
26
+ },
27
+ "detail": "Compile active .cu file with debug symbols (-g -G)"
28
+ },
29
+ {
30
+ "label": "nvcc: build active file (release)",
31
+ "type": "shell",
32
+ "command": "nvcc",
33
+ "args": [
34
+ "-O3",
35
+ "-arch=__GPU_ARCH__",
36
+ "-o",
37
+ "${fileDirname}/${fileBasenameNoExtension}",
38
+ "${file}"
39
+ ],
40
+ "options": {
41
+ "cwd": "${fileDirname}"
42
+ },
43
+ "problemMatcher": ["$nvcc"],
44
+ "group": "build",
45
+ "detail": "Compile active .cu file optimized (no debug)"
46
+ }
47
+ ]
48
+ }