alloc 0.2.0__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. {alloc-0.2.0 → alloc-0.3.0}/PKG-INFO +25 -5
  2. {alloc-0.2.0 → alloc-0.3.0}/README.md +24 -4
  3. {alloc-0.2.0 → alloc-0.3.0}/pyproject.toml +4 -1
  4. alloc-0.3.0/src/alloc/__init__.py +10 -0
  5. {alloc-0.2.0 → alloc-0.3.0}/src/alloc/artifact_writer.py +5 -1
  6. {alloc-0.2.0 → alloc-0.3.0}/src/alloc/callbacks.py +5 -0
  7. alloc-0.3.0/src/alloc/catalog/__init__.py +109 -0
  8. alloc-0.3.0/src/alloc/catalog/default_rate_card.json +18 -0
  9. alloc-0.3.0/src/alloc/catalog/gpus.v1.json +174 -0
  10. {alloc-0.2.0 → alloc-0.3.0}/src/alloc/cli.py +280 -35
  11. alloc-0.3.0/src/alloc/context.py +191 -0
  12. alloc-0.3.0/src/alloc/display.py +510 -0
  13. {alloc-0.2.0 → alloc-0.3.0}/src/alloc/ghost.py +5 -1
  14. alloc-0.3.0/src/alloc/probe.py +449 -0
  15. alloc-0.3.0/src/alloc/stability.py +144 -0
  16. {alloc-0.2.0 → alloc-0.3.0}/src/alloc/upload.py +19 -1
  17. {alloc-0.2.0 → alloc-0.3.0}/src/alloc.egg-info/PKG-INFO +25 -5
  18. {alloc-0.2.0 → alloc-0.3.0}/src/alloc.egg-info/SOURCES.txt +14 -1
  19. alloc-0.3.0/tests/test_artifact.py +128 -0
  20. alloc-0.3.0/tests/test_catalog.py +83 -0
  21. alloc-0.3.0/tests/test_cli.py +130 -0
  22. alloc-0.3.0/tests/test_context.py +135 -0
  23. {alloc-0.2.0 → alloc-0.3.0}/tests/test_ghost.py +9 -2
  24. alloc-0.3.0/tests/test_probe_hw.py +83 -0
  25. alloc-0.3.0/tests/test_probe_multi.py +114 -0
  26. alloc-0.3.0/tests/test_stability.py +173 -0
  27. alloc-0.3.0/tests/test_upload.py +105 -0
  28. alloc-0.3.0/tests/test_verdict.py +187 -0
  29. alloc-0.2.0/src/alloc/__init__.py +0 -9
  30. alloc-0.2.0/src/alloc/display.py +0 -85
  31. alloc-0.2.0/src/alloc/probe.py +0 -229
  32. alloc-0.2.0/tests/test_cli.py +0 -38
  33. {alloc-0.2.0 → alloc-0.3.0}/setup.cfg +0 -0
  34. {alloc-0.2.0 → alloc-0.3.0}/src/alloc/config.py +0 -0
  35. {alloc-0.2.0 → alloc-0.3.0}/src/alloc.egg-info/dependency_links.txt +0 -0
  36. {alloc-0.2.0 → alloc-0.3.0}/src/alloc.egg-info/entry_points.txt +0 -0
  37. {alloc-0.2.0 → alloc-0.3.0}/src/alloc.egg-info/requires.txt +0 -0
  38. {alloc-0.2.0 → alloc-0.3.0}/src/alloc.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: alloc
3
- Version: 0.2.0
3
+ Version: 0.3.0
4
4
  Summary: GPU intelligence for ML training — right-size before you launch.
5
5
  Author-email: Alloc Labs <hello@alloclabs.com>
6
6
  License: Apache-2.0
@@ -61,14 +61,19 @@ Analyzes model parameters from the script filename and computes VRAM breakdown.
61
61
  ### `alloc run` — Training with GPU monitoring
62
62
 
63
63
  ```bash
64
- alloc run python train.py
64
+ alloc run python train.py # calibrate and exit (default)
65
+ alloc run --full python train.py # monitor full training run
65
66
  alloc run torchrun --nproc_per_node=4 train.py
66
67
  alloc run -- python train.py --epochs 10
67
68
  ```
68
69
 
69
70
  Wraps your command, monitors GPU memory/utilization/power via `pynvml`, and writes an artifact.
70
71
 
71
- **Short-run mode:** `--probe-steps N` profiles for N samples then auto-stops when metrics stabilize (variance threshold < 5.0% over 20 samples).
72
+ **Default: calibrate-and-exit.** Auto-stops when GPU metrics stabilize (~30-60s), prints a verdict with bottleneck classification and recommendation, then exits. Use `--full` to monitor the entire run. Use `--timeout N` to adjust max calibration time (default 120s).
73
+
74
+ **Multi-GPU:** Automatically discovers all GPUs used by the process tree (works with `torchrun`, `accelerate launch`, etc.).
75
+
76
+ **Hardware context:** Captures driver version, CUDA version, and SM compute capability from NVML.
72
77
 
73
78
  ### `alloc login` — Authenticate with dashboard
74
79
 
@@ -85,6 +90,18 @@ alloc upload alloc_artifact.json.gz
85
90
 
86
91
  Uploads a previously saved `.json.gz` artifact to the dashboard via `POST /runs/ingest`. Requires authentication (`alloc login` first).
87
92
 
93
+ ### `alloc catalog` — Browse GPU hardware catalog
94
+
95
+ ```bash
96
+ alloc catalog list # list all 13 GPUs (sorted by VRAM)
97
+ alloc catalog list --sort cost # sort by $/hr
98
+ alloc catalog list --sort tflops # sort by BF16 TFLOPS
99
+ alloc catalog show H100 # detailed specs for H100
100
+ alloc catalog show nvidia-a100-sxm-80gb # lookup by stable ID
101
+ ```
102
+
103
+ Offline reference for GPU specs, interconnect details, and cloud pricing. Supports aliases (H100, A100, T4) and stable IDs.
104
+
88
105
  ### `alloc version`
89
106
 
90
107
  ```bash
@@ -119,8 +136,11 @@ All config via environment variables. Zero config files required.
119
136
  | Module | Purpose |
120
137
  |--------|---------|
121
138
  | `ghost.py` | Static VRAM analysis via parameter walking. With torch: `model.named_parameters()`. Without: pure math from param count. |
122
- | `probe.py` | External GPU monitoring via `pynvml`. Runs user script unmodified as subprocess. |
123
- | `artifact_writer.py` | Artifact Writer: writes `alloc_artifact.json.gz`, optional W&B upload. |
139
+ | `probe.py` | External GPU monitoring via `pynvml`. Process-tree aware multi-GPU discovery. Captures hardware context (driver, CUDA, SM version). |
140
+ | `stability.py` | Multi-signal stability detection for calibrate-and-exit (VRAM plateau + util std dev + power std dev). |
141
+ | `catalog/` | Bundled GPU hardware catalog (13 GPUs) with specs and pricing. Powers `alloc catalog` commands. |
142
+ | `context.py` | Context autodiscovery: git (SHA, branch, repo), container (Docker/Podman), Ray (job ID, cluster). |
143
+ | `artifact_writer.py` | Artifact Writer: writes `alloc_artifact.json.gz` (v0.5.0) with probe, ghost, hardware, and context sections. |
124
144
  | `cli.py` | Typer CLI with `ghost`, `run`, `scan`, `login`, `upload`, `catalog`, `version` commands. |
125
145
  | `callbacks.py` | Framework callbacks: HuggingFace `TrainerCallback` (step count capture). |
126
146
  | `upload.py` | Artifact uploader: POSTs `.json.gz` to `POST /runs/ingest`. |
@@ -32,14 +32,19 @@ Analyzes model parameters from the script filename and computes VRAM breakdown.
32
32
  ### `alloc run` — Training with GPU monitoring
33
33
 
34
34
  ```bash
35
- alloc run python train.py
35
+ alloc run python train.py # calibrate and exit (default)
36
+ alloc run --full python train.py # monitor full training run
36
37
  alloc run torchrun --nproc_per_node=4 train.py
37
38
  alloc run -- python train.py --epochs 10
38
39
  ```
39
40
 
40
41
  Wraps your command, monitors GPU memory/utilization/power via `pynvml`, and writes an artifact.
41
42
 
42
- **Short-run mode:** `--probe-steps N` profiles for N samples then auto-stops when metrics stabilize (variance threshold < 5.0% over 20 samples).
43
+ **Default: calibrate-and-exit.** Auto-stops when GPU metrics stabilize (~30-60s), prints a verdict with bottleneck classification and recommendation, then exits. Use `--full` to monitor the entire run. Use `--timeout N` to adjust max calibration time (default 120s).
44
+
45
+ **Multi-GPU:** Automatically discovers all GPUs used by the process tree (works with `torchrun`, `accelerate launch`, etc.).
46
+
47
+ **Hardware context:** Captures driver version, CUDA version, and SM compute capability from NVML.
43
48
 
44
49
  ### `alloc login` — Authenticate with dashboard
45
50
 
@@ -56,6 +61,18 @@ alloc upload alloc_artifact.json.gz
56
61
 
57
62
  Uploads a previously saved `.json.gz` artifact to the dashboard via `POST /runs/ingest`. Requires authentication (`alloc login` first).
58
63
 
64
+ ### `alloc catalog` — Browse GPU hardware catalog
65
+
66
+ ```bash
67
+ alloc catalog list # list all 13 GPUs (sorted by VRAM)
68
+ alloc catalog list --sort cost # sort by $/hr
69
+ alloc catalog list --sort tflops # sort by BF16 TFLOPS
70
+ alloc catalog show H100 # detailed specs for H100
71
+ alloc catalog show nvidia-a100-sxm-80gb # lookup by stable ID
72
+ ```
73
+
74
+ Offline reference for GPU specs, interconnect details, and cloud pricing. Supports aliases (H100, A100, T4) and stable IDs.
75
+
59
76
  ### `alloc version`
60
77
 
61
78
  ```bash
@@ -90,8 +107,11 @@ All config via environment variables. Zero config files required.
90
107
  | Module | Purpose |
91
108
  |--------|---------|
92
109
  | `ghost.py` | Static VRAM analysis via parameter walking. With torch: `model.named_parameters()`. Without: pure math from param count. |
93
- | `probe.py` | External GPU monitoring via `pynvml`. Runs user script unmodified as subprocess. |
94
- | `artifact_writer.py` | Artifact Writer: writes `alloc_artifact.json.gz`, optional W&B upload. |
110
+ | `probe.py` | External GPU monitoring via `pynvml`. Process-tree aware multi-GPU discovery. Captures hardware context (driver, CUDA, SM version). |
111
+ | `stability.py` | Multi-signal stability detection for calibrate-and-exit (VRAM plateau + util std dev + power std dev). |
112
+ | `catalog/` | Bundled GPU hardware catalog (13 GPUs) with specs and pricing. Powers `alloc catalog` commands. |
113
+ | `context.py` | Context autodiscovery: git (SHA, branch, repo), container (Docker/Podman), Ray (job ID, cluster). |
114
+ | `artifact_writer.py` | Artifact Writer: writes `alloc_artifact.json.gz` (v0.5.0) with probe, ghost, hardware, and context sections. |
95
115
  | `cli.py` | Typer CLI with `ghost`, `run`, `scan`, `login`, `upload`, `catalog`, `version` commands. |
96
116
  | `callbacks.py` | Framework callbacks: HuggingFace `TrainerCallback` (step count capture). |
97
117
  | `upload.py` | Artifact uploader: POSTs `.json.gz` to `POST /runs/ingest`. |
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "alloc"
7
- version = "0.2.0"
7
+ version = "0.3.0"
8
8
  description = "GPU intelligence for ML training — right-size before you launch."
9
9
  readme = "README.md"
10
10
  license = {text = "Apache-2.0"}
@@ -41,3 +41,6 @@ Repository = "https://github.com/alloc-labs/alloc"
41
41
 
42
42
  [tool.setuptools.packages.find]
43
43
  where = ["src"]
44
+
45
+ [tool.setuptools.package-data]
46
+ "alloc.catalog" = ["*.json"]
@@ -0,0 +1,10 @@
1
+ """Alloc — GPU intelligence for ML training."""
2
+
3
+ from __future__ import annotations
4
+
5
+ __version__ = "0.3.0"
6
+
7
+ from alloc.ghost import ghost, GhostReport
8
+ from alloc.callbacks import AllocCallback as HuggingFaceCallback
9
+
10
+ __all__ = ["ghost", "GhostReport", "HuggingFaceCallback", "__version__"]
@@ -16,6 +16,8 @@ def write_report(
16
16
  ghost_report: Optional[dict] = None,
17
17
  probe_result: Optional[dict] = None,
18
18
  output_path: Optional[str] = None,
19
+ hardware_context: Optional[dict] = None,
20
+ context: Optional[dict] = None,
19
21
  ) -> str:
20
22
  """Write an artifact to disk.
21
23
 
@@ -34,10 +36,12 @@ def write_report(
34
36
  )
35
37
 
36
38
  report = {
37
- "version": "0.2.0",
39
+ "version": "0.5.0",
38
40
  "timestamp": datetime.now(timezone.utc).isoformat(),
39
41
  "ghost": ghost_report,
40
42
  "probe": probe_result,
43
+ "hardware": hardware_context,
44
+ "context": context if context else None,
41
45
  }
42
46
 
43
47
  with gzip.open(resolved_path, "wt", encoding="utf-8") as f:
@@ -45,9 +45,14 @@ try:
45
45
  def __init__(self):
46
46
  # type: () -> None
47
47
  self.step_count = 0 # type: int
48
+ self._last_write_step = 0 # type: int
49
+ self._write_every = 10 # type: int
48
50
 
49
51
  def on_step_end(self, args, state, control, **kwargs):
50
52
  self.step_count = state.global_step
53
+ if self.step_count - self._last_write_step >= self._write_every:
54
+ _write_step_count(self.step_count, framework="huggingface")
55
+ self._last_write_step = self.step_count
51
56
 
52
57
  def on_train_end(self, args, state, control, **kwargs):
53
58
  self.step_count = state.global_step
@@ -0,0 +1,109 @@
1
+ """GPU catalog — offline hardware specs and pricing for CLI.
2
+
3
+ Source of truth: apps/api/src/engine/catalog/gpus.v1.json
4
+ This is a bundled copy for offline CLI use. Update when the API catalog changes.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import json
10
+ from pathlib import Path
11
+ from typing import Dict, List, Optional
12
+
13
+ _CATALOG_DIR = Path(__file__).parent
14
+
15
+ # Aliases for common shorthand names
16
+ _ALIASES = {
17
+ "H100": "nvidia-h100-sxm-80gb",
18
+ "H100-80GB": "nvidia-h100-sxm-80gb",
19
+ "A100": "nvidia-a100-sxm-80gb",
20
+ "A100-80GB": "nvidia-a100-sxm-80gb",
21
+ "A100-40GB": "nvidia-a100-40gb",
22
+ "A10G": "nvidia-a10g-24gb",
23
+ "L40S": "nvidia-l40s-48gb",
24
+ "L4": "nvidia-l4-24gb",
25
+ "T4": "nvidia-t4-16gb",
26
+ "V100": "nvidia-v100-32gb",
27
+ "V100-32GB": "nvidia-v100-32gb",
28
+ "V100-16GB": "nvidia-v100-16gb",
29
+ "RTX-4090": "nvidia-rtx4090-24gb",
30
+ "RTX-3090": "nvidia-rtx3090-24gb",
31
+ "H200": "nvidia-h200-141gb",
32
+ "H100-NVL": "nvidia-h100-nvl-94gb",
33
+ }
34
+
35
+
36
+ def _load_catalog() -> dict:
37
+ """Load GPU catalog from bundled JSON."""
38
+ with open(_CATALOG_DIR / "gpus.v1.json") as f:
39
+ return json.load(f)
40
+
41
+
42
+ def _load_rate_card() -> dict:
43
+ """Load default rate card from bundled JSON."""
44
+ with open(_CATALOG_DIR / "default_rate_card.json") as f:
45
+ return json.load(f)
46
+
47
+
48
+ def list_gpus() -> List[dict]:
49
+ """Return all GPUs sorted by VRAM descending.
50
+
51
+ Each entry has: id, display_name, vendor, vram_gb, architecture, bandwidth_gbps,
52
+ bf16_tflops, fp16_tflops, fp32_tflops, tf32_tflops, tdp_watts, interconnect, pricing.
53
+ """
54
+ catalog = _load_catalog()
55
+ rate_card = _load_rate_card()
56
+
57
+ result = []
58
+ for gpu_id, spec in catalog.get("gpus", {}).items():
59
+ pricing = rate_card.get("rates", {}).get(spec["display_name"], {})
60
+ result.append({
61
+ "id": gpu_id,
62
+ "display_name": spec["display_name"],
63
+ "vendor": spec.get("vendor", "nvidia"),
64
+ "vram_gb": spec["vram_gb"],
65
+ "architecture": spec.get("architecture", ""),
66
+ "bandwidth_gbps": spec.get("bandwidth_gbps", 0),
67
+ "bf16_tflops": spec.get("bf16_tflops", 0),
68
+ "fp16_tflops": spec.get("fp16_tflops", 0),
69
+ "fp32_tflops": spec.get("fp32_tflops", 0),
70
+ "tf32_tflops": spec.get("tf32_tflops", 0),
71
+ "tdp_watts": spec.get("tdp_watts", 0),
72
+ "interconnect": spec.get("interconnect"),
73
+ "pricing": pricing,
74
+ })
75
+
76
+ return sorted(result, key=lambda x: x["vram_gb"], reverse=True)
77
+
78
+
79
+ def get_gpu(gpu_id: str) -> Optional[dict]:
80
+ """Look up a GPU by stable ID or alias.
81
+
82
+ Returns full spec dict or None if not found.
83
+ """
84
+ # Resolve aliases
85
+ resolved = _ALIASES.get(gpu_id, gpu_id)
86
+
87
+ catalog = _load_catalog()
88
+ rate_card = _load_rate_card()
89
+
90
+ spec = catalog.get("gpus", {}).get(resolved)
91
+ if not spec:
92
+ return None
93
+
94
+ pricing = rate_card.get("rates", {}).get(spec["display_name"], {})
95
+ return {
96
+ "id": resolved,
97
+ "display_name": spec["display_name"],
98
+ "vendor": spec.get("vendor", "nvidia"),
99
+ "vram_gb": spec["vram_gb"],
100
+ "architecture": spec.get("architecture", ""),
101
+ "bandwidth_gbps": spec.get("bandwidth_gbps", 0),
102
+ "bf16_tflops": spec.get("bf16_tflops", 0),
103
+ "fp16_tflops": spec.get("fp16_tflops", 0),
104
+ "fp32_tflops": spec.get("fp32_tflops", 0),
105
+ "tf32_tflops": spec.get("tf32_tflops", 0),
106
+ "tdp_watts": spec.get("tdp_watts", 0),
107
+ "interconnect": spec.get("interconnect"),
108
+ "pricing": pricing,
109
+ }
@@ -0,0 +1,18 @@
1
+ {
2
+ "version": "1",
3
+ "rates": {
4
+ "H200": { "aws": 5.50, "gcp": 5.30, "azure": 5.40 },
5
+ "H100-80GB": { "aws": 4.00, "gcp": 3.90, "azure": 3.85 },
6
+ "H100-NVL": { "aws": 4.50, "gcp": 4.40, "azure": 4.30 },
7
+ "A100-80GB": { "aws": 2.50, "gcp": 2.48, "azure": 2.55 },
8
+ "A100-40GB": { "aws": 2.00, "gcp": 1.95, "azure": 2.10 },
9
+ "A10G": { "aws": 0.75, "gcp": 0.70, "azure": 0.80 },
10
+ "L40S": { "aws": 1.50, "gcp": 1.45, "azure": 1.55 },
11
+ "L4": { "aws": 0.50, "gcp": 0.45, "azure": 0.55 },
12
+ "T4": { "aws": 0.35, "gcp": 0.30, "azure": 0.40 },
13
+ "V100-32GB": { "aws": 1.20, "gcp": 1.15, "azure": 1.25 },
14
+ "V100-16GB": { "aws": 0.90, "gcp": 0.85, "azure": 0.95 },
15
+ "RTX-4090": { "lambda": 0.70, "coreweave": 0.74 },
16
+ "RTX-3090": { "lambda": 0.50, "coreweave": 0.54 }
17
+ }
18
+ }
@@ -0,0 +1,174 @@
1
+ {
2
+ "version": "1",
3
+ "gpus": {
4
+ "nvidia-h200-141gb": {
5
+ "display_name": "H200",
6
+ "vendor": "nvidia",
7
+ "vram_gb": 141,
8
+ "bandwidth_gbps": 4800,
9
+ "fp16_tflops": 989,
10
+ "bf16_tflops": 989,
11
+ "fp32_tflops": 67,
12
+ "tf32_tflops": 495,
13
+ "architecture": "Hopper",
14
+ "tdp_watts": 700,
15
+ "interconnect": { "nvlink_gen": 4, "nvlink_bw_gbps": 900, "pcie_gen": 5 }
16
+ },
17
+ "nvidia-h100-sxm-80gb": {
18
+ "display_name": "H100-80GB",
19
+ "vendor": "nvidia",
20
+ "vram_gb": 80,
21
+ "bandwidth_gbps": 3350,
22
+ "fp16_tflops": 989,
23
+ "bf16_tflops": 989,
24
+ "fp32_tflops": 67,
25
+ "tf32_tflops": 495,
26
+ "architecture": "Hopper",
27
+ "tdp_watts": 700,
28
+ "interconnect": { "nvlink_gen": 4, "nvlink_bw_gbps": 900, "pcie_gen": 5 }
29
+ },
30
+ "nvidia-h100-nvl-94gb": {
31
+ "display_name": "H100-NVL",
32
+ "vendor": "nvidia",
33
+ "vram_gb": 94,
34
+ "bandwidth_gbps": 3350,
35
+ "fp16_tflops": 989,
36
+ "bf16_tflops": 989,
37
+ "fp32_tflops": 67,
38
+ "tf32_tflops": 495,
39
+ "architecture": "Hopper",
40
+ "tdp_watts": 400,
41
+ "interconnect": { "nvlink_gen": 4, "nvlink_bw_gbps": 900, "pcie_gen": 5 }
42
+ },
43
+ "nvidia-a100-sxm-80gb": {
44
+ "display_name": "A100-80GB",
45
+ "vendor": "nvidia",
46
+ "vram_gb": 80,
47
+ "bandwidth_gbps": 2039,
48
+ "fp16_tflops": 312,
49
+ "bf16_tflops": 312,
50
+ "fp32_tflops": 19.5,
51
+ "tf32_tflops": 156,
52
+ "architecture": "Ampere",
53
+ "tdp_watts": 400,
54
+ "interconnect": { "nvlink_gen": 3, "nvlink_bw_gbps": 600, "pcie_gen": 4 }
55
+ },
56
+ "nvidia-a100-40gb": {
57
+ "display_name": "A100-40GB",
58
+ "vendor": "nvidia",
59
+ "vram_gb": 40,
60
+ "bandwidth_gbps": 1555,
61
+ "fp16_tflops": 312,
62
+ "bf16_tflops": 312,
63
+ "fp32_tflops": 19.5,
64
+ "tf32_tflops": 156,
65
+ "architecture": "Ampere",
66
+ "tdp_watts": 400,
67
+ "interconnect": { "nvlink_gen": 3, "nvlink_bw_gbps": 600, "pcie_gen": 4 }
68
+ },
69
+ "nvidia-a10g-24gb": {
70
+ "display_name": "A10G",
71
+ "vendor": "nvidia",
72
+ "vram_gb": 24,
73
+ "bandwidth_gbps": 600,
74
+ "fp16_tflops": 125,
75
+ "bf16_tflops": 125,
76
+ "fp32_tflops": 31.2,
77
+ "tf32_tflops": 62.5,
78
+ "architecture": "Ampere",
79
+ "tdp_watts": 300,
80
+ "interconnect": { "pcie_gen": 4 }
81
+ },
82
+ "nvidia-l40s-48gb": {
83
+ "display_name": "L40S",
84
+ "vendor": "nvidia",
85
+ "vram_gb": 48,
86
+ "bandwidth_gbps": 864,
87
+ "fp16_tflops": 362,
88
+ "bf16_tflops": 362,
89
+ "fp32_tflops": 91.6,
90
+ "tf32_tflops": 183,
91
+ "architecture": "Ada Lovelace",
92
+ "tdp_watts": 350,
93
+ "interconnect": { "pcie_gen": 4 }
94
+ },
95
+ "nvidia-l4-24gb": {
96
+ "display_name": "L4",
97
+ "vendor": "nvidia",
98
+ "vram_gb": 24,
99
+ "bandwidth_gbps": 300,
100
+ "fp16_tflops": 121,
101
+ "bf16_tflops": 121,
102
+ "fp32_tflops": 30.3,
103
+ "tf32_tflops": 60,
104
+ "architecture": "Ada Lovelace",
105
+ "tdp_watts": 72,
106
+ "interconnect": { "pcie_gen": 4 }
107
+ },
108
+ "nvidia-t4-16gb": {
109
+ "display_name": "T4",
110
+ "vendor": "nvidia",
111
+ "vram_gb": 16,
112
+ "bandwidth_gbps": 320,
113
+ "fp16_tflops": 65,
114
+ "bf16_tflops": 0,
115
+ "fp32_tflops": 8.1,
116
+ "tf32_tflops": 0,
117
+ "architecture": "Turing",
118
+ "tdp_watts": 70,
119
+ "interconnect": { "pcie_gen": 3 }
120
+ },
121
+ "nvidia-v100-32gb": {
122
+ "display_name": "V100-32GB",
123
+ "vendor": "nvidia",
124
+ "vram_gb": 32,
125
+ "bandwidth_gbps": 900,
126
+ "fp16_tflops": 125,
127
+ "bf16_tflops": 0,
128
+ "fp32_tflops": 15.7,
129
+ "tf32_tflops": 0,
130
+ "architecture": "Volta",
131
+ "tdp_watts": 300,
132
+ "interconnect": { "nvlink_gen": 2, "nvlink_bw_gbps": 300, "pcie_gen": 3 }
133
+ },
134
+ "nvidia-v100-16gb": {
135
+ "display_name": "V100-16GB",
136
+ "vendor": "nvidia",
137
+ "vram_gb": 16,
138
+ "bandwidth_gbps": 900,
139
+ "fp16_tflops": 125,
140
+ "bf16_tflops": 0,
141
+ "fp32_tflops": 15.7,
142
+ "tf32_tflops": 0,
143
+ "architecture": "Volta",
144
+ "tdp_watts": 300,
145
+ "interconnect": { "nvlink_gen": 2, "nvlink_bw_gbps": 300, "pcie_gen": 3 }
146
+ },
147
+ "nvidia-rtx4090-24gb": {
148
+ "display_name": "RTX-4090",
149
+ "vendor": "nvidia",
150
+ "vram_gb": 24,
151
+ "bandwidth_gbps": 1008,
152
+ "fp16_tflops": 330,
153
+ "bf16_tflops": 330,
154
+ "fp32_tflops": 82.6,
155
+ "tf32_tflops": 165,
156
+ "architecture": "Ada Lovelace",
157
+ "tdp_watts": 450,
158
+ "interconnect": { "pcie_gen": 4 }
159
+ },
160
+ "nvidia-rtx3090-24gb": {
161
+ "display_name": "RTX-3090",
162
+ "vendor": "nvidia",
163
+ "vram_gb": 24,
164
+ "bandwidth_gbps": 936,
165
+ "fp16_tflops": 142,
166
+ "bf16_tflops": 142,
167
+ "fp32_tflops": 35.6,
168
+ "tf32_tflops": 71,
169
+ "architecture": "Ampere",
170
+ "tdp_watts": 350,
171
+ "interconnect": { "pcie_gen": 4 }
172
+ }
173
+ }
174
+ }