alloc 0.2.0__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {alloc-0.2.0 → alloc-0.3.0}/PKG-INFO +25 -5
- {alloc-0.2.0 → alloc-0.3.0}/README.md +24 -4
- {alloc-0.2.0 → alloc-0.3.0}/pyproject.toml +4 -1
- alloc-0.3.0/src/alloc/__init__.py +10 -0
- {alloc-0.2.0 → alloc-0.3.0}/src/alloc/artifact_writer.py +5 -1
- {alloc-0.2.0 → alloc-0.3.0}/src/alloc/callbacks.py +5 -0
- alloc-0.3.0/src/alloc/catalog/__init__.py +109 -0
- alloc-0.3.0/src/alloc/catalog/default_rate_card.json +18 -0
- alloc-0.3.0/src/alloc/catalog/gpus.v1.json +174 -0
- {alloc-0.2.0 → alloc-0.3.0}/src/alloc/cli.py +280 -35
- alloc-0.3.0/src/alloc/context.py +191 -0
- alloc-0.3.0/src/alloc/display.py +510 -0
- {alloc-0.2.0 → alloc-0.3.0}/src/alloc/ghost.py +5 -1
- alloc-0.3.0/src/alloc/probe.py +449 -0
- alloc-0.3.0/src/alloc/stability.py +144 -0
- {alloc-0.2.0 → alloc-0.3.0}/src/alloc/upload.py +19 -1
- {alloc-0.2.0 → alloc-0.3.0}/src/alloc.egg-info/PKG-INFO +25 -5
- {alloc-0.2.0 → alloc-0.3.0}/src/alloc.egg-info/SOURCES.txt +14 -1
- alloc-0.3.0/tests/test_artifact.py +128 -0
- alloc-0.3.0/tests/test_catalog.py +83 -0
- alloc-0.3.0/tests/test_cli.py +130 -0
- alloc-0.3.0/tests/test_context.py +135 -0
- {alloc-0.2.0 → alloc-0.3.0}/tests/test_ghost.py +9 -2
- alloc-0.3.0/tests/test_probe_hw.py +83 -0
- alloc-0.3.0/tests/test_probe_multi.py +114 -0
- alloc-0.3.0/tests/test_stability.py +173 -0
- alloc-0.3.0/tests/test_upload.py +105 -0
- alloc-0.3.0/tests/test_verdict.py +187 -0
- alloc-0.2.0/src/alloc/__init__.py +0 -9
- alloc-0.2.0/src/alloc/display.py +0 -85
- alloc-0.2.0/src/alloc/probe.py +0 -229
- alloc-0.2.0/tests/test_cli.py +0 -38
- {alloc-0.2.0 → alloc-0.3.0}/setup.cfg +0 -0
- {alloc-0.2.0 → alloc-0.3.0}/src/alloc/config.py +0 -0
- {alloc-0.2.0 → alloc-0.3.0}/src/alloc.egg-info/dependency_links.txt +0 -0
- {alloc-0.2.0 → alloc-0.3.0}/src/alloc.egg-info/entry_points.txt +0 -0
- {alloc-0.2.0 → alloc-0.3.0}/src/alloc.egg-info/requires.txt +0 -0
- {alloc-0.2.0 → alloc-0.3.0}/src/alloc.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: alloc
|
|
3
|
-
Version: 0.2.0
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: GPU intelligence for ML training — right-size before you launch.
|
|
5
5
|
Author-email: Alloc Labs <hello@alloclabs.com>
|
|
6
6
|
License: Apache-2.0
|
|
@@ -61,14 +61,19 @@ Analyzes model parameters from the script filename and computes VRAM breakdown.
|
|
|
61
61
|
### `alloc run` — Training with GPU monitoring
|
|
62
62
|
|
|
63
63
|
```bash
|
|
64
|
-
alloc run python train.py
|
|
64
|
+
alloc run python train.py # calibrate and exit (default)
|
|
65
|
+
alloc run --full python train.py # monitor full training run
|
|
65
66
|
alloc run torchrun --nproc_per_node=4 train.py
|
|
66
67
|
alloc run -- python train.py --epochs 10
|
|
67
68
|
```
|
|
68
69
|
|
|
69
70
|
Wraps your command, monitors GPU memory/utilization/power via `pynvml`, and writes an artifact.
|
|
70
71
|
|
|
71
|
-
**
|
|
72
|
+
**Default: calibrate-and-exit.** Auto-stops when GPU metrics stabilize (~30-60s), prints a verdict with bottleneck classification and recommendation, then exits. Use `--full` to monitor the entire run. Use `--timeout N` to adjust max calibration time (default 120s).
|
|
73
|
+
|
|
74
|
+
**Multi-GPU:** Automatically discovers all GPUs used by the process tree (works with `torchrun`, `accelerate launch`, etc.).
|
|
75
|
+
|
|
76
|
+
**Hardware context:** Captures driver version, CUDA version, and SM compute capability from NVML.
|
|
72
77
|
|
|
73
78
|
### `alloc login` — Authenticate with dashboard
|
|
74
79
|
|
|
@@ -85,6 +90,18 @@ alloc upload alloc_artifact.json.gz
|
|
|
85
90
|
|
|
86
91
|
Uploads a previously saved `.json.gz` artifact to the dashboard via `POST /runs/ingest`. Requires authentication (`alloc login` first).
|
|
87
92
|
|
|
93
|
+
### `alloc catalog` — Browse GPU hardware catalog
|
|
94
|
+
|
|
95
|
+
```bash
|
|
96
|
+
alloc catalog list # list all 13 GPUs (sorted by VRAM)
|
|
97
|
+
alloc catalog list --sort cost # sort by $/hr
|
|
98
|
+
alloc catalog list --sort tflops # sort by BF16 TFLOPS
|
|
99
|
+
alloc catalog show H100 # detailed specs for H100
|
|
100
|
+
alloc catalog show nvidia-a100-sxm-80gb # lookup by stable ID
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
Offline reference for GPU specs, interconnect details, and cloud pricing. Supports aliases (H100, A100, T4) and stable IDs.
|
|
104
|
+
|
|
88
105
|
### `alloc version`
|
|
89
106
|
|
|
90
107
|
```bash
|
|
@@ -119,8 +136,11 @@ All config via environment variables. Zero config files required.
|
|
|
119
136
|
| Module | Purpose |
|
|
120
137
|
|--------|---------|
|
|
121
138
|
| `ghost.py` | Static VRAM analysis via parameter walking. With torch: `model.named_parameters()`. Without: pure math from param count. |
|
|
122
|
-
| `probe.py` | External GPU monitoring via `pynvml`.
|
|
123
|
-
| `
|
|
139
|
+
| `probe.py` | External GPU monitoring via `pynvml`. Process-tree aware multi-GPU discovery. Captures hardware context (driver, CUDA, SM version). |
|
|
140
|
+
| `stability.py` | Multi-signal stability detection for calibrate-and-exit (VRAM plateau + util std dev + power std dev). |
|
|
141
|
+
| `catalog/` | Bundled GPU hardware catalog (13 GPUs) with specs and pricing. Powers `alloc catalog` commands. |
|
|
142
|
+
| `context.py` | Context autodiscovery: git (SHA, branch, repo), container (Docker/Podman), Ray (job ID, cluster). |
|
|
143
|
+
| `artifact_writer.py` | Artifact Writer: writes `alloc_artifact.json.gz` (v0.5.0) with probe, ghost, hardware, and context sections. |
|
|
124
144
|
| `cli.py` | Typer CLI with `ghost`, `run`, `scan`, `login`, `upload`, `version` commands. |
|
|
125
145
|
| `callbacks.py` | Framework callbacks: HuggingFace `TrainerCallback` (step count capture). |
|
|
126
146
|
| `upload.py` | Artifact uploader: POSTs `.json.gz` to `POST /runs/ingest`. |
|
|
@@ -32,14 +32,19 @@ Analyzes model parameters from the script filename and computes VRAM breakdown.
|
|
|
32
32
|
### `alloc run` — Training with GPU monitoring
|
|
33
33
|
|
|
34
34
|
```bash
|
|
35
|
-
alloc run python train.py
|
|
35
|
+
alloc run python train.py # calibrate and exit (default)
|
|
36
|
+
alloc run --full python train.py # monitor full training run
|
|
36
37
|
alloc run torchrun --nproc_per_node=4 train.py
|
|
37
38
|
alloc run -- python train.py --epochs 10
|
|
38
39
|
```
|
|
39
40
|
|
|
40
41
|
Wraps your command, monitors GPU memory/utilization/power via `pynvml`, and writes an artifact.
|
|
41
42
|
|
|
42
|
-
**
|
|
43
|
+
**Default: calibrate-and-exit.** Auto-stops when GPU metrics stabilize (~30-60s), prints a verdict with bottleneck classification and recommendation, then exits. Use `--full` to monitor the entire run. Use `--timeout N` to adjust max calibration time (default 120s).
|
|
44
|
+
|
|
45
|
+
**Multi-GPU:** Automatically discovers all GPUs used by the process tree (works with `torchrun`, `accelerate launch`, etc.).
|
|
46
|
+
|
|
47
|
+
**Hardware context:** Captures driver version, CUDA version, and SM compute capability from NVML.
|
|
43
48
|
|
|
44
49
|
### `alloc login` — Authenticate with dashboard
|
|
45
50
|
|
|
@@ -56,6 +61,18 @@ alloc upload alloc_artifact.json.gz
|
|
|
56
61
|
|
|
57
62
|
Uploads a previously saved `.json.gz` artifact to the dashboard via `POST /runs/ingest`. Requires authentication (`alloc login` first).
|
|
58
63
|
|
|
64
|
+
### `alloc catalog` — Browse GPU hardware catalog
|
|
65
|
+
|
|
66
|
+
```bash
|
|
67
|
+
alloc catalog list # list all 13 GPUs (sorted by VRAM)
|
|
68
|
+
alloc catalog list --sort cost # sort by $/hr
|
|
69
|
+
alloc catalog list --sort tflops # sort by BF16 TFLOPS
|
|
70
|
+
alloc catalog show H100 # detailed specs for H100
|
|
71
|
+
alloc catalog show nvidia-a100-sxm-80gb # lookup by stable ID
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
Offline reference for GPU specs, interconnect details, and cloud pricing. Supports aliases (H100, A100, T4) and stable IDs.
|
|
75
|
+
|
|
59
76
|
### `alloc version`
|
|
60
77
|
|
|
61
78
|
```bash
|
|
@@ -90,8 +107,11 @@ All config via environment variables. Zero config files required.
|
|
|
90
107
|
| Module | Purpose |
|
|
91
108
|
|--------|---------|
|
|
92
109
|
| `ghost.py` | Static VRAM analysis via parameter walking. With torch: `model.named_parameters()`. Without: pure math from param count. |
|
|
93
|
-
| `probe.py` | External GPU monitoring via `pynvml`.
|
|
94
|
-
| `
|
|
110
|
+
| `probe.py` | External GPU monitoring via `pynvml`. Process-tree aware multi-GPU discovery. Captures hardware context (driver, CUDA, SM version). |
|
|
111
|
+
| `stability.py` | Multi-signal stability detection for calibrate-and-exit (VRAM plateau + util std dev + power std dev). |
|
|
112
|
+
| `catalog/` | Bundled GPU hardware catalog (13 GPUs) with specs and pricing. Powers `alloc catalog` commands. |
|
|
113
|
+
| `context.py` | Context autodiscovery: git (SHA, branch, repo), container (Docker/Podman), Ray (job ID, cluster). |
|
|
114
|
+
| `artifact_writer.py` | Artifact Writer: writes `alloc_artifact.json.gz` (v0.5.0) with probe, ghost, hardware, and context sections. |
|
|
95
115
|
| `cli.py` | Typer CLI with `ghost`, `run`, `scan`, `login`, `upload`, `version` commands. |
|
|
96
116
|
| `callbacks.py` | Framework callbacks: HuggingFace `TrainerCallback` (step count capture). |
|
|
97
117
|
| `upload.py` | Artifact uploader: POSTs `.json.gz` to `POST /runs/ingest`. |
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "alloc"
|
|
7
|
-
version = "0.2.0"
|
|
7
|
+
version = "0.3.0"
|
|
8
8
|
description = "GPU intelligence for ML training — right-size before you launch."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = {text = "Apache-2.0"}
|
|
@@ -41,3 +41,6 @@ Repository = "https://github.com/alloc-labs/alloc"
|
|
|
41
41
|
|
|
42
42
|
[tool.setuptools.packages.find]
|
|
43
43
|
where = ["src"]
|
|
44
|
+
|
|
45
|
+
[tool.setuptools.package-data]
|
|
46
|
+
"alloc.catalog" = ["*.json"]
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
"""Alloc — GPU intelligence for ML training."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
__version__ = "0.3.0"
|
|
6
|
+
|
|
7
|
+
from alloc.ghost import ghost, GhostReport
|
|
8
|
+
from alloc.callbacks import AllocCallback as HuggingFaceCallback
|
|
9
|
+
|
|
10
|
+
__all__ = ["ghost", "GhostReport", "HuggingFaceCallback", "__version__"]
|
|
@@ -16,6 +16,8 @@ def write_report(
|
|
|
16
16
|
ghost_report: Optional[dict] = None,
|
|
17
17
|
probe_result: Optional[dict] = None,
|
|
18
18
|
output_path: Optional[str] = None,
|
|
19
|
+
hardware_context: Optional[dict] = None,
|
|
20
|
+
context: Optional[dict] = None,
|
|
19
21
|
) -> str:
|
|
20
22
|
"""Write an artifact to disk.
|
|
21
23
|
|
|
@@ -34,10 +36,12 @@ def write_report(
|
|
|
34
36
|
)
|
|
35
37
|
|
|
36
38
|
report = {
|
|
37
|
-
"version": "0.
|
|
39
|
+
"version": "0.5.0",
|
|
38
40
|
"timestamp": datetime.now(timezone.utc).isoformat(),
|
|
39
41
|
"ghost": ghost_report,
|
|
40
42
|
"probe": probe_result,
|
|
43
|
+
"hardware": hardware_context,
|
|
44
|
+
"context": context if context else None,
|
|
41
45
|
}
|
|
42
46
|
|
|
43
47
|
with gzip.open(resolved_path, "wt", encoding="utf-8") as f:
|
|
@@ -45,9 +45,14 @@ try:
|
|
|
45
45
|
def __init__(self):
|
|
46
46
|
# type: () -> None
|
|
47
47
|
self.step_count = 0 # type: int
|
|
48
|
+
self._last_write_step = 0 # type: int
|
|
49
|
+
self._write_every = 10 # type: int
|
|
48
50
|
|
|
49
51
|
def on_step_end(self, args, state, control, **kwargs):
|
|
50
52
|
self.step_count = state.global_step
|
|
53
|
+
if self.step_count - self._last_write_step >= self._write_every:
|
|
54
|
+
_write_step_count(self.step_count, framework="huggingface")
|
|
55
|
+
self._last_write_step = self.step_count
|
|
51
56
|
|
|
52
57
|
def on_train_end(self, args, state, control, **kwargs):
|
|
53
58
|
self.step_count = state.global_step
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
"""GPU catalog — offline hardware specs and pricing for CLI.
|
|
2
|
+
|
|
3
|
+
Source of truth: apps/api/src/engine/catalog/gpus.v1.json
|
|
4
|
+
This is a bundled copy for offline CLI use. Update when the API catalog changes.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import Dict, List, Optional
|
|
12
|
+
|
|
13
|
+
_CATALOG_DIR = Path(__file__).parent
|
|
14
|
+
|
|
15
|
+
# Aliases for common shorthand names
|
|
16
|
+
_ALIASES = {
|
|
17
|
+
"H100": "nvidia-h100-sxm-80gb",
|
|
18
|
+
"H100-80GB": "nvidia-h100-sxm-80gb",
|
|
19
|
+
"A100": "nvidia-a100-sxm-80gb",
|
|
20
|
+
"A100-80GB": "nvidia-a100-sxm-80gb",
|
|
21
|
+
"A100-40GB": "nvidia-a100-40gb",
|
|
22
|
+
"A10G": "nvidia-a10g-24gb",
|
|
23
|
+
"L40S": "nvidia-l40s-48gb",
|
|
24
|
+
"L4": "nvidia-l4-24gb",
|
|
25
|
+
"T4": "nvidia-t4-16gb",
|
|
26
|
+
"V100": "nvidia-v100-32gb",
|
|
27
|
+
"V100-32GB": "nvidia-v100-32gb",
|
|
28
|
+
"V100-16GB": "nvidia-v100-16gb",
|
|
29
|
+
"RTX-4090": "nvidia-rtx4090-24gb",
|
|
30
|
+
"RTX-3090": "nvidia-rtx3090-24gb",
|
|
31
|
+
"H200": "nvidia-h200-141gb",
|
|
32
|
+
"H100-NVL": "nvidia-h100-nvl-94gb",
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _load_catalog() -> dict:
|
|
37
|
+
"""Load GPU catalog from bundled JSON."""
|
|
38
|
+
with open(_CATALOG_DIR / "gpus.v1.json") as f:
|
|
39
|
+
return json.load(f)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _load_rate_card() -> dict:
|
|
43
|
+
"""Load default rate card from bundled JSON."""
|
|
44
|
+
with open(_CATALOG_DIR / "default_rate_card.json") as f:
|
|
45
|
+
return json.load(f)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def list_gpus() -> List[dict]:
|
|
49
|
+
"""Return all GPUs sorted by VRAM descending.
|
|
50
|
+
|
|
51
|
+
Each entry has: id, display_name, vendor, vram_gb, architecture,
|
|
52
|
+
bandwidth_gbps, bf16_tflops, tdp_watts, pricing.
|
|
53
|
+
"""
|
|
54
|
+
catalog = _load_catalog()
|
|
55
|
+
rate_card = _load_rate_card()
|
|
56
|
+
|
|
57
|
+
result = []
|
|
58
|
+
for gpu_id, spec in catalog.get("gpus", {}).items():
|
|
59
|
+
pricing = rate_card.get("rates", {}).get(spec["display_name"], {})
|
|
60
|
+
result.append({
|
|
61
|
+
"id": gpu_id,
|
|
62
|
+
"display_name": spec["display_name"],
|
|
63
|
+
"vendor": spec.get("vendor", "nvidia"),
|
|
64
|
+
"vram_gb": spec["vram_gb"],
|
|
65
|
+
"architecture": spec.get("architecture", ""),
|
|
66
|
+
"bandwidth_gbps": spec.get("bandwidth_gbps", 0),
|
|
67
|
+
"bf16_tflops": spec.get("bf16_tflops", 0),
|
|
68
|
+
"fp16_tflops": spec.get("fp16_tflops", 0),
|
|
69
|
+
"fp32_tflops": spec.get("fp32_tflops", 0),
|
|
70
|
+
"tf32_tflops": spec.get("tf32_tflops", 0),
|
|
71
|
+
"tdp_watts": spec.get("tdp_watts", 0),
|
|
72
|
+
"interconnect": spec.get("interconnect"),
|
|
73
|
+
"pricing": pricing,
|
|
74
|
+
})
|
|
75
|
+
|
|
76
|
+
return sorted(result, key=lambda x: x["vram_gb"], reverse=True)
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def get_gpu(gpu_id: str) -> Optional[dict]:
|
|
80
|
+
"""Look up a GPU by stable ID or alias.
|
|
81
|
+
|
|
82
|
+
Returns full spec dict or None if not found.
|
|
83
|
+
"""
|
|
84
|
+
# Resolve aliases
|
|
85
|
+
resolved = _ALIASES.get(gpu_id, gpu_id)
|
|
86
|
+
|
|
87
|
+
catalog = _load_catalog()
|
|
88
|
+
rate_card = _load_rate_card()
|
|
89
|
+
|
|
90
|
+
spec = catalog.get("gpus", {}).get(resolved)
|
|
91
|
+
if not spec:
|
|
92
|
+
return None
|
|
93
|
+
|
|
94
|
+
pricing = rate_card.get("rates", {}).get(spec["display_name"], {})
|
|
95
|
+
return {
|
|
96
|
+
"id": resolved,
|
|
97
|
+
"display_name": spec["display_name"],
|
|
98
|
+
"vendor": spec.get("vendor", "nvidia"),
|
|
99
|
+
"vram_gb": spec["vram_gb"],
|
|
100
|
+
"architecture": spec.get("architecture", ""),
|
|
101
|
+
"bandwidth_gbps": spec.get("bandwidth_gbps", 0),
|
|
102
|
+
"bf16_tflops": spec.get("bf16_tflops", 0),
|
|
103
|
+
"fp16_tflops": spec.get("fp16_tflops", 0),
|
|
104
|
+
"fp32_tflops": spec.get("fp32_tflops", 0),
|
|
105
|
+
"tf32_tflops": spec.get("tf32_tflops", 0),
|
|
106
|
+
"tdp_watts": spec.get("tdp_watts", 0),
|
|
107
|
+
"interconnect": spec.get("interconnect"),
|
|
108
|
+
"pricing": pricing,
|
|
109
|
+
}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
{
|
|
2
|
+
"version": "1",
|
|
3
|
+
"rates": {
|
|
4
|
+
"H200": { "aws": 5.50, "gcp": 5.30, "azure": 5.40 },
|
|
5
|
+
"H100-80GB": { "aws": 4.00, "gcp": 3.90, "azure": 3.85 },
|
|
6
|
+
"H100-NVL": { "aws": 4.50, "gcp": 4.40, "azure": 4.30 },
|
|
7
|
+
"A100-80GB": { "aws": 2.50, "gcp": 2.48, "azure": 2.55 },
|
|
8
|
+
"A100-40GB": { "aws": 2.00, "gcp": 1.95, "azure": 2.10 },
|
|
9
|
+
"A10G": { "aws": 0.75, "gcp": 0.70, "azure": 0.80 },
|
|
10
|
+
"L40S": { "aws": 1.50, "gcp": 1.45, "azure": 1.55 },
|
|
11
|
+
"L4": { "aws": 0.50, "gcp": 0.45, "azure": 0.55 },
|
|
12
|
+
"T4": { "aws": 0.35, "gcp": 0.30, "azure": 0.40 },
|
|
13
|
+
"V100-32GB": { "aws": 1.20, "gcp": 1.15, "azure": 1.25 },
|
|
14
|
+
"V100-16GB": { "aws": 0.90, "gcp": 0.85, "azure": 0.95 },
|
|
15
|
+
"RTX-4090": { "lambda": 0.70, "coreweave": 0.74 },
|
|
16
|
+
"RTX-3090": { "lambda": 0.50, "coreweave": 0.54 }
|
|
17
|
+
}
|
|
18
|
+
}
|
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
{
|
|
2
|
+
"version": "1",
|
|
3
|
+
"gpus": {
|
|
4
|
+
"nvidia-h200-141gb": {
|
|
5
|
+
"display_name": "H200",
|
|
6
|
+
"vendor": "nvidia",
|
|
7
|
+
"vram_gb": 141,
|
|
8
|
+
"bandwidth_gbps": 4800,
|
|
9
|
+
"fp16_tflops": 989,
|
|
10
|
+
"bf16_tflops": 989,
|
|
11
|
+
"fp32_tflops": 67,
|
|
12
|
+
"tf32_tflops": 495,
|
|
13
|
+
"architecture": "Hopper",
|
|
14
|
+
"tdp_watts": 700,
|
|
15
|
+
"interconnect": { "nvlink_gen": 4, "nvlink_bw_gbps": 900, "pcie_gen": 5 }
|
|
16
|
+
},
|
|
17
|
+
"nvidia-h100-sxm-80gb": {
|
|
18
|
+
"display_name": "H100-80GB",
|
|
19
|
+
"vendor": "nvidia",
|
|
20
|
+
"vram_gb": 80,
|
|
21
|
+
"bandwidth_gbps": 3350,
|
|
22
|
+
"fp16_tflops": 989,
|
|
23
|
+
"bf16_tflops": 989,
|
|
24
|
+
"fp32_tflops": 67,
|
|
25
|
+
"tf32_tflops": 495,
|
|
26
|
+
"architecture": "Hopper",
|
|
27
|
+
"tdp_watts": 700,
|
|
28
|
+
"interconnect": { "nvlink_gen": 4, "nvlink_bw_gbps": 900, "pcie_gen": 5 }
|
|
29
|
+
},
|
|
30
|
+
"nvidia-h100-nvl-94gb": {
|
|
31
|
+
"display_name": "H100-NVL",
|
|
32
|
+
"vendor": "nvidia",
|
|
33
|
+
"vram_gb": 94,
|
|
34
|
+
"bandwidth_gbps": 3350,
|
|
35
|
+
"fp16_tflops": 989,
|
|
36
|
+
"bf16_tflops": 989,
|
|
37
|
+
"fp32_tflops": 67,
|
|
38
|
+
"tf32_tflops": 495,
|
|
39
|
+
"architecture": "Hopper",
|
|
40
|
+
"tdp_watts": 400,
|
|
41
|
+
"interconnect": { "nvlink_gen": 4, "nvlink_bw_gbps": 900, "pcie_gen": 5 }
|
|
42
|
+
},
|
|
43
|
+
"nvidia-a100-sxm-80gb": {
|
|
44
|
+
"display_name": "A100-80GB",
|
|
45
|
+
"vendor": "nvidia",
|
|
46
|
+
"vram_gb": 80,
|
|
47
|
+
"bandwidth_gbps": 2039,
|
|
48
|
+
"fp16_tflops": 312,
|
|
49
|
+
"bf16_tflops": 312,
|
|
50
|
+
"fp32_tflops": 19.5,
|
|
51
|
+
"tf32_tflops": 156,
|
|
52
|
+
"architecture": "Ampere",
|
|
53
|
+
"tdp_watts": 400,
|
|
54
|
+
"interconnect": { "nvlink_gen": 3, "nvlink_bw_gbps": 600, "pcie_gen": 4 }
|
|
55
|
+
},
|
|
56
|
+
"nvidia-a100-40gb": {
|
|
57
|
+
"display_name": "A100-40GB",
|
|
58
|
+
"vendor": "nvidia",
|
|
59
|
+
"vram_gb": 40,
|
|
60
|
+
"bandwidth_gbps": 1555,
|
|
61
|
+
"fp16_tflops": 312,
|
|
62
|
+
"bf16_tflops": 312,
|
|
63
|
+
"fp32_tflops": 19.5,
|
|
64
|
+
"tf32_tflops": 156,
|
|
65
|
+
"architecture": "Ampere",
|
|
66
|
+
"tdp_watts": 400,
|
|
67
|
+
"interconnect": { "nvlink_gen": 3, "nvlink_bw_gbps": 600, "pcie_gen": 4 }
|
|
68
|
+
},
|
|
69
|
+
"nvidia-a10g-24gb": {
|
|
70
|
+
"display_name": "A10G",
|
|
71
|
+
"vendor": "nvidia",
|
|
72
|
+
"vram_gb": 24,
|
|
73
|
+
"bandwidth_gbps": 600,
|
|
74
|
+
"fp16_tflops": 125,
|
|
75
|
+
"bf16_tflops": 125,
|
|
76
|
+
"fp32_tflops": 31.2,
|
|
77
|
+
"tf32_tflops": 62.5,
|
|
78
|
+
"architecture": "Ampere",
|
|
79
|
+
"tdp_watts": 300,
|
|
80
|
+
"interconnect": { "pcie_gen": 4 }
|
|
81
|
+
},
|
|
82
|
+
"nvidia-l40s-48gb": {
|
|
83
|
+
"display_name": "L40S",
|
|
84
|
+
"vendor": "nvidia",
|
|
85
|
+
"vram_gb": 48,
|
|
86
|
+
"bandwidth_gbps": 864,
|
|
87
|
+
"fp16_tflops": 362,
|
|
88
|
+
"bf16_tflops": 362,
|
|
89
|
+
"fp32_tflops": 91.6,
|
|
90
|
+
"tf32_tflops": 183,
|
|
91
|
+
"architecture": "Ada Lovelace",
|
|
92
|
+
"tdp_watts": 350,
|
|
93
|
+
"interconnect": { "pcie_gen": 4 }
|
|
94
|
+
},
|
|
95
|
+
"nvidia-l4-24gb": {
|
|
96
|
+
"display_name": "L4",
|
|
97
|
+
"vendor": "nvidia",
|
|
98
|
+
"vram_gb": 24,
|
|
99
|
+
"bandwidth_gbps": 300,
|
|
100
|
+
"fp16_tflops": 121,
|
|
101
|
+
"bf16_tflops": 121,
|
|
102
|
+
"fp32_tflops": 30.3,
|
|
103
|
+
"tf32_tflops": 60,
|
|
104
|
+
"architecture": "Ada Lovelace",
|
|
105
|
+
"tdp_watts": 72,
|
|
106
|
+
"interconnect": { "pcie_gen": 4 }
|
|
107
|
+
},
|
|
108
|
+
"nvidia-t4-16gb": {
|
|
109
|
+
"display_name": "T4",
|
|
110
|
+
"vendor": "nvidia",
|
|
111
|
+
"vram_gb": 16,
|
|
112
|
+
"bandwidth_gbps": 320,
|
|
113
|
+
"fp16_tflops": 65,
|
|
114
|
+
"bf16_tflops": 0,
|
|
115
|
+
"fp32_tflops": 8.1,
|
|
116
|
+
"tf32_tflops": 0,
|
|
117
|
+
"architecture": "Turing",
|
|
118
|
+
"tdp_watts": 70,
|
|
119
|
+
"interconnect": { "pcie_gen": 3 }
|
|
120
|
+
},
|
|
121
|
+
"nvidia-v100-32gb": {
|
|
122
|
+
"display_name": "V100-32GB",
|
|
123
|
+
"vendor": "nvidia",
|
|
124
|
+
"vram_gb": 32,
|
|
125
|
+
"bandwidth_gbps": 900,
|
|
126
|
+
"fp16_tflops": 125,
|
|
127
|
+
"bf16_tflops": 0,
|
|
128
|
+
"fp32_tflops": 15.7,
|
|
129
|
+
"tf32_tflops": 0,
|
|
130
|
+
"architecture": "Volta",
|
|
131
|
+
"tdp_watts": 300,
|
|
132
|
+
"interconnect": { "nvlink_gen": 2, "nvlink_bw_gbps": 300, "pcie_gen": 3 }
|
|
133
|
+
},
|
|
134
|
+
"nvidia-v100-16gb": {
|
|
135
|
+
"display_name": "V100-16GB",
|
|
136
|
+
"vendor": "nvidia",
|
|
137
|
+
"vram_gb": 16,
|
|
138
|
+
"bandwidth_gbps": 900,
|
|
139
|
+
"fp16_tflops": 125,
|
|
140
|
+
"bf16_tflops": 0,
|
|
141
|
+
"fp32_tflops": 15.7,
|
|
142
|
+
"tf32_tflops": 0,
|
|
143
|
+
"architecture": "Volta",
|
|
144
|
+
"tdp_watts": 300,
|
|
145
|
+
"interconnect": { "nvlink_gen": 2, "nvlink_bw_gbps": 300, "pcie_gen": 3 }
|
|
146
|
+
},
|
|
147
|
+
"nvidia-rtx4090-24gb": {
|
|
148
|
+
"display_name": "RTX-4090",
|
|
149
|
+
"vendor": "nvidia",
|
|
150
|
+
"vram_gb": 24,
|
|
151
|
+
"bandwidth_gbps": 1008,
|
|
152
|
+
"fp16_tflops": 330,
|
|
153
|
+
"bf16_tflops": 330,
|
|
154
|
+
"fp32_tflops": 82.6,
|
|
155
|
+
"tf32_tflops": 165,
|
|
156
|
+
"architecture": "Ada Lovelace",
|
|
157
|
+
"tdp_watts": 450,
|
|
158
|
+
"interconnect": { "pcie_gen": 4 }
|
|
159
|
+
},
|
|
160
|
+
"nvidia-rtx3090-24gb": {
|
|
161
|
+
"display_name": "RTX-3090",
|
|
162
|
+
"vendor": "nvidia",
|
|
163
|
+
"vram_gb": 24,
|
|
164
|
+
"bandwidth_gbps": 936,
|
|
165
|
+
"fp16_tflops": 142,
|
|
166
|
+
"bf16_tflops": 142,
|
|
167
|
+
"fp32_tflops": 35.6,
|
|
168
|
+
"tf32_tflops": 71,
|
|
169
|
+
"architecture": "Ampere",
|
|
170
|
+
"tdp_watts": 350,
|
|
171
|
+
"interconnect": { "pcie_gen": 4 }
|
|
172
|
+
}
|
|
173
|
+
}
|
|
174
|
+
}
|