kavier 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. kavier/__init__.py +108 -0
  2. kavier/inference/__init__.py +269 -0
  3. kavier/py.typed +0 -0
  4. kavier/training/__init__.py +164 -0
  5. kavier-0.3.1.dist-info/METADATA +129 -0
  6. kavier-0.3.1.dist-info/RECORD +78 -0
  7. kavier-0.3.1.dist-info/WHEEL +5 -0
  8. kavier-0.3.1.dist-info/entry_points.txt +7 -0
  9. kavier-0.3.1.dist-info/licenses/LICENSE.txt +21 -0
  10. kavier-0.3.1.dist-info/top_level.txt +8 -0
  11. kavier_co2/__init__.py +1 -0
  12. kavier_co2/cli.py +163 -0
  13. kavier_co2/engine.py +157 -0
  14. kavier_co2/fragments.py +91 -0
  15. kavier_co2/py.typed +0 -0
  16. kavier_energy/__init__.py +1 -0
  17. kavier_energy/calculator.py +55 -0
  18. kavier_energy/cli_args.py +26 -0
  19. kavier_energy/engine.py +19 -0
  20. kavier_energy/metrics.py +57 -0
  21. kavier_energy/py.typed +0 -0
  22. kavier_inference/__init__.py +1 -0
  23. kavier_inference/cli.py +29 -0
  24. kavier_inference/core/__init__.py +1 -0
  25. kavier_inference/core/args.py +73 -0
  26. kavier_inference/core/cache.py +41 -0
  27. kavier_inference/core/config.py +34 -0
  28. kavier_inference/core/engine.py +74 -0
  29. kavier_inference/core/metrics.py +58 -0
  30. kavier_inference/core/runner.py +79 -0
  31. kavier_inference/core/service.py +57 -0
  32. kavier_inference/data/input/input_example.csv +7 -0
  33. kavier_inference/py.typed +0 -0
  34. kavier_inference/stages/__init__.py +1 -0
  35. kavier_inference/stages/decode.py +20 -0
  36. kavier_inference/stages/gpu_usage.py +14 -0
  37. kavier_inference/stages/kv_usage.py +27 -0
  38. kavier_inference/stages/prefill.py +12 -0
  39. kavier_io/__init__.py +1 -0
  40. kavier_io/config.py +29 -0
  41. kavier_io/constants.py +6 -0
  42. kavier_io/input_spec.py +80 -0
  43. kavier_io/log.py +8 -0
  44. kavier_io/opendc/__init__.py +1 -0
  45. kavier_io/opendc/adapter.py +79 -0
  46. kavier_io/opendc/py.typed +0 -0
  47. kavier_io/opendc/schema.py +27 -0
  48. kavier_io/py.typed +0 -0
  49. kavier_io/stream_writer.py +25 -0
  50. kavier_io/training_opendc.py +135 -0
  51. kavier_library/__init__.py +13 -0
  52. kavier_library/gpu.py +152 -0
  53. kavier_library/llm.py +156 -0
  54. kavier_library/lookup.py +33 -0
  55. kavier_library/py.typed +0 -0
  56. kavier_library/specs/GPUSpec.py +32 -0
  57. kavier_library/specs/LLMSpec.py +25 -0
  58. kavier_library/specs/__init__.py +6 -0
  59. kavier_training/__init__.py +1 -0
  60. kavier_training/cli.py +132 -0
  61. kavier_training/core/__init__.py +1 -0
  62. kavier_training/core/calibration.py +106 -0
  63. kavier_training/core/cli_args.py +39 -0
  64. kavier_training/core/config.py +4 -0
  65. kavier_training/core/engine.py +237 -0
  66. kavier_training/data/calibration.json +96 -0
  67. kavier_training/data/input/input_example.csv +101 -0
  68. kavier_training/py.typed +0 -0
  69. kavier_training/validation/__init__.py +1 -0
  70. kavier_training/validation/run_benchmarks.py +125 -0
  71. kavier_training/validation/validator.py +117 -0
  72. kavier_ui/__init__.py +11 -0
  73. kavier_ui/__main__.py +16 -0
  74. kavier_ui/app.py +264 -0
  75. kavier_ui/prompts.py +243 -0
  76. kavier_ui/render.py +202 -0
  77. kavier_ui/sims.py +37 -0
  78. kavier_ui/theme.py +46 -0
kavier/__init__.py ADDED
@@ -0,0 +1,108 @@
1
+ """Umbrella facade: the public ``inference``/``training`` API plus ``kavier.<sub>`` legacy aliases."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import importlib as _importlib
6
+ import sys as _sys
7
+ from importlib.abc import Loader as _Loader
8
+ from importlib.abc import MetaPathFinder as _MetaPathFinder
9
+ from importlib.machinery import ModuleSpec as _ModuleSpec
10
+ from importlib.metadata import PackageNotFoundError as _PackageNotFoundError
11
+ from importlib.metadata import version as _pkg_version
12
+ from types import ModuleType as _ModuleType
13
+ from typing import Any as _Any
14
+ from typing import Sequence as _Sequence
15
+
16
+ # Pure aliases: ``kavier.<sub>`` (bare AND deep) resolves to the same object as ``kavier_<sub>``.
17
+ _ALIAS_TO_LEGACY = {
18
+ "io": "kavier_io",
19
+ "energy": "kavier_energy",
20
+ "co2": "kavier_co2",
21
+ "library": "kavier_library",
22
+ "opendc": "kavier_io.opendc",
23
+ }
24
+
25
+ # ``inference`` and ``training`` are REAL packages here (the public predictor API). The bare module is
26
+ # the new package, but DEEP imports (``kavier.training.core.calibration`` ...) still resolve to the
27
+ # legacy package — preserving the live calibration ``_CAL`` swap contract (one module per spelling).
28
+ _API_PACKAGES = {
29
+ "inference": "kavier_inference",
30
+ "training": "kavier_training",
31
+ }
32
+
33
+
34
class _LegacyAliasFinder(_MetaPathFinder):
    """Meta-path finder that maps ``kavier.<name>`` imports onto legacy ``kavier_<name>`` modules.

    Names in ``_ALIAS_TO_LEGACY`` are aliased for both the bare and the deep spelling. Names in
    ``_API_PACKAGES`` are aliased only for deep imports (``kavier.<name>.<rest>``); the bare
    package name is declined so another finder resolves it.
    """

    _prefix = f"{__name__}."

    def find_spec(
        self,
        fullname: str,
        path: _Sequence[str] | None = None,
        target: _ModuleType | None = None,
    ) -> _ModuleSpec | None:
        """Return an alias spec for ``fullname``, or ``None`` when this finder does not handle it."""
        prefix = self._prefix
        if not fullname.startswith(prefix):
            return None

        head, _, rest = fullname[len(prefix):].partition(".")
        if head in _ALIAS_TO_LEGACY:
            legacy_root = _ALIAS_TO_LEGACY[head]
        elif rest and head in _API_PACKAGES:
            # Only deep imports under an API package are redirected to the legacy package.
            legacy_root = _API_PACKAGES[head]
        else:
            return None

        legacy_name = f"{legacy_root}.{rest}" if rest else legacy_root
        return _ModuleSpec(fullname, _LegacyAliasLoader(legacy_name))
57
+
58
+
59
class _LegacyAliasLoader(_Loader):
    """Loader that serves an existing legacy module under an alias name.

    ``create_module`` imports the legacy module and hands that very object back to the import
    system, so both spellings share one module object. The import machinery then stamps the
    *alias* spec onto whatever ``create_module`` returned, which would replace the legacy
    module's real ``__spec__`` (name, origin, loader). ``exec_module`` therefore restores the
    spec captured in ``create_module``.
    """

    def __init__(self, legacy_name: str) -> None:
        self._legacy_name = legacy_name
        # Real spec of the legacy module, captured before the machinery overwrites it.
        self._legacy_spec: _ModuleSpec | None = None

    def create_module(self, spec: _ModuleSpec) -> _ModuleType:
        """Import the legacy module, register it under the alias name and return it."""
        module = _importlib.import_module(self._legacy_name)
        self._legacy_spec = getattr(module, "__spec__", None)
        _sys.modules[spec.name] = module
        return module

    def exec_module(self, module: _ModuleType) -> None:
        """Do not re-execute the (already executed) module; only restore its original spec."""
        if self._legacy_spec is not None:
            module.__spec__ = self._legacy_spec
        return None
70
+
71
+
72
+ _sys.meta_path.insert(0, _LegacyAliasFinder())
73
+
74
+
75
def __getattr__(name: str) -> _Any:
    """Resolve attribute access for names listed in ``_ALIAS_TO_LEGACY`` by importing ``<pkg>.<name>``.

    Raises:
        AttributeError: ``name`` is not one of the alias names.
    """
    if _ALIAS_TO_LEGACY.get(name) is None:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
    # Import through the alias spelling so the alias finder registers it in ``sys.modules``.
    return _importlib.import_module(f"{__name__}.{name}")
80
+
81
+
82
+ from kavier import inference, training # noqa: E402
83
+ from kavier_library.gpu import GPU_SPEC_LIBRARY # noqa: E402
84
+ from kavier_library.llm import LLM_SPEC_LIBRARY # noqa: E402
85
+ from kavier_training.core.engine import ( # noqa: E402
86
+ simulate_full_training,
87
+ simulate_training_step,
88
+ )
89
+
90
+ __all__ = [
91
+ "simulate_training_step",
92
+ "simulate_full_training",
93
+ "GPU_SPEC_LIBRARY",
94
+ "LLM_SPEC_LIBRARY",
95
+ "training",
96
+ "inference",
97
+ "io",
98
+ "energy",
99
+ "co2",
100
+ "library",
101
+ "opendc",
102
+ ]
103
+
104
+ # Version from installed dist metadata; pyproject's static ``version`` is the single source of truth.
105
+ try:
106
+ __version__ = _pkg_version("kavier")
107
+ except _PackageNotFoundError: # editable/source tree without dist metadata
108
+ __version__ = "0.0.0+unknown"
@@ -0,0 +1,269 @@
1
+ """Inference predictors: ``performance / energy / efficiency / carbon`` over a batch of serving workloads.
2
+
3
+ Each verb takes a *batch* — a pandas DataFrame, a list of dicts, or a single dict (one row per
4
+ workload) — and returns a DataFrame: the input rows plus the predicted columns. The per-row engine
5
+ logic is canonical here; ``kavier_ui.sims`` and the CLIs consume it, so the numbers match exactly.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import datetime as dt
11
+ import time
12
+ from pathlib import Path
13
+ from typing import Any
14
+
15
+ import numpy as np
16
+ import pandas as pd
17
+
18
+ from kavier_co2.engine import CarbonTrace, Fragment, compute_emissions
19
+ from kavier_inference.core.cache import PrefixCache
20
+ from kavier_inference.core.config import CacheCfg, SimConfig
21
+ from kavier_inference.core.metrics import Metrics
22
+ from kavier_inference.core.runner import simulate_one
23
+ from kavier_library import get_gpu, get_llm
24
+
25
+ # Defaults for workload keys a batch may omit (mirror the UI prompt defaults).
26
+ DEFAULT_KV_CACHE = True
27
+ DEFAULT_PREFIX_POLICY = "prefill"
28
+ DEFAULT_PREFIX_MIN_TOKENS = 1024
29
+ DEFAULT_INTENSITY_G_KWH = 400.0
30
+ DEFAULT_GPU_HOUR_PRICE = 2.5
31
+
32
+ Batch = "pd.DataFrame | list[dict[str, Any]] | dict[str, Any]"
33
+
34
+
35
+ def _drop_missing(row: dict[str, Any]) -> dict[str, Any]:
36
+ """Drop NaN/None cells so ``.get(key)`` means 'absent' — a heterogeneous DataFrame fills gaps with NaN."""
37
+ out: dict[str, Any] = {}
38
+ for k, v in row.items():
39
+ if v is None:
40
+ continue
41
+ if isinstance(v, float) and pd.isna(v):
42
+ continue
43
+ out[k] = v
44
+ return out
45
+
46
+
47
def _normalise(batch: pd.DataFrame | list[dict[str, Any]] | dict[str, Any]) -> list[dict[str, Any]]:
    """Turn a DataFrame, a list of dicts or a single dict into a list of plain row dicts.

    DataFrame column labels are converted to ``str``. Every row is passed through
    ``_drop_missing`` before being returned.
    """
    if isinstance(batch, pd.DataFrame):
        raw_records = batch.to_dict(orient="records")
        records: list[dict[str, Any]] = [
            {str(column): cell for column, cell in record.items()} for record in raw_records
        ]
    elif isinstance(batch, dict):
        # A single workload: wrap a shallow copy in a one-element list.
        records = [dict(batch)]
    else:
        records = [dict(item) for item in batch]
    return list(map(_drop_missing, records))
56
+
57
+
58
def _infer_params(row: dict[str, Any]) -> dict[str, Any]:
    """Copy ``row`` and add the cache settings it omits, using the module-level defaults."""
    params = dict(row)
    cache_defaults = (
        ("kv_cache", DEFAULT_KV_CACHE),
        ("prefix_policy", DEFAULT_PREFIX_POLICY),
        ("prefix_min_tokens", DEFAULT_PREFIX_MIN_TOKENS),
    )
    for key, fallback in cache_defaults:
        # Keys already present keep their value and position; missing keys are appended.
        params.setdefault(key, fallback)
    return params
66
+
67
+
68
def run_inference(p: dict[str, Any]) -> dict[str, Any]:
    """Run ``simulate_one`` ``num_requests`` times for one homogeneous workload and aggregate.

    Args:
        p: Workload parameters. Reads ``model``, ``gpu``, ``kv_cache``, ``prefix_min_tokens``,
            ``prefix_policy``, ``num_requests``, ``input_tokens`` and ``output_tokens``.

    Returns:
        A dict with the echoed workload settings, summed prefill/decode time, latency
        percentiles, mean TTFT, throughput, cache counters and ``_tasks`` (the per-request task
        records, consumed by ``export_opendc``). For an empty workload (``num_requests == 0``)
        the latency/TTFT statistics are ``0.0``, like the throughput and hit-ratio fields.
    """
    llm = get_llm(p["model"])
    gpu = get_gpu(p["gpu"])
    cfg = SimConfig(
        export_rate=0.1,
        kv_cache=bool(p["kv_cache"]),
        cache=CacheCfg(min_len=int(p["prefix_min_tokens"]), action=p["prefix_policy"], scope="session", max_entries=10),
    )

    n = int(p["num_requests"])
    n_in, n_out = int(p["input_tokens"]), int(p["output_tokens"])
    cache = PrefixCache(cfg.cache)
    metrics = Metrics()
    # Integer division keeps the millisecond epoch exact (no float round-trip).
    t0 = time.time_ns() // 1_000_000
    ttfts: list[float] = []
    tasks: list[dict[str, Any]] = []
    for i in range(n):
        task, _frags, t_p, t_d = simulate_one(
            idx=i,
            session_id=None,
            n_in_tokens=n_in,
            n_out_tokens=n_out,
            in_tokens=None,
            llm=llm,
            gpu=gpu,
            cache=cache,
            cfg=cfg,
            export_rate_s=cfg.export_rate,
            t0_ms=t0,
        )
        # Third argument is the request latency in milliseconds (prefill + decode seconds x 1000).
        metrics.add(t_p, t_d, (t_p + t_d) * 1000.0)
        ttfts.append(t_p * 1000.0)
        tasks.append(task)

    total_s = metrics.sum_prefill + metrics.sum_decode
    total_tokens = n * (n_in + n_out)
    lat = np.asarray(metrics.latencies)

    # Statistics over an empty sample are undefined; report 0.0 instead of NaN or an error.
    if lat.size:
        p50_ms, p95_ms, p99_ms = (float(v) for v in np.percentile(lat, [50, 95, 99]))
    else:
        p50_ms = p95_ms = p99_ms = 0.0
    mean_ttft_ms = float(np.mean(ttfts)) if ttfts else 0.0

    return {
        "model": llm.name,
        "gpu": gpu.name,
        "num_requests": n,
        "input_tokens": n_in,
        "output_tokens": n_out,
        "kv_cache": cfg.kv_cache,
        "prefix_policy": cfg.cache.action,
        "prefix_min_tokens": cfg.cache.min_len,
        "prefill_s": metrics.sum_prefill,
        "decode_s": metrics.sum_decode,
        "total_s": total_s,
        "mean_ttft_ms": mean_ttft_ms,
        "p50_ms": p50_ms,
        "p95_ms": p95_ms,
        "p99_ms": p99_ms,
        "throughput_req_s": n / total_s if total_s else 0.0,
        "throughput_tok_s": total_tokens / total_s if total_s else 0.0,
        "total_tokens": total_tokens,
        "cache_hits": cache.hits,
        "cache_hit_ratio": cache.hits / n if n else 0.0,
        "evictions": cache.evictions,
        "_tasks": tasks,  # reused by the energy chain
    }
130
+
131
+
132
def _flat_trace(start: pd.Timestamp, hours: float, intensity_g_kwh: float) -> CarbonTrace:
    """Build a constant-intensity trace so ``compute_emissions`` needs no external grid trace.

    The trace holds ``int(hours) + 2`` rows (never fewer than two), spaced one hour apart and
    starting at ``start``; every row carries ``intensity_g_kwh``.
    """
    n_rows = max(2, int(hours) + 2)
    timestamps = [start + dt.timedelta(hours=offset) for offset in range(n_rows)]
    intensities = [float(intensity_g_kwh)] * n_rows
    frame = pd.DataFrame({"timestamp": timestamps, "carbon_intensity": intensities})
    return CarbonTrace.from_dataframe(frame)
142
+
143
+
144
def run_carbon_from_inference(infer: dict[str, Any], intensity_g_kwh: float) -> dict[str, Any]:
    """Bill the GPU's ``max_power_w`` over ``infer["total_s"]`` against a flat carbon intensity.

    Args:
        infer: Result dict of ``run_inference``; reads ``gpu``, ``total_s``, ``model`` and
            ``total_tokens``.
        intensity_g_kwh: Constant intensity used for the whole run.

    Returns:
        A dict with the run identity, runtime, power, and the energy/CO2 totals reported by
        ``compute_emissions`` for a single fragment.
    """
    gpu_spec = get_gpu(infer["gpu"])
    busy_s = float(infer["total_s"])
    watts = float(gpu_spec.max_power_w)

    # Fixed nominal start; the flat trace makes the absolute time irrelevant to the intensity.
    origin = pd.Timestamp("2026-01-01 00:00:00")
    flat = _flat_trace(origin, busy_s / 3600.0, intensity_g_kwh)
    fragment = Fragment(start_time=origin, duration_s=busy_s, power_w=watts)
    emissions = compute_emissions([fragment], flat)

    summary: dict[str, Any] = {
        "source": "inference",
        "model": infer["model"],
        "gpu": infer["gpu"],
        "intensity": float(intensity_g_kwh),
        "runtime_s": busy_s,
        "power_w": watts,
    }
    summary["total_energy_kwh"] = emissions.total_energy_kwh
    summary["total_co2_g"] = emissions.total_co2_g
    summary["total_co2_kg"] = emissions.total_co2_kg
    summary["total_tokens"] = infer["total_tokens"]
    return summary
165
+
166
+
167
def energy_from_inference(infer: dict[str, Any], gpu_hour_price: float | None) -> dict[str, Any]:
    """Derive energy and cost-per-million-token figures from one ``run_inference`` result.

    Energy and carbon come from ``run_carbon_from_inference`` at ``DEFAULT_INTENSITY_G_KWH``.
    The docstring this replaces stated that the $/Mtoken figure matches
    ``kavier_energy.metrics.financial_efficiency``.

    Args:
        infer: Result dict of ``run_inference``; reads ``total_tokens``, ``total_s``, ``model``
            and ``gpu``.
        gpu_hour_price: GPU price per hour, or ``None`` when no price is known.

    Returns:
        A dict of energy, carbon-per-Mtoken, GPU-hours and ``financial_per_mtoken``. The latter
        is ``None`` only when ``gpu_hour_price`` is ``None``; an explicit price of ``0`` yields
        a cost of ``0.0``.
    """
    carbon = run_carbon_from_inference(infer, intensity_g_kwh=DEFAULT_INTENSITY_G_KWH)
    total_tokens = infer["total_tokens"]
    energy_wh = carbon["total_energy_kwh"] * 1000.0
    # Scale factor from "per run" to "per million tokens"; 0 when the run produced no tokens.
    per_m = 1_000_000.0 / total_tokens if total_tokens else 0.0
    gpu_hours = infer["total_s"] / 3600.0
    # Test against None, not truthiness: a price of 0 is a valid price, not a missing one.
    financial_per_mtoken = gpu_hours * gpu_hour_price * per_m if gpu_hour_price is not None else None
    return {
        "model": infer["model"],
        "gpu": infer["gpu"],
        "total_tokens": total_tokens,
        "energy_wh": energy_wh,
        "energy_kwh": carbon["total_energy_kwh"],
        "energy_per_mtoken_wh": energy_wh * per_m,
        "carbon_per_mtoken_g": carbon["total_co2_g"] * per_m,
        "gpu_hours": gpu_hours,
        "financial_per_mtoken": financial_per_mtoken,
        "tokens_per_wh": total_tokens / energy_wh if energy_wh else 0.0,
    }
186
+
187
+
188
def export_opendc(infer: dict[str, Any], dst: Path) -> Path:
    """Write the tasks of an inference run, plus one fragment per task, via ``prepare_opendc_input``.

    Creates ``dst`` (including parents) when needed and returns it.
    """
    from kavier_io.opendc.adapter import prepare_opendc_input

    def _fragment_for(task: dict[str, Any]) -> dict[str, Any]:
        # 1 fragment per task suffices for OpenDC's power model; adapter coerces the schema.
        return {
            "id": task["id"],
            "duration": task["duration"],
            "cpu_count": 1,
            "cpu_usage": 0.0,
            "gpu_count": 1,
            "gpu_usage": task["gpu_capacity"],
        }

    tasks = pd.DataFrame(infer["_tasks"])
    frags = pd.DataFrame([_fragment_for(task) for task in infer["_tasks"]])
    dst.mkdir(parents=True, exist_ok=True)
    prepare_opendc_input(tasks, frags, str(dst))
    return dst
210
+
211
+
212
def _with_columns(rows: list[dict[str, Any]], predicted: list[dict[str, Any]]) -> pd.DataFrame:
    """Pair each input row with its prediction and return the merged rows as a DataFrame.

    On a key clash the predicted value replaces the input value. Rows are paired positionally
    with ``zip``, so the output has as many rows as the shorter of the two lists.
    """
    combined = [source | extra for source, extra in zip(rows, predicted)]
    return pd.DataFrame(combined)
216
+
217
+
218
def performance(batch: pd.DataFrame | list[dict[str, Any]] | dict[str, Any]) -> pd.DataFrame:
    """Per-workload latency/throughput.

    Returns the input rows plus ``p50_ms``, ``p95_ms``, ``mean_ttft_ms``, ``throughput_tok_s``,
    ``throughput_req_s``, ``total_s`` and ``total_tokens``.
    """
    wanted = ("p50_ms", "p95_ms", "mean_ttft_ms", "throughput_tok_s", "throughput_req_s", "total_s", "total_tokens")
    rows = _normalise(batch)
    results = (run_inference(_infer_params(row)) for row in rows)
    predicted: list[dict[str, Any]] = [{key: result[key] for key in wanted} for result in results]
    return _with_columns(rows, predicted)
227
+
228
+
229
def energy(batch: pd.DataFrame | list[dict[str, Any]] | dict[str, Any]) -> pd.DataFrame:
    """Per-workload energy (self-contained GPU-power estimate).

    Returns the input rows plus ``energy_wh``, ``energy_kwh``, ``energy_per_mtoken_wh``,
    ``tokens_per_wh`` and ``total_tokens``.
    """
    wanted = ("energy_wh", "energy_kwh", "energy_per_mtoken_wh", "tokens_per_wh", "total_tokens")

    def _predict(row: dict[str, Any]) -> dict[str, Any]:
        simulated = run_inference(_infer_params(row))
        estimate = energy_from_inference(simulated, gpu_hour_price=None)
        return {key: estimate[key] for key in wanted}

    rows = _normalise(batch)
    return _with_columns(rows, [_predict(row) for row in rows])
238
+
239
+
240
def efficiency(batch: pd.DataFrame | list[dict[str, Any]] | dict[str, Any]) -> pd.DataFrame:
    """Per-workload cost: input rows plus ``financial_per_mtoken`` ($/Mtoken) and ``gpu_hours``.

    The GPU $/hour comes from a ``gpu_hour_price`` column, else ``DEFAULT_GPU_HOUR_PRICE``.
    """

    def _predict(row: dict[str, Any]) -> dict[str, Any]:
        # Resolve the price before simulating, so a bad price fails before any simulation work.
        hourly = float(row.get("gpu_hour_price", DEFAULT_GPU_HOUR_PRICE))
        simulated = run_inference(_infer_params(row))
        estimate = energy_from_inference(simulated, gpu_hour_price=hourly)
        return {"financial_per_mtoken": estimate["financial_per_mtoken"], "gpu_hours": estimate["gpu_hours"]}

    rows = _normalise(batch)
    return _with_columns(rows, [_predict(row) for row in rows])
249
+
250
+
251
def carbon(batch: pd.DataFrame | list[dict[str, Any]] | dict[str, Any]) -> pd.DataFrame:
    """Per-workload emissions.

    Returns the input rows plus ``total_co2_g``, ``total_co2_kg``, ``carbon_per_mtoken_g`` and
    ``total_energy_kwh``. Intensity comes from an ``intensity`` column, else
    ``DEFAULT_INTENSITY_G_KWH``.
    """

    def _predict(row: dict[str, Any]) -> dict[str, Any]:
        grid_intensity = float(row.get("intensity", DEFAULT_INTENSITY_G_KWH))
        simulated = run_inference(_infer_params(row))
        billed = run_carbon_from_inference(simulated, intensity_g_kwh=grid_intensity)
        tokens = billed["total_tokens"]
        # Per-million-token scale; 0 when the run produced no tokens.
        scale = 1_000_000.0 / tokens if tokens else 0.0
        return {
            "total_co2_g": billed["total_co2_g"],
            "total_co2_kg": billed["total_co2_kg"],
            "carbon_per_mtoken_g": billed["total_co2_g"] * scale,
            "total_energy_kwh": billed["total_energy_kwh"],
        }

    rows = _normalise(batch)
    return _with_columns(rows, [_predict(row) for row in rows])
kavier/py.typed ADDED
File without changes
@@ -0,0 +1,164 @@
1
+ """Training predictors: ``performance / energy / efficiency / carbon`` over a batch of fine-tuning jobs.
2
+
3
+ Each verb takes a *batch* — a pandas DataFrame, a list of dicts, or a single dict (one row per job)
4
+ — and returns a DataFrame: the input rows plus the predicted columns. The per-row engine logic is
5
+ canonical here; ``kavier_ui.sims`` and the CLIs consume it, so the numbers match exactly.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from typing import Any
11
+
12
+ import pandas as pd
13
+
14
+ from kavier.inference import (
15
+ DEFAULT_GPU_HOUR_PRICE,
16
+ DEFAULT_INTENSITY_G_KWH,
17
+ _flat_trace,
18
+ _normalise,
19
+ _with_columns,
20
+ )
21
+ from kavier_co2.engine import Fragment, compute_emissions
22
+ from kavier_training.core.engine import simulate_full_training, simulate_training_step
23
+
24
+ DEFAULT_NUM_NODES = 1
25
+
26
+
27
def _train_params(row: dict[str, Any]) -> dict[str, Any]:
    """Copy ``row`` and add ``num_nodes`` (``DEFAULT_NUM_NODES``) when the row omits it."""
    params = dict(row)
    params.setdefault("num_nodes", DEFAULT_NUM_NODES)
    return params
30
+
31
+
32
def run_training(p: dict[str, Any]) -> dict[str, Any]:
    """Combine ``simulate_full_training`` (whole job) with ``simulate_training_step`` (one step).

    Args:
        p: Job parameters. Reads ``model``, ``method``, ``gpu``, ``seq_len``, ``batch_size``,
            ``num_gpus``, ``num_nodes`` and, when present and truthy, ``total_tokens``,
            ``epochs`` and ``dataset_tokens``.

    Returns:
        The full-training dict overlaid with the per-step dict, plus ``total_gpus``
        (``num_gpus * num_nodes``) and ``aggregate_power_w`` (per-step ``gpu_power_watts``
        times ``total_gpus``).
    """
    model = p["model"]
    method = p["method"]
    gpu = p["gpu"]
    seq_len = int(p["seq_len"])
    batch_size = int(p["batch_size"])
    gpus_per_node = int(p["num_gpus"])
    nodes = int(p["num_nodes"])

    def _optional(key: str, cast: Any) -> Any:
        # Absent or falsy job-size keys are passed to the engine as None.
        return cast(p[key]) if p.get(key) else None

    full = simulate_full_training(
        model_name=model,
        method=method,
        gpu_model=gpu,
        tokens_per_sample=seq_len,
        batch_size=batch_size,
        number_gpus=gpus_per_node,
        number_nodes=nodes,
        total_tokens=_optional("total_tokens", int),
        epochs=_optional("epochs", float),
        dataset_tokens=_optional("dataset_tokens", int),
    )

    total_gpus = gpus_per_node * nodes
    step = simulate_training_step(
        model_name=model,
        gpu_model=gpu,
        tokens_per_sample=seq_len,
        batch_size=batch_size,
        method=method,
        num_gpus=total_gpus,
        num_nodes=nodes,
    )

    out: dict[str, Any] = {**full, **step}
    out["total_gpus"] = total_gpus
    out["aggregate_power_w"] = step["gpu_power_watts"] * total_gpus
    return out
59
+
60
+
61
def run_carbon_from_training(p: dict[str, Any]) -> dict[str, Any]:
    """Bill one power fragment from ``run_training`` against a flat carbon intensity.

    Args:
        p: Training parameters as accepted by ``run_training``, plus ``intensity``.

    Returns:
        A dict with the run identity, runtime, aggregate power, and the energy/CO2 totals
        reported by ``compute_emissions``.

    Raises:
        ValueError: The simulated ``train_runtime`` is not positive.
    """
    trained = run_training(p)
    duration_s = float(trained["train_runtime"])
    if duration_s <= 0:
        raise ValueError("training runtime is 0 — set a job size (total tokens or epochs) to bill carbon")

    watts = float(trained["aggregate_power_w"])
    # Fixed nominal start; the flat trace makes the absolute time irrelevant to the intensity.
    origin = pd.Timestamp("2026-01-01 00:00:00")
    flat = _flat_trace(origin, duration_s / 3600.0, p["intensity"])
    fragment = Fragment(start_time=origin, duration_s=duration_s, power_w=watts)
    emissions = compute_emissions([fragment], flat)

    summary: dict[str, Any] = {
        "source": "training",
        "model": trained["model_name"],
        "gpu": trained["gpu_name"],
        "intensity": float(p["intensity"]),
        "runtime_s": duration_s,
        "power_w": watts,
    }
    summary["total_energy_kwh"] = emissions.total_energy_kwh
    summary["total_co2_g"] = emissions.total_co2_g
    summary["total_co2_kg"] = emissions.total_co2_kg
    summary["total_tokens"] = trained["total_tokens"]
    return summary
84
+
85
+
86
def performance(batch: pd.DataFrame | list[dict[str, Any]] | dict[str, Any]) -> pd.DataFrame:
    """Per-job throughput/utilisation.

    Returns the input rows plus ``train_tokens_per_second``, ``train_runtime``,
    ``train_samples_per_second``, ``gpu_compute_utilization``, ``gpu_memory_utilization``,
    ``gpu_power_watts`` and ``total_tokens``.
    """
    wanted = (
        "train_tokens_per_second",
        "train_runtime",
        "train_samples_per_second",
        "gpu_compute_utilization",
        "gpu_memory_utilization",
        "gpu_power_watts",
        "total_tokens",
    )
    rows = _normalise(batch)
    results = (run_training(_train_params(row)) for row in rows)
    predicted: list[dict[str, Any]] = [{key: result[key] for key in wanted} for result in results]
    return _with_columns(rows, predicted)
103
+
104
+
105
def energy(batch: pd.DataFrame | list[dict[str, Any]] | dict[str, Any]) -> pd.DataFrame:
    """Per-job energy (self-contained GPU-power estimate).

    Returns the input rows plus ``energy_wh``, ``energy_kwh``, ``energy_per_mtoken_wh``,
    ``aggregate_power_w`` and ``total_tokens``. Energy is computed through
    ``run_carbon_from_training`` at ``DEFAULT_INTENSITY_G_KWH``.
    """

    def _predict(row: dict[str, Any]) -> dict[str, Any]:
        billed = run_carbon_from_training({**_train_params(row), "intensity": DEFAULT_INTENSITY_G_KWH})
        tokens = billed["total_tokens"]
        watt_hours = billed["total_energy_kwh"] * 1000.0
        # Per-million-token scale; 0 when the job has no tokens.
        scale = 1_000_000.0 / tokens if tokens else 0.0
        return {
            "energy_wh": watt_hours,
            "energy_kwh": billed["total_energy_kwh"],
            "energy_per_mtoken_wh": watt_hours * scale,
            "aggregate_power_w": billed["power_w"],
            "total_tokens": tokens,
        }

    rows = _normalise(batch)
    return _with_columns(rows, [_predict(row) for row in rows])
124
+
125
+
126
def efficiency(batch: pd.DataFrame | list[dict[str, Any]] | dict[str, Any]) -> pd.DataFrame:
    """Per-job cost: input rows plus ``financial_per_mtoken`` ($/Mtoken) and ``gpu_hours``.

    The GPU $/hour comes from a ``gpu_hour_price`` column, else ``DEFAULT_GPU_HOUR_PRICE``.
    ``financial_per_mtoken`` is ``None`` when the job reports no tokens.
    """

    def _predict(row: dict[str, Any]) -> dict[str, Any]:
        trained = run_training(_train_params(row))
        tokens = trained["total_tokens"]
        duration_s = float(trained["train_runtime"])
        hourly = float(row.get("gpu_hour_price", DEFAULT_GPU_HOUR_PRICE))
        # GPU-hours = wall-clock runtime x total GPUs (matches the inference $/Mtoken basis).
        gpu_hours = duration_s / 3600.0 * int(trained["total_gpus"])
        scale = 1_000_000.0 / tokens if tokens else 0.0
        cost = gpu_hours * hourly * scale if tokens else None
        return {"financial_per_mtoken": cost, "gpu_hours": gpu_hours}

    rows = _normalise(batch)
    return _with_columns(rows, [_predict(row) for row in rows])
145
+
146
+
147
def carbon(batch: pd.DataFrame | list[dict[str, Any]] | dict[str, Any]) -> pd.DataFrame:
    """Per-job emissions.

    Returns the input rows plus ``total_co2_g``, ``total_co2_kg``, ``carbon_per_mtoken_g`` and
    ``total_energy_kwh``. Intensity comes from an ``intensity`` column, else
    ``DEFAULT_INTENSITY_G_KWH``.
    """

    def _predict(row: dict[str, Any]) -> dict[str, Any]:
        grid_intensity = float(row.get("intensity", DEFAULT_INTENSITY_G_KWH))
        billed = run_carbon_from_training({**_train_params(row), "intensity": grid_intensity})
        tokens = billed["total_tokens"]
        # Per-million-token scale; 0 when the job has no tokens.
        scale = 1_000_000.0 / tokens if tokens else 0.0
        return {
            "total_co2_g": billed["total_co2_g"],
            "total_co2_kg": billed["total_co2_kg"],
            "carbon_per_mtoken_g": billed["total_co2_g"] * scale,
            "total_energy_kwh": billed["total_energy_kwh"],
        }

    rows = _normalise(batch)
    return _with_columns(rows, [_predict(row) for row in rows])
@@ -0,0 +1,129 @@
1
+ Metadata-Version: 2.4
2
+ Name: kavier
3
+ Version: 0.3.1
4
+ Summary: Kavier: Simulating the Performance, Sustainability, and Efficiency of LLM Ecosystems under Inference and Training
5
+ Author: Radu Nicolae
6
+ Author-email: AtLarge Research <info@atlarge-research.com>
7
+ License-Expression: MIT
8
+ Project-URL: Homepage, https://github.com/atlarge-research/kavier
9
+ Project-URL: Repository, https://github.com/atlarge-research/kavier
10
+ Project-URL: Documentation, https://github.com/atlarge-research/kavier
11
+ Project-URL: Bug Tracker, https://github.com/atlarge-research/kavier/issues
12
+ Keywords: LLM,simulation,performance,sustainability,energy,carbon,GPU,training,inference
13
+ Classifier: Development Status :: 4 - Beta
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Programming Language :: Python :: 3.13
17
+ Classifier: Intended Audience :: Science/Research
18
+ Classifier: Intended Audience :: Developers
19
+ Classifier: Topic :: Scientific/Engineering
20
+ Requires-Python: >=3.11
21
+ Description-Content-Type: text/markdown
22
+ License-File: LICENSE.txt
23
+ Requires-Dist: pydantic>=2.7
24
+ Requires-Dist: pandas>=2.2
25
+ Requires-Dist: pyarrow>=23.0.1
26
+ Requires-Dist: numpy>=2.2.6
27
+ Requires-Dist: tqdm>=4.67.1
28
+ Requires-Dist: cachetools>=6.1
29
+ Requires-Dist: rich>=13
30
+ Requires-Dist: pyyaml>=6
31
+ Provides-Extra: dev
32
+ Requires-Dist: pytest>=8.2; extra == "dev"
33
+ Requires-Dist: hypothesis>=6.102; extra == "dev"
34
+ Requires-Dist: ruff>=0.4; extra == "dev"
35
+ Requires-Dist: mypy>=1.10; extra == "dev"
36
+ Requires-Dist: pandas-stubs>=2.2; extra == "dev"
37
+ Requires-Dist: types-tqdm; extra == "dev"
38
+ Requires-Dist: types-cachetools; extra == "dev"
39
+ Requires-Dist: types-PyYAML; extra == "dev"
40
+ Dynamic: license-file
41
+
42
+ # Kavier
43
+
44
+ Simulating performance, sustainability, and efficiency of LLM Ecosystems under inference and training.
45
+
46
+ [![MIT License](https://img.shields.io/badge/License-MIT-green.svg)](LICENSE.txt)
47
+ [![Documentation](https://img.shields.io/badge/docs-main-green.svg)](docs/index.html)
48
+ [![CI](https://github.com/atlarge-research/kavier/actions/workflows/ci.yml/badge.svg)](https://github.com/atlarge-research/kavier/actions/workflows/ci.yml)
49
+
50
+ ---
51
+
52
+ This repository is the home of Kavier, the first scientific instrument for
53
+ predicting performance, sustainability, and efficiency of LLM ecosystems under
54
+ inference and training.
55
+
56
+ Kavier helps operators, researchers, and engineers predict:
57
+ * **Performance** — inference latencies, training throughput, GPU utilization
58
+ * **Sustainability** — energy consumption, carbon emissions (gCO2/Mtoken)
59
+ * **Efficiency** — financial and energy cost per token/sample given GPU-hour prices
60
+
61
+ ## Quick start
62
+
63
+ ```bash
64
+ git clone https://github.com/atlarge-research/kavier.git
65
+ cd kavier
66
+
67
+ python -m venv .venv
68
+ source .venv/bin/activate # Windows: .venv\Scripts\activate
69
+ python -m pip install -U pip
70
+ pip install -e ".[dev]"
71
+ ```
72
+
73
+ Run your first simulation against the tiny bundled synthetic example trace:
74
+
75
+ ```bash
76
+ kavier-perf --trace src/kavier_inference/data/input/input_example.csv
77
+ ```
78
+
79
+ Congrats! You have just run your first simulation with Kavier! 🎉
80
+
81
+ Or skip the flags — launch the **interactive UI** and pick a simulator, model and
82
+ GPU from guided menus, then chain into energy/carbon or export OpenDC:
83
+
84
+ ```bash
85
+ kavier
86
+ ```
87
+
88
+ If you installed Kavier from PyPI (`pip install kavier`) you have no `src/`
89
+ directory; the same synthetic example trace ships inside the package, so resolve
90
+ its path via `importlib.resources`:
91
+
92
+ ```bash
93
+ TRACE=$(python -c "from importlib.resources import files; print(files('kavier_inference')/'data/input/input_example.csv')")
94
+ kavier-perf --trace "$TRACE"
95
+ ```
96
+
97
+ ## Structure
98
+
99
+ Kavier is organized into the following first-party packages:
100
+
101
+ ```
102
+ src/
103
+ ├── kavier/ # Umbrella facade (re-exports the sub-packages)
104
+ ├── kavier_inference/ # Inference simulation (kavier-perf)
105
+ ├── kavier_training/ # Training simulation (kavier-train)
106
+ ├── kavier_energy/ # Energy calculator (kavier-energy)
107
+ ├── kavier_co2/ # Carbon emissions (kavier-co2)
108
+ ├── kavier_library/ # Shared GPU & LLM specifications
109
+ ├── kavier_io/ # Shared I/O utilities
110
+ │ └── opendc/ # OpenDC workload export (tasks/fragments)
111
+ ├── kavier_ui/ # Interactive REPL (the `kavier` command)
112
+ └── tests/ # Test suites
113
+ ```
114
+
115
+ ## Documentation
116
+
117
+ See [docs/index.html](docs/index.html) for the main documentation: getting started,
118
+ the Kavier CLIs (`kavier` interactive UI, `kavier-perf`, `kavier-train`,
119
+ `kavier-energy`, `kavier-co2`), the YAML `--config` input,
120
+ structure, and the contributing guide.
121
+
122
+ ## Contributing
123
+
124
+ Questions, suggestions and contributions are welcome and appreciated!
125
+ Please refer to the [contributing guide](docs/contributing.md) for more details.
126
+
127
+ ## License
128
+
129
+ Kavier is distributed under the MIT license. See [LICENSE.txt](/LICENSE.txt).