kavier 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kavier/__init__.py +108 -0
- kavier/inference/__init__.py +269 -0
- kavier/py.typed +0 -0
- kavier/training/__init__.py +164 -0
- kavier-0.3.1.dist-info/METADATA +129 -0
- kavier-0.3.1.dist-info/RECORD +78 -0
- kavier-0.3.1.dist-info/WHEEL +5 -0
- kavier-0.3.1.dist-info/entry_points.txt +7 -0
- kavier-0.3.1.dist-info/licenses/LICENSE.txt +21 -0
- kavier-0.3.1.dist-info/top_level.txt +8 -0
- kavier_co2/__init__.py +1 -0
- kavier_co2/cli.py +163 -0
- kavier_co2/engine.py +157 -0
- kavier_co2/fragments.py +91 -0
- kavier_co2/py.typed +0 -0
- kavier_energy/__init__.py +1 -0
- kavier_energy/calculator.py +55 -0
- kavier_energy/cli_args.py +26 -0
- kavier_energy/engine.py +19 -0
- kavier_energy/metrics.py +57 -0
- kavier_energy/py.typed +0 -0
- kavier_inference/__init__.py +1 -0
- kavier_inference/cli.py +29 -0
- kavier_inference/core/__init__.py +1 -0
- kavier_inference/core/args.py +73 -0
- kavier_inference/core/cache.py +41 -0
- kavier_inference/core/config.py +34 -0
- kavier_inference/core/engine.py +74 -0
- kavier_inference/core/metrics.py +58 -0
- kavier_inference/core/runner.py +79 -0
- kavier_inference/core/service.py +57 -0
- kavier_inference/data/input/input_example.csv +7 -0
- kavier_inference/py.typed +0 -0
- kavier_inference/stages/__init__.py +1 -0
- kavier_inference/stages/decode.py +20 -0
- kavier_inference/stages/gpu_usage.py +14 -0
- kavier_inference/stages/kv_usage.py +27 -0
- kavier_inference/stages/prefill.py +12 -0
- kavier_io/__init__.py +1 -0
- kavier_io/config.py +29 -0
- kavier_io/constants.py +6 -0
- kavier_io/input_spec.py +80 -0
- kavier_io/log.py +8 -0
- kavier_io/opendc/__init__.py +1 -0
- kavier_io/opendc/adapter.py +79 -0
- kavier_io/opendc/py.typed +0 -0
- kavier_io/opendc/schema.py +27 -0
- kavier_io/py.typed +0 -0
- kavier_io/stream_writer.py +25 -0
- kavier_io/training_opendc.py +135 -0
- kavier_library/__init__.py +13 -0
- kavier_library/gpu.py +152 -0
- kavier_library/llm.py +156 -0
- kavier_library/lookup.py +33 -0
- kavier_library/py.typed +0 -0
- kavier_library/specs/GPUSpec.py +32 -0
- kavier_library/specs/LLMSpec.py +25 -0
- kavier_library/specs/__init__.py +6 -0
- kavier_training/__init__.py +1 -0
- kavier_training/cli.py +132 -0
- kavier_training/core/__init__.py +1 -0
- kavier_training/core/calibration.py +106 -0
- kavier_training/core/cli_args.py +39 -0
- kavier_training/core/config.py +4 -0
- kavier_training/core/engine.py +237 -0
- kavier_training/data/calibration.json +96 -0
- kavier_training/data/input/input_example.csv +101 -0
- kavier_training/py.typed +0 -0
- kavier_training/validation/__init__.py +1 -0
- kavier_training/validation/run_benchmarks.py +125 -0
- kavier_training/validation/validator.py +117 -0
- kavier_ui/__init__.py +11 -0
- kavier_ui/__main__.py +16 -0
- kavier_ui/app.py +264 -0
- kavier_ui/prompts.py +243 -0
- kavier_ui/render.py +202 -0
- kavier_ui/sims.py +37 -0
- kavier_ui/theme.py +46 -0
kavier/__init__.py
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
"""Umbrella facade: the public ``inference``/``training`` API plus ``kavier.<sub>`` legacy aliases."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import importlib as _importlib
|
|
6
|
+
import sys as _sys
|
|
7
|
+
from importlib.abc import Loader as _Loader
|
|
8
|
+
from importlib.abc import MetaPathFinder as _MetaPathFinder
|
|
9
|
+
from importlib.machinery import ModuleSpec as _ModuleSpec
|
|
10
|
+
from importlib.metadata import PackageNotFoundError as _PackageNotFoundError
|
|
11
|
+
from importlib.metadata import version as _pkg_version
|
|
12
|
+
from types import ModuleType as _ModuleType
|
|
13
|
+
from typing import Any as _Any
|
|
14
|
+
from typing import Sequence as _Sequence
|
|
15
|
+
|
|
16
|
+
# Pure aliases: ``kavier.<sub>`` (bare AND deep) resolves to the same object as ``kavier_<sub>``.
|
|
17
|
+
_ALIAS_TO_LEGACY = {
|
|
18
|
+
"io": "kavier_io",
|
|
19
|
+
"energy": "kavier_energy",
|
|
20
|
+
"co2": "kavier_co2",
|
|
21
|
+
"library": "kavier_library",
|
|
22
|
+
"opendc": "kavier_io.opendc",
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
# ``inference`` and ``training`` are REAL packages here (the public predictor API). The bare module is
|
|
26
|
+
# the new package, but DEEP imports (``kavier.training.core.calibration`` ...) still resolve to the
|
|
27
|
+
# legacy package — preserving the live calibration ``_CAL`` swap contract (one module per spelling).
|
|
28
|
+
_API_PACKAGES = {
|
|
29
|
+
"inference": "kavier_inference",
|
|
30
|
+
"training": "kavier_training",
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class _LegacyAliasFinder(_MetaPathFinder):
    """Meta-path finder that redirects ``<this package>.<name>`` imports to the legacy packages.

    Names listed in ``_ALIAS_TO_LEGACY`` are redirected both bare and deep. Names listed in
    ``_API_PACKAGES`` are redirected only for deep imports; the bare name is left to the
    remaining finders.
    """

    _prefix = f"{__name__}."

    def find_spec(
        self,
        fullname: str,
        path: _Sequence[str] | None = None,
        target: _ModuleType | None = None,
    ) -> _ModuleSpec | None:
        """Return an aliasing spec for ``fullname``, or ``None`` when this finder does not handle it."""
        prefix = self._prefix
        if not fullname.startswith(prefix):
            return None

        head, _, rest = fullname[len(prefix) :].partition(".")
        if head in _ALIAS_TO_LEGACY:
            legacy_root = _ALIAS_TO_LEGACY[head]
        elif rest and head in _API_PACKAGES:
            # Only submodules of the real API packages are aliased; the bare package is not.
            legacy_root = _API_PACKAGES[head]
        else:
            return None

        legacy_name = f"{legacy_root}.{rest}" if rest else legacy_root
        return _ModuleSpec(fullname, _LegacyAliasLoader(legacy_name))
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
class _LegacyAliasLoader(_Loader):
    """Loader that serves an alias name by importing the legacy module and returning that same object.

    The alias and the legacy name therefore share one module object in ``sys.modules``.
    """

    def __init__(self, legacy_name: str) -> None:
        self._legacy_name = legacy_name
        # Spec the legacy module carried before the import machinery rebinds ``__spec__``.
        self._legacy_spec: _ModuleSpec | None = None

    def create_module(self, spec: _ModuleSpec) -> _ModuleType:
        """Import the legacy module and register it under the alias name ``spec.name``."""
        module = _importlib.import_module(self._legacy_name)
        self._legacy_spec = getattr(module, "__spec__", None)
        _sys.modules[spec.name] = module
        return module

    def exec_module(self, module: _ModuleType) -> None:
        """Skip execution (the legacy module already ran) and restore its own ``__spec__``.

        ``importlib`` assigns the alias spec to ``module.__spec__`` right after ``create_module``.
        Because the module object is shared with the legacy name, leaving that in place would make
        the legacy module report the alias name through ``__spec__`` (used e.g. by
        ``importlib.reload``). Putting the captured spec back keeps the legacy metadata intact.
        """
        if self._legacy_spec is not None:
            module.__spec__ = self._legacy_spec
        return None
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
_sys.meta_path.insert(0, _LegacyAliasFinder())
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def __getattr__(name: str) -> _Any:
    """Module-level attribute hook: import an aliased sub-package on first attribute access.

    Names present in ``_ALIAS_TO_LEGACY`` are imported as ``<this package>.<name>``; any other
    name raises ``AttributeError``.
    """
    if _ALIAS_TO_LEGACY.get(name) is None:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
    return _importlib.import_module(f"{__name__}.{name}")
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
from kavier import inference, training # noqa: E402
|
|
83
|
+
from kavier_library.gpu import GPU_SPEC_LIBRARY # noqa: E402
|
|
84
|
+
from kavier_library.llm import LLM_SPEC_LIBRARY # noqa: E402
|
|
85
|
+
from kavier_training.core.engine import ( # noqa: E402
|
|
86
|
+
simulate_full_training,
|
|
87
|
+
simulate_training_step,
|
|
88
|
+
)
|
|
89
|
+
|
|
90
|
+
__all__ = [
|
|
91
|
+
"simulate_training_step",
|
|
92
|
+
"simulate_full_training",
|
|
93
|
+
"GPU_SPEC_LIBRARY",
|
|
94
|
+
"LLM_SPEC_LIBRARY",
|
|
95
|
+
"training",
|
|
96
|
+
"inference",
|
|
97
|
+
"io",
|
|
98
|
+
"energy",
|
|
99
|
+
"co2",
|
|
100
|
+
"library",
|
|
101
|
+
"opendc",
|
|
102
|
+
]
|
|
103
|
+
|
|
104
|
+
# Version from installed dist metadata; pyproject's static ``version`` is the single source of truth.
|
|
105
|
+
try:
|
|
106
|
+
__version__ = _pkg_version("kavier")
|
|
107
|
+
except _PackageNotFoundError: # editable/source tree without dist metadata
|
|
108
|
+
__version__ = "0.0.0+unknown"
|
|
@@ -0,0 +1,269 @@
|
|
|
1
|
+
"""Inference predictors: ``performance / energy / efficiency / carbon`` over a batch of serving workloads.
|
|
2
|
+
|
|
3
|
+
Each verb takes a *batch* — a pandas DataFrame, a list of dicts, or a single dict (one row per
|
|
4
|
+
workload) — and returns a DataFrame: the input rows plus the predicted columns. The per-row engine
|
|
5
|
+
logic is canonical here; ``kavier_ui.sims`` and the CLIs consume it, so the numbers match exactly.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import datetime as dt
|
|
11
|
+
import time
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from typing import Any
|
|
14
|
+
|
|
15
|
+
import numpy as np
|
|
16
|
+
import pandas as pd
|
|
17
|
+
|
|
18
|
+
from kavier_co2.engine import CarbonTrace, Fragment, compute_emissions
|
|
19
|
+
from kavier_inference.core.cache import PrefixCache
|
|
20
|
+
from kavier_inference.core.config import CacheCfg, SimConfig
|
|
21
|
+
from kavier_inference.core.metrics import Metrics
|
|
22
|
+
from kavier_inference.core.runner import simulate_one
|
|
23
|
+
from kavier_library import get_gpu, get_llm
|
|
24
|
+
|
|
25
|
+
# Defaults for workload keys a batch may omit (mirror the UI prompt defaults).
|
|
26
|
+
DEFAULT_KV_CACHE = True
|
|
27
|
+
DEFAULT_PREFIX_POLICY = "prefill"
|
|
28
|
+
DEFAULT_PREFIX_MIN_TOKENS = 1024
|
|
29
|
+
DEFAULT_INTENSITY_G_KWH = 400.0
|
|
30
|
+
DEFAULT_GPU_HOUR_PRICE = 2.5
|
|
31
|
+
|
|
32
|
+
Batch = "pd.DataFrame | list[dict[str, Any]] | dict[str, Any]"
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _drop_missing(row: dict[str, Any]) -> dict[str, Any]:
    """Return a copy of ``row`` without missing cells, so ``.get(key)`` means 'absent'.

    A heterogeneous DataFrame fills gaps with a missing-value marker. A cell counts as missing
    when it is a scalar that ``pd.isna`` reports as null: ``None``, NaN of any float type,
    ``pd.NA`` and ``pd.NaT``. Non-scalar cells (lists, dicts, arrays) are always kept, because
    ``pd.isna`` would answer element-wise for them.
    """
    return {
        key: value
        for key, value in row.items()
        if not (pd.api.types.is_scalar(value) and pd.isna(value))
    }
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _normalise(batch: pd.DataFrame | list[dict[str, Any]] | dict[str, Any]) -> list[dict[str, Any]]:
    """Turn any accepted batch shape into a list of plain row dicts with missing cells removed.

    A single dict becomes a one-row batch, a DataFrame contributes one dict per record (column
    labels converted to ``str``), and any other iterable is read as a sequence of row mappings.
    Every row is copied, then passed through ``_drop_missing``.
    """
    records: list[dict[str, Any]]
    if isinstance(batch, dict):
        records = [dict(batch)]
    elif isinstance(batch, pd.DataFrame):
        records = []
        for rec in batch.to_dict(orient="records"):
            records.append({str(col): cell for col, cell in rec.items()})
    else:
        records = list(map(dict, batch))
    return list(map(_drop_missing, records))
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _infer_params(row: dict[str, Any]) -> dict[str, Any]:
    """Return a copy of ``row`` with the cache-related engine keys filled in when absent.

    ``kv_cache``, ``prefix_policy`` and ``prefix_min_tokens`` fall back to the module defaults;
    values already present in ``row`` are kept untouched.
    """
    params = dict(row)
    params.setdefault("kv_cache", DEFAULT_KV_CACHE)
    params.setdefault("prefix_policy", DEFAULT_PREFIX_POLICY)
    params.setdefault("prefix_min_tokens", DEFAULT_PREFIX_MIN_TOKENS)
    return params
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def run_inference(p: dict[str, Any]) -> dict[str, Any]:
    """Simulate a homogeneous workload by calling ``simulate_one`` once per request.

    Args:
        p: Workload parameters. Reads ``model``, ``gpu``, ``num_requests``, ``input_tokens``,
            ``output_tokens``, ``kv_cache``, ``prefix_policy`` and ``prefix_min_tokens``.

    Returns:
        A dict with the echoed configuration, summed prefill/decode seconds, latency statistics
        in milliseconds, throughput, prefix-cache counters, and the per-request task records
        under ``_tasks``. With no requests, every latency statistic and ratio is ``0.0``.
    """
    llm = get_llm(p["model"])
    gpu = get_gpu(p["gpu"])
    cfg = SimConfig(
        export_rate=0.1,
        kv_cache=bool(p["kv_cache"]),
        cache=CacheCfg(min_len=int(p["prefix_min_tokens"]), action=p["prefix_policy"], scope="session", max_entries=10),
    )

    n = int(p["num_requests"])
    n_in, n_out = int(p["input_tokens"]), int(p["output_tokens"])
    cache = PrefixCache(cfg.cache)
    metrics = Metrics()
    # Integer division keeps the millisecond timestamp exact; a float round-trip of time_ns() is lossy.
    t0 = time.time_ns() // 1_000_000
    ttfts: list[float] = []
    tasks: list[dict[str, Any]] = []
    for i in range(n):
        task, _frags, t_p, t_d = simulate_one(
            idx=i,
            session_id=None,
            n_in_tokens=n_in,
            n_out_tokens=n_out,
            in_tokens=None,
            llm=llm,
            gpu=gpu,
            cache=cache,
            cfg=cfg,
            export_rate_s=cfg.export_rate,
            t0_ms=t0,
        )
        metrics.add(t_p, t_d, (t_p + t_d) * 1000.0)
        ttfts.append(t_p * 1000.0)
        tasks.append(task)

    total_s = metrics.sum_prefill + metrics.sum_decode
    total_tokens = n * (n_in + n_out)
    lat = np.asarray(metrics.latencies)
    if lat.size:
        mean_ttft_ms = float(np.mean(ttfts))
        p50_ms = float(np.percentile(lat, 50))
        p95_ms = float(np.percentile(lat, 95))
        p99_ms = float(np.percentile(lat, 99))
    else:
        # No requests were simulated: report zeros, matching the guarded throughput and ratio
        # fields below, instead of asking NumPy for statistics of an empty array.
        mean_ttft_ms = p50_ms = p95_ms = p99_ms = 0.0
    return {
        "model": llm.name,
        "gpu": gpu.name,
        "num_requests": n,
        "input_tokens": n_in,
        "output_tokens": n_out,
        "kv_cache": cfg.kv_cache,
        "prefix_policy": cfg.cache.action,
        "prefix_min_tokens": cfg.cache.min_len,
        "prefill_s": metrics.sum_prefill,
        "decode_s": metrics.sum_decode,
        "total_s": total_s,
        "mean_ttft_ms": mean_ttft_ms,
        "p50_ms": p50_ms,
        "p95_ms": p95_ms,
        "p99_ms": p99_ms,
        "throughput_req_s": n / total_s if total_s else 0.0,
        "throughput_tok_s": total_tokens / total_s if total_s else 0.0,
        "total_tokens": total_tokens,
        "cache_hits": cache.hits,
        "cache_hit_ratio": cache.hits / n if n else 0.0,
        "evictions": cache.evictions,
        "_tasks": tasks,  # reused by the energy chain
    }
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def _flat_trace(start: pd.Timestamp, hours: float, intensity_g_kwh: float) -> CarbonTrace:
    """Build a constant-intensity carbon trace so ``compute_emissions`` needs no external grid data.

    The trace has one row per hour starting at ``start``: ``int(hours) + 2`` rows, never fewer
    than two, each carrying ``intensity_g_kwh`` as a float.
    """
    n_rows = max(int(hours) + 2, 2)
    timestamps = []
    for offset in range(n_rows):
        timestamps.append(start + dt.timedelta(hours=offset))
    level = float(intensity_g_kwh)
    frame = pd.DataFrame({"timestamp": timestamps, "carbon_intensity": [level for _ in range(n_rows)]})
    return CarbonTrace.from_dataframe(frame)
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def run_carbon_from_inference(infer: dict[str, Any], intensity_g_kwh: float) -> dict[str, Any]:
    """Bill the GPU's ``max_power_w`` over the run's ``total_s`` against a flat carbon intensity.

    Args:
        infer: Result dict of ``run_inference``; reads ``gpu``, ``model``, ``total_s`` and
            ``total_tokens``.
        intensity_g_kwh: Constant intensity used for the synthetic trace.

    Returns:
        A dict with the billing inputs (runtime, power, intensity) and the energy/CO2 totals
        reported by ``compute_emissions``.
    """
    device = get_gpu(infer["gpu"])
    busy_s = float(infer["total_s"])
    draw_w = float(device.max_power_w)
    # Fixed reference start: the trace is flat, so only the duration matters for the fragment.
    origin = pd.Timestamp("2026-01-01 00:00:00")
    flat = _flat_trace(origin, busy_s / 3600.0, intensity_g_kwh)
    fragment = Fragment(start_time=origin, duration_s=busy_s, power_w=draw_w)
    emissions = compute_emissions([fragment], flat)

    report: dict[str, Any] = {"source": "inference", "model": infer["model"], "gpu": infer["gpu"]}
    report["intensity"] = float(intensity_g_kwh)
    report["runtime_s"] = busy_s
    report["power_w"] = draw_w
    report["total_energy_kwh"] = emissions.total_energy_kwh
    report["total_co2_g"] = emissions.total_co2_g
    report["total_co2_kg"] = emissions.total_co2_kg
    report["total_tokens"] = infer["total_tokens"]
    return report
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def energy_from_inference(infer: dict[str, Any], gpu_hour_price: float | None) -> dict[str, Any]:
    """Derive energy and cost-per-million-token figures from an inference result.

    Energy comes from ``run_carbon_from_inference`` at the default intensity. GPU-hours are
    ``total_s / 3600``.

    Args:
        infer: Result dict of ``run_inference``.
        gpu_hour_price: Price of one GPU-hour. When falsy (``None`` or ``0``),
            ``financial_per_mtoken`` is ``None``.

    Returns:
        A dict of energy, carbon-per-Mtoken, GPU-hour and cost figures. Per-Mtoken values are
        ``0.0`` when ``total_tokens`` is falsy, and ``tokens_per_wh`` is ``0.0`` when no energy
        was billed.
    """
    footprint = run_carbon_from_inference(infer, intensity_g_kwh=DEFAULT_INTENSITY_G_KWH)
    total_tokens = infer["total_tokens"]
    energy_wh = footprint["total_energy_kwh"] * 1000.0
    # Multiplier that rescales a whole-run quantity to "per one million tokens".
    if total_tokens:
        per_m = 1_000_000.0 / total_tokens
    else:
        per_m = 0.0
    gpu_hours = infer["total_s"] / 3600.0

    if gpu_hour_price:
        financial = gpu_hours * gpu_hour_price * per_m
    else:
        financial = None
    if energy_wh:
        tokens_per_wh = total_tokens / energy_wh
    else:
        tokens_per_wh = 0.0

    return {
        "model": infer["model"],
        "gpu": infer["gpu"],
        "total_tokens": total_tokens,
        "energy_wh": energy_wh,
        "energy_kwh": footprint["total_energy_kwh"],
        "energy_per_mtoken_wh": energy_wh * per_m,
        "carbon_per_mtoken_g": footprint["total_co2_g"] * per_m,
        "gpu_hours": gpu_hours,
        "financial_per_mtoken": financial,
        "tokens_per_wh": tokens_per_wh,
    }
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def export_opendc(infer: dict[str, Any], dst: Path) -> Path:
    """Write the run's task records, plus one fragment per task, as OpenDC input under ``dst``.

    Args:
        infer: Result dict of ``run_inference``; its ``_tasks`` records must provide ``id``,
            ``duration`` and ``gpu_capacity``.
        dst: Output directory, created (with parents) when missing.

    Returns:
        ``dst``, unchanged.
    """
    from kavier_io.opendc.adapter import prepare_opendc_input

    task_records = infer["_tasks"]
    tasks = pd.DataFrame(task_records)
    # 1 fragment per task suffices for OpenDC's power model; adapter coerces the schema.
    fragment_rows = []
    for task in task_records:
        fragment_rows.append(
            {
                "id": task["id"],
                "duration": task["duration"],
                "cpu_count": 1,
                "cpu_usage": 0.0,
                "gpu_count": 1,
                "gpu_usage": task["gpu_capacity"],
            }
        )
    frags = pd.DataFrame(fragment_rows)
    dst.mkdir(parents=True, exist_ok=True)
    prepare_opendc_input(tasks, frags, str(dst))
    return dst
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
def _with_columns(rows: list[dict[str, Any]], predicted: list[dict[str, Any]]) -> pd.DataFrame:
|
|
213
|
+
"""Input rows + predicted columns, one output row per input row."""
|
|
214
|
+
merged = [{**row, **pred} for row, pred in zip(rows, predicted)]
|
|
215
|
+
return pd.DataFrame(merged)
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
def performance(batch: pd.DataFrame | list[dict[str, Any]] | dict[str, Any]) -> pd.DataFrame:
    """Per-workload latency and throughput.

    Adds ``p50_ms``, ``p95_ms``, ``mean_ttft_ms``, ``throughput_tok_s``, ``throughput_req_s``,
    ``total_s`` and ``total_tokens`` to each input row.
    """
    wanted = ("p50_ms", "p95_ms", "mean_ttft_ms", "throughput_tok_s", "throughput_req_s", "total_s", "total_tokens")

    def _predict(row: dict[str, Any]) -> dict[str, Any]:
        outcome = run_inference(_infer_params(row))
        return {col: outcome[col] for col in wanted}

    rows = _normalise(batch)
    return _with_columns(rows, [_predict(row) for row in rows])
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
def energy(batch: pd.DataFrame | list[dict[str, Any]] | dict[str, Any]) -> pd.DataFrame:
    """Per-workload energy from the self-contained GPU-power estimate.

    Adds ``energy_wh``, ``energy_kwh``, ``energy_per_mtoken_wh``, ``tokens_per_wh`` and
    ``total_tokens`` to each input row.
    """
    wanted = ("energy_wh", "energy_kwh", "energy_per_mtoken_wh", "tokens_per_wh", "total_tokens")

    def _predict(row: dict[str, Any]) -> dict[str, Any]:
        estimate = energy_from_inference(run_inference(_infer_params(row)), gpu_hour_price=None)
        return {col: estimate[col] for col in wanted}

    rows = _normalise(batch)
    return _with_columns(rows, [_predict(row) for row in rows])
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
def efficiency(batch: pd.DataFrame | list[dict[str, Any]] | dict[str, Any]) -> pd.DataFrame:
    """Per-workload cost: adds ``financial_per_mtoken`` ($/Mtoken) and ``gpu_hours``.

    The GPU $/hour comes from the row's ``gpu_hour_price`` when present, else from
    ``DEFAULT_GPU_HOUR_PRICE``.
    """

    def _predict(row: dict[str, Any]) -> dict[str, Any]:
        hourly_price = float(row.get("gpu_hour_price", DEFAULT_GPU_HOUR_PRICE))
        estimate = energy_from_inference(run_inference(_infer_params(row)), gpu_hour_price=hourly_price)
        return {key: estimate[key] for key in ("financial_per_mtoken", "gpu_hours")}

    rows = _normalise(batch)
    return _with_columns(rows, [_predict(row) for row in rows])
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
def carbon(batch: pd.DataFrame | list[dict[str, Any]] | dict[str, Any]) -> pd.DataFrame:
    """Per-workload emissions.

    Adds ``total_co2_g``, ``total_co2_kg``, ``carbon_per_mtoken_g`` and ``total_energy_kwh``.
    The intensity comes from the row's ``intensity`` when present, else from
    ``DEFAULT_INTENSITY_G_KWH``.
    """

    def _predict(row: dict[str, Any]) -> dict[str, Any]:
        grid_intensity = float(row.get("intensity", DEFAULT_INTENSITY_G_KWH))
        outcome = run_inference(_infer_params(row))
        footprint = run_carbon_from_inference(outcome, intensity_g_kwh=grid_intensity)
        tokens = footprint["total_tokens"]
        # Rescale the whole-run CO2 to "per one million tokens"; 0.0 when no tokens were processed.
        scale = 1_000_000.0 / tokens if tokens else 0.0
        return {
            "total_co2_g": footprint["total_co2_g"],
            "total_co2_kg": footprint["total_co2_kg"],
            "carbon_per_mtoken_g": footprint["total_co2_g"] * scale,
            "total_energy_kwh": footprint["total_energy_kwh"],
        }

    rows = _normalise(batch)
    return _with_columns(rows, [_predict(row) for row in rows])
|
kavier/py.typed
ADDED
|
File without changes
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
"""Training predictors: ``performance / energy / efficiency / carbon`` over a batch of fine-tuning jobs.
|
|
2
|
+
|
|
3
|
+
Each verb takes a *batch* — a pandas DataFrame, a list of dicts, or a single dict (one row per job)
|
|
4
|
+
— and returns a DataFrame: the input rows plus the predicted columns. The per-row engine logic is
|
|
5
|
+
canonical here; ``kavier_ui.sims`` and the CLIs consume it, so the numbers match exactly.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
import pandas as pd
|
|
13
|
+
|
|
14
|
+
from kavier.inference import (
|
|
15
|
+
DEFAULT_GPU_HOUR_PRICE,
|
|
16
|
+
DEFAULT_INTENSITY_G_KWH,
|
|
17
|
+
_flat_trace,
|
|
18
|
+
_normalise,
|
|
19
|
+
_with_columns,
|
|
20
|
+
)
|
|
21
|
+
from kavier_co2.engine import Fragment, compute_emissions
|
|
22
|
+
from kavier_training.core.engine import simulate_full_training, simulate_training_step
|
|
23
|
+
|
|
24
|
+
DEFAULT_NUM_NODES = 1
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _train_params(row: dict[str, Any]) -> dict[str, Any]:
    """Return a copy of ``row`` with ``num_nodes`` defaulted to ``DEFAULT_NUM_NODES`` when absent."""
    params = dict(row)
    params.setdefault("num_nodes", DEFAULT_NUM_NODES)
    return params
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def run_training(p: dict[str, Any]) -> dict[str, Any]:
    """Run both training simulations for one job and merge their results.

    ``simulate_full_training`` receives ``num_gpus`` and ``num_nodes`` as given;
    ``simulate_training_step`` receives their product as ``num_gpus``. The optional job-size keys
    (``total_tokens``, ``epochs``, ``dataset_tokens``) are passed as ``None`` when absent or falsy.

    Returns:
        The full-run result overlaid with the per-step result (step values win on a key clash),
        plus ``total_gpus`` and ``aggregate_power_w`` (``gpu_power_watts`` times ``total_gpus``).
    """

    def _optional(key: str, cast: Any) -> Any:
        raw = p.get(key)
        return cast(raw) if raw else None

    model, method, gpu = p["model"], p["method"], p["gpu"]
    seq_len = int(p["seq_len"])
    batch_size = int(p["batch_size"])
    gpus_given = int(p["num_gpus"])
    nodes = int(p["num_nodes"])

    full = simulate_full_training(
        model_name=model,
        method=method,
        gpu_model=gpu,
        tokens_per_sample=seq_len,
        batch_size=batch_size,
        number_gpus=gpus_given,
        number_nodes=nodes,
        total_tokens=_optional("total_tokens", int),
        epochs=_optional("epochs", float),
        dataset_tokens=_optional("dataset_tokens", int),
    )
    total_gpus = gpus_given * nodes
    step = simulate_training_step(
        model_name=model,
        gpu_model=gpu,
        tokens_per_sample=seq_len,
        batch_size=batch_size,
        method=method,
        num_gpus=total_gpus,
        num_nodes=nodes,
    )

    out: dict[str, Any] = dict(full)
    out.update(step)
    out["total_gpus"] = total_gpus
    out["aggregate_power_w"] = step["gpu_power_watts"] * total_gpus
    return out
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def run_carbon_from_training(p: dict[str, Any]) -> dict[str, Any]:
    """Bill a training job as one power fragment against a flat carbon intensity.

    Args:
        p: Training parameters as accepted by ``run_training``, plus ``intensity``.

    Returns:
        A dict with the billing inputs (runtime, aggregate power, intensity) and the energy/CO2
        totals reported by ``compute_emissions``.

    Raises:
        ValueError: If the simulated ``train_runtime`` is not positive.
    """
    job = run_training(p)
    duration_s = float(job["train_runtime"])
    if duration_s <= 0:
        raise ValueError("training runtime is 0 — set a job size (total tokens or epochs) to bill carbon")
    draw_w = float(job["aggregate_power_w"])
    # Fixed reference start: the trace is flat, so only the duration matters for the fragment.
    origin = pd.Timestamp("2026-01-01 00:00:00")
    flat = _flat_trace(origin, duration_s / 3600.0, p["intensity"])
    fragment = Fragment(start_time=origin, duration_s=duration_s, power_w=draw_w)
    emissions = compute_emissions([fragment], flat)

    report: dict[str, Any] = {"source": "training", "model": job["model_name"], "gpu": job["gpu_name"]}
    report["intensity"] = float(p["intensity"])
    report["runtime_s"] = duration_s
    report["power_w"] = draw_w
    report["total_energy_kwh"] = emissions.total_energy_kwh
    report["total_co2_g"] = emissions.total_co2_g
    report["total_co2_kg"] = emissions.total_co2_kg
    report["total_tokens"] = job["total_tokens"]
    return report
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def performance(batch: pd.DataFrame | list[dict[str, Any]] | dict[str, Any]) -> pd.DataFrame:
    """Per-job throughput and utilisation.

    Adds ``train_tokens_per_second``, ``train_runtime``, ``train_samples_per_second``,
    ``gpu_compute_utilization``, ``gpu_memory_utilization``, ``gpu_power_watts`` and
    ``total_tokens`` to each input row.
    """
    wanted = (
        "train_tokens_per_second",
        "train_runtime",
        "train_samples_per_second",
        "gpu_compute_utilization",
        "gpu_memory_utilization",
        "gpu_power_watts",
        "total_tokens",
    )

    def _predict(row: dict[str, Any]) -> dict[str, Any]:
        outcome = run_training(_train_params(row))
        return {col: outcome[col] for col in wanted}

    rows = _normalise(batch)
    return _with_columns(rows, [_predict(row) for row in rows])
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def energy(batch: pd.DataFrame | list[dict[str, Any]] | dict[str, Any]) -> pd.DataFrame:
    """Per-job energy from the self-contained GPU-power estimate.

    Adds ``energy_wh``, ``energy_kwh``, ``energy_per_mtoken_wh``, ``aggregate_power_w`` and
    ``total_tokens``. Energy is taken from ``run_carbon_from_training`` at the default intensity,
    so a job with non-positive runtime raises ``ValueError``.
    """

    def _predict(row: dict[str, Any]) -> dict[str, Any]:
        footprint = run_carbon_from_training(dict(_train_params(row), intensity=DEFAULT_INTENSITY_G_KWH))
        tokens = footprint["total_tokens"]
        wh = footprint["total_energy_kwh"] * 1000.0
        # Rescale the whole-job energy to "per one million tokens"; 0.0 when the token count is falsy.
        scale = 1_000_000.0 / tokens if tokens else 0.0
        return {
            "energy_wh": wh,
            "energy_kwh": footprint["total_energy_kwh"],
            "energy_per_mtoken_wh": wh * scale,
            "aggregate_power_w": footprint["power_w"],
            "total_tokens": tokens,
        }

    rows = _normalise(batch)
    return _with_columns(rows, [_predict(row) for row in rows])
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def efficiency(batch: pd.DataFrame | list[dict[str, Any]] | dict[str, Any]) -> pd.DataFrame:
    """Per-job cost: adds ``financial_per_mtoken`` ($/Mtoken) and ``gpu_hours``.

    The GPU $/hour comes from the row's ``gpu_hour_price`` when present, else from
    ``DEFAULT_GPU_HOUR_PRICE``. ``financial_per_mtoken`` is ``None`` when the job's
    ``total_tokens`` is falsy.
    """

    def _predict(row: dict[str, Any]) -> dict[str, Any]:
        job = run_training(_train_params(row))
        tokens = job["total_tokens"]
        duration_s = float(job["train_runtime"])
        hourly_price = float(row.get("gpu_hour_price", DEFAULT_GPU_HOUR_PRICE))
        # GPU-hours = wall-clock runtime x total GPUs (matches the inference $/Mtoken basis).
        gpu_hours = duration_s / 3600.0 * int(job["total_gpus"])
        scale = 1_000_000.0 / tokens if tokens else 0.0
        if tokens:
            cost = gpu_hours * hourly_price * scale
        else:
            cost = None
        return {"financial_per_mtoken": cost, "gpu_hours": gpu_hours}

    rows = _normalise(batch)
    return _with_columns(rows, [_predict(row) for row in rows])
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def carbon(batch: pd.DataFrame | list[dict[str, Any]] | dict[str, Any]) -> pd.DataFrame:
    """Per-job emissions.

    Adds ``total_co2_g``, ``total_co2_kg``, ``carbon_per_mtoken_g`` and ``total_energy_kwh``.
    The intensity comes from the row's ``intensity`` when present, else from
    ``DEFAULT_INTENSITY_G_KWH``.
    """

    def _predict(row: dict[str, Any]) -> dict[str, Any]:
        grid_intensity = float(row.get("intensity", DEFAULT_INTENSITY_G_KWH))
        footprint = run_carbon_from_training(dict(_train_params(row), intensity=grid_intensity))
        tokens = footprint["total_tokens"]
        # Rescale the whole-job CO2 to "per one million tokens"; 0.0 when the token count is falsy.
        scale = 1_000_000.0 / tokens if tokens else 0.0
        return {
            "total_co2_g": footprint["total_co2_g"],
            "total_co2_kg": footprint["total_co2_kg"],
            "carbon_per_mtoken_g": footprint["total_co2_g"] * scale,
            "total_energy_kwh": footprint["total_energy_kwh"],
        }

    rows = _normalise(batch)
    return _with_columns(rows, [_predict(row) for row in rows])
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: kavier
|
|
3
|
+
Version: 0.3.1
|
|
4
|
+
Summary: Kavier: Simulating the Performance, Sustainability, and Efficiency of LLM Ecosystems under Inference and Training
|
|
5
|
+
Author: Radu Nicolae
|
|
6
|
+
Author-email: AtLarge Research <info@atlarge-research.com>
|
|
7
|
+
License-Expression: MIT
|
|
8
|
+
Project-URL: Homepage, https://github.com/atlarge-research/kavier
|
|
9
|
+
Project-URL: Repository, https://github.com/atlarge-research/kavier
|
|
10
|
+
Project-URL: Documentation, https://github.com/atlarge-research/kavier
|
|
11
|
+
Project-URL: Bug Tracker, https://github.com/atlarge-research/kavier/issues
|
|
12
|
+
Keywords: LLM,simulation,performance,sustainability,energy,carbon,GPU,training,inference
|
|
13
|
+
Classifier: Development Status :: 4 - Beta
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
17
|
+
Classifier: Intended Audience :: Science/Research
|
|
18
|
+
Classifier: Intended Audience :: Developers
|
|
19
|
+
Classifier: Topic :: Scientific/Engineering
|
|
20
|
+
Requires-Python: >=3.11
|
|
21
|
+
Description-Content-Type: text/markdown
|
|
22
|
+
License-File: LICENSE.txt
|
|
23
|
+
Requires-Dist: pydantic>=2.7
|
|
24
|
+
Requires-Dist: pandas>=2.2
|
|
25
|
+
Requires-Dist: pyarrow>=23.0.1
|
|
26
|
+
Requires-Dist: numpy>=2.2.6
|
|
27
|
+
Requires-Dist: tqdm>=4.67.1
|
|
28
|
+
Requires-Dist: cachetools>=6.1
|
|
29
|
+
Requires-Dist: rich>=13
|
|
30
|
+
Requires-Dist: pyyaml>=6
|
|
31
|
+
Provides-Extra: dev
|
|
32
|
+
Requires-Dist: pytest>=8.2; extra == "dev"
|
|
33
|
+
Requires-Dist: hypothesis>=6.102; extra == "dev"
|
|
34
|
+
Requires-Dist: ruff>=0.4; extra == "dev"
|
|
35
|
+
Requires-Dist: mypy>=1.10; extra == "dev"
|
|
36
|
+
Requires-Dist: pandas-stubs>=2.2; extra == "dev"
|
|
37
|
+
Requires-Dist: types-tqdm; extra == "dev"
|
|
38
|
+
Requires-Dist: types-cachetools; extra == "dev"
|
|
39
|
+
Requires-Dist: types-PyYAML; extra == "dev"
|
|
40
|
+
Dynamic: license-file
|
|
41
|
+
|
|
42
|
+
# Kavier
|
|
43
|
+
|
|
44
|
+
Simulating performance, sustainability, and efficiency of LLM Ecosystems under inference and training.
|
|
45
|
+
|
|
46
|
+
[License: MIT](LICENSE)
|
|
47
|
+
[Documentation](docs/index.html)
|
|
48
|
+
[![CI](https://github.com/atlarge-research/kavier/actions/workflows/ci.yml/badge.svg)](https://github.com/atlarge-research/kavier/actions/workflows/ci.yml)
|
|
49
|
+
|
|
50
|
+
---
|
|
51
|
+
|
|
52
|
+
This repository is the home of Kavier, the first scientific instrument for
|
|
53
|
+
predicting performance, sustainability, and efficiency of LLM ecosystems under
|
|
54
|
+
inference and training.
|
|
55
|
+
|
|
56
|
+
Kavier helps operators, researchers, and engineers predict:
|
|
57
|
+
* **Performance** — inference latencies, training throughput, GPU utilization
|
|
58
|
+
* **Sustainability** — energy consumption, carbon emissions (gCO2/Mtoken)
|
|
59
|
+
* **Efficiency** — financial and energy cost per token/sample given GPU-hour prices
|
|
60
|
+
|
|
61
|
+
## Quick start
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
git clone https://github.com/atlarge-research/kavier.git
|
|
65
|
+
cd kavier
|
|
66
|
+
|
|
67
|
+
python -m venv .venv
|
|
68
|
+
source .venv/bin/activate # Windows: .venv\Scripts\activate
|
|
69
|
+
python -m pip install -U pip
|
|
70
|
+
pip install -e ".[dev]"
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
Run your first simulation against the tiny bundled synthetic example trace:
|
|
74
|
+
|
|
75
|
+
```bash
|
|
76
|
+
kavier-perf --trace src/kavier_inference/data/input/input_example.csv
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
Congrats! You have just run your first simulation with Kavier! 🎉
|
|
80
|
+
|
|
81
|
+
Or skip the flags — launch the **interactive UI** and pick a simulator, model and
|
|
82
|
+
GPU from guided menus, then chain into energy/carbon or export OpenDC:
|
|
83
|
+
|
|
84
|
+
```bash
|
|
85
|
+
kavier
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
If you installed Kavier from PyPI (`pip install kavier`) you have no `src/`
|
|
89
|
+
directory; the same synthetic example trace ships inside the package, so resolve
|
|
90
|
+
its path via `importlib.resources`:
|
|
91
|
+
|
|
92
|
+
```bash
|
|
93
|
+
TRACE=$(python -c "from importlib.resources import files; print(files('kavier_inference')/'data/input/input_example.csv')")
|
|
94
|
+
kavier-perf --trace "$TRACE"
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
## Structure
|
|
98
|
+
|
|
99
|
+
Kavier is organized into the following first-party packages:
|
|
100
|
+
|
|
101
|
+
```
|
|
102
|
+
src/
|
|
103
|
+
├── kavier/ # Umbrella facade (re-exports the sub-packages)
|
|
104
|
+
├── kavier_inference/ # Inference simulation (kavier-perf)
|
|
105
|
+
├── kavier_training/ # Training simulation (kavier-train)
|
|
106
|
+
├── kavier_energy/ # Energy calculator (kavier-energy)
|
|
107
|
+
├── kavier_co2/ # Carbon emissions (kavier-co2)
|
|
108
|
+
├── kavier_library/ # Shared GPU & LLM specifications
|
|
109
|
+
├── kavier_io/ # Shared I/O utilities
|
|
110
|
+
│ └── opendc/ # OpenDC workload export (tasks/fragments)
|
|
111
|
+
├── kavier_ui/ # Interactive REPL (the `kavier` command)
|
|
112
|
+
└── tests/ # Test suites
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
## Documentation
|
|
116
|
+
|
|
117
|
+
See [docs/index.html](docs/index.html) for the main documentation: getting started,
|
|
118
|
+
the Kavier CLIs (`kavier` interactive UI, `kavier-perf`, `kavier-train`,
|
|
119
|
+
`kavier-energy`, `kavier-co2`), the YAML `--config` input,
|
|
120
|
+
structure, and the contributing guide.
|
|
121
|
+
|
|
122
|
+
## Contributing
|
|
123
|
+
|
|
124
|
+
Questions, suggestions and contributions are welcome and appreciated!
|
|
125
|
+
Please refer to the [contributing guide](docs/contributing.md) for more details.
|
|
126
|
+
|
|
127
|
+
## License
|
|
128
|
+
|
|
129
|
+
Kavier is distributed under the MIT license. See [LICENSE.txt](/LICENSE.txt).
|