gitm-labs 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. gitm/__init__.py +9 -0
  2. gitm/_paths.py +109 -0
  3. gitm/agents/__init__.py +12 -0
  4. gitm/agents/policy.py +48 -0
  5. gitm/api.py +37 -0
  6. gitm/bench/__init__.py +30 -0
  7. gitm/bench/__main__.py +10 -0
  8. gitm/bench/baseline.py +169 -0
  9. gitm/bench/cli.py +290 -0
  10. gitm/bench/edge_manifest.py +138 -0
  11. gitm/bench/manifest.py +183 -0
  12. gitm/bench/profile.py +299 -0
  13. gitm/bench/reproduce.py +120 -0
  14. gitm/bench/results.py +68 -0
  15. gitm/bench/runner.py +137 -0
  16. gitm/bench/schema.py +168 -0
  17. gitm/bench/templates/results.md.j2 +37 -0
  18. gitm/benchmarks/__init__.py +0 -0
  19. gitm/benchmarks/kitti/__init__.py +9 -0
  20. gitm/benchmarks/kitti/baseline.py +249 -0
  21. gitm/benchmarks/kitti/workunit.py +223 -0
  22. gitm/cli.py +120 -0
  23. gitm/doctor.py +29 -0
  24. gitm/kernels/__init__.py +13 -0
  25. gitm/kernels/library.py +24 -0
  26. gitm/kernels/library.yaml +345 -0
  27. gitm/kernels/spec.py +51 -0
  28. gitm/optimizer/__init__.py +32 -0
  29. gitm/optimizer/apply.py +206 -0
  30. gitm/optimizer/attribution.py +90 -0
  31. gitm/optimizer/dr.py +154 -0
  32. gitm/optimizer/invariants.py +45 -0
  33. gitm/optimizer/monitor.py +164 -0
  34. gitm/optimizer/multibasis.py +86 -0
  35. gitm/optimizer/qualification.py +77 -0
  36. gitm/optimizer/replay.py +59 -0
  37. gitm/optimizer/replay_validation.py +125 -0
  38. gitm/optimizer/report.py +110 -0
  39. gitm/optimizer/templates/report.md.j2 +41 -0
  40. gitm/planner/__init__.py +22 -0
  41. gitm/planner/graph.py +117 -0
  42. gitm/planner/roofline.py +96 -0
  43. gitm/routing/__init__.py +0 -0
  44. gitm/routing/scorer_v0.py +89 -0
  45. gitm/scheduler/__init__.py +18 -0
  46. gitm/scheduler/loop.py +205 -0
  47. gitm/telemetry/__init__.py +18 -0
  48. gitm/telemetry/backends/__init__.py +13 -0
  49. gitm/telemetry/backends/amd.py +40 -0
  50. gitm/telemetry/backends/base.py +27 -0
  51. gitm/telemetry/backends/discover.py +43 -0
  52. gitm/telemetry/backends/nvidia.py +141 -0
  53. gitm/telemetry/collector.py +85 -0
  54. gitm/telemetry/schema.py +78 -0
  55. gitm/telemetry/sinks/__init__.py +50 -0
  56. gitm/telemetry/sinks/jsonl.py +33 -0
  57. gitm/telemetry/sinks/otlp.py +54 -0
  58. gitm/telemetry/sinks/prometheus.py +48 -0
  59. gitm/telemetry/sinks/s3.py +52 -0
  60. gitm/tracer/__init__.py +20 -0
  61. gitm/tracer/_cupti/__init__.py +24 -0
  62. gitm/tracer/_cupti/build.py +193 -0
  63. gitm/tracer/_cupti/cupti_shim.c +294 -0
  64. gitm/tracer/_cupti_decode.py +123 -0
  65. gitm/tracer/capture.py +108 -0
  66. gitm/tracer/cupti.py +46 -0
  67. gitm/tracer/schema.py +80 -0
  68. gitm_labs-0.0.1.dist-info/METADATA +264 -0
  69. gitm_labs-0.0.1.dist-info/RECORD +71 -0
  70. gitm_labs-0.0.1.dist-info/WHEEL +4 -0
  71. gitm_labs-0.0.1.dist-info/entry_points.txt +2 -0
gitm/__init__.py ADDED
@@ -0,0 +1,9 @@
1
+ """gitm — behavioral compiler and intervention runtime."""
2
+
3
+ from __future__ import annotations
4
+
5
+ __version__ = "0.0.1"
6
+
7
+ from gitm.api import optimize
8
+
9
+ __all__ = ["__version__", "optimize"]
gitm/_paths.py ADDED
@@ -0,0 +1,109 @@
1
+ """Resolve data locations.
2
+
3
+ GITM's canonical data store is **S3**. Datasets, plus the durable copy of run
4
+ outputs, traces, and telemetry, all live under an ``s3://`` root. Datasets are
5
+ far too large to hold on local disk wholesale (the AlphaFold2 DBs alone are
6
+ ~2.2 TB), so the local filesystem is treated only as *bounded scratch*: the
7
+ active run's working set is staged in, used, and evicted. Nothing here ever
8
+ assumes a dataset lives on local disk.
9
+
10
+ Two roots:
11
+
12
+ * ``$GITM_S3_ROOT`` — ``s3://bucket/prefix``, the canonical store. Datasets at
13
+ ``<s3_root>/datasets/<name>/``; durable run/trace/telemetry archives at
14
+ ``<s3_root>/{runs,traces,telemetry}/``.
15
+ * ``$GITM_SCRATCH`` — a local *ephemeral* directory for the active run's
16
+ outputs and staged inputs (small: a run writes here, then the durable copy is
17
+ synced to S3). Defaults to ``~/.cache/gitm``. Never holds datasets at rest.
18
+ """
19
+
20
+ from __future__ import annotations
21
+
22
+ import os
23
+ from pathlib import Path
24
+
25
+
26
# Fallback location of the local scratch root, used when neither an explicit
# override nor ``$GITM_SCRATCH`` is supplied.
DEFAULT_SCRATCH = "~/.cache/gitm"

# Subdirectories laid out under the scratch root. ``datasets`` is deliberately
# absent: datasets are never materialized wholesale on local disk; they are
# staged on demand into ``staging/`` from S3 for the duration of a run, then
# evicted.
_SCRATCH_SUBDIRS = (
    "runs",
    "traces",
    "telemetry",
    "staging",
)
32
+
33
+
34
def s3_root(override: str | None = None) -> str | None:
    """Resolve the canonical ``s3://`` root, or ``None`` when unconfigured.

    A non-empty ``override`` takes precedence; otherwise ``$GITM_S3_ROOT`` is
    consulted. Trailing slashes are removed from the result. ``None`` is
    returned when neither source yields a value, so callers that can work
    without archival may degrade gracefully. Callers that *require* S3 should
    use :func:`require_s3_root` or :func:`dataset_uri`, which raise instead.
    """
    configured = override if override else os.environ.get("GITM_S3_ROOT")
    return configured.rstrip("/") if configured else None
47
+
48
+
49
def require_s3_root(override: str | None = None) -> str:
    """Return the canonical S3 root, raising if none is configured.

    Same resolution as :func:`s3_root`, but a missing root is an error.

    Raises:
        RuntimeError: neither ``override`` nor ``$GITM_S3_ROOT`` provides a root.
    """
    resolved = s3_root(override)
    if resolved is not None:
        return resolved
    raise RuntimeError(
        "No S3 root configured. GITM datasets live in S3 and are never "
        "stored on local disk. Set $GITM_S3_ROOT, e.g.\n"
        " export GITM_S3_ROOT=s3://gitm-data/prod"
    )
59
+
60
+
61
def dataset_uri(name: str, *, s3_root_override: str | None = None) -> str:
    """Build the canonical ``s3://`` URI of a dataset.

    Leading and trailing slashes on ``name`` are dropped, so
    ``dataset_uri("hft/hft_1b_seed42")`` -> ``s3://.../datasets/hft/hft_1b_seed42``.

    Raises:
        RuntimeError: no S3 root is configured (see :func:`require_s3_root`).
    """
    root = require_s3_root(s3_root_override)
    relative = name.strip("/")
    return f"{root}/datasets/{relative}"
67
+
68
+
69
def durable_uri(kind: str, run_id: str, *, s3_root_override: str | None = None) -> str:
    """Build the canonical ``s3://`` archive URI for a run output.

    ``kind`` is one of ``runs``, ``traces``, ``telemetry`` — the durable
    destination a scratch artifact is synced to once the run completes.
    Leading and trailing slashes on ``run_id`` are dropped, mirroring how
    :func:`dataset_uri` treats its ``name``, so the result never contains an
    empty path segment.

    Raises:
        ValueError: ``kind`` is not a known durable artifact kind.
        RuntimeError: no S3 root is configured (see :func:`require_s3_root`).
    """
    # Validate before resolving the root so a bad kind is reported even when
    # S3 is not configured.
    if kind not in ("runs", "traces", "telemetry"):
        raise ValueError(f"unknown durable artifact kind: {kind!r}")
    return f"{require_s3_root(s3_root_override)}/{kind}/{run_id.strip('/')}"
78
+
79
+
80
def scratch_root(override: str | None = None) -> Path:
    """Resolve, create, and return the local scratch directory.

    The first non-empty value among ``override``, ``$GITM_SCRATCH`` and
    ``~/.cache/gitm`` is used; ``~`` is expanded and the path made absolute.
    The directory and each of its standard subdirectories are created if
    absent. Scratch is ephemeral: it holds the active run's outputs and staged
    working set only, never datasets at rest.
    """
    chosen = override or os.environ.get("GITM_SCRATCH") or DEFAULT_SCRATCH
    base = Path(chosen).expanduser().resolve()
    # Root first, then each subdirectory, all idempotently.
    for directory in (base, *(base / name for name in _SCRATCH_SUBDIRS)):
        directory.mkdir(parents=True, exist_ok=True)
    return base
93
+
94
+
95
def traces_dir(override: str | None = None) -> Path:
    """Return the ``traces`` subdirectory of the local scratch root."""
    return scratch_root(override).joinpath("traces")
97
+
98
+
99
def runs_dir(override: str | None = None) -> Path:
    """Return the ``runs`` subdirectory of the local scratch root."""
    return scratch_root(override).joinpath("runs")
101
+
102
+
103
def telemetry_dir(override: str | None = None) -> Path:
    """Return the ``telemetry`` subdirectory of the local scratch root."""
    return scratch_root(override).joinpath("telemetry")
105
+
106
+
107
def staging_dir(override: str | None = None) -> Path:
    """Return the local landing zone for datasets staged in from S3 for the active run."""
    return scratch_root(override).joinpath("staging")
gitm/agents/__init__.py ADDED
@@ -0,0 +1,12 @@
1
+ """Autonomous decision policy — selects interventions, drives rollback.
2
+
3
+ The agent layer is intentionally thin: rank candidates by predicted delta
4
+ returned from counterfactual replay, pre-filter by safety gate, apply with
5
+ rollback, observe live delta, persist the chain into the provenance trail.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from gitm.agents.policy import Policy, RankedCandidate, select_interventions
11
+
12
+ __all__ = ["Policy", "RankedCandidate", "select_interventions"]
gitm/agents/policy.py ADDED
@@ -0,0 +1,48 @@
1
+ """Selection policy: pre-filter by safety, rank by predicted delta, return top-N."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+ from typing import Iterable
7
+
8
+ from gitm.kernels.spec import InterventionSpec
9
+ from gitm.optimizer.replay import predict_delta
10
+ from gitm.tracer.schema import Trace
11
+
12
+
13
@dataclass
class RankedCandidate:
    """One library intervention together with its ranking outcome."""

    # The intervention this entry describes.
    spec: InterventionSpec
    # Delta predicted for the intervention; 0.0 when the candidate was rejected.
    predicted_delta: float
    # Why the candidate was filtered out, or None if it passed the pre-filter.
    rejected_reason: str | None = None
18
+
19
+
20
@dataclass
class Policy:
    """Greedy by predicted delta with safety pre-filter."""

    # When False, specs whose safety block requires a qualification commit
    # are rejected by the pre-filter.
    require_qualification_commit: bool = False
    # When True, specs in the "high_risk" safety tier are rejected.
    skip_high_risk: bool = False
26
+
27
+
28
def select_interventions(
    trace: Trace,
    library: Iterable[InterventionSpec],
    policy: Policy,
    top_n: int = 5,
) -> list[RankedCandidate]:
    """Pre-filter the library by safety, rank by predicted delta, return the top ``top_n``.

    Every spec in ``library`` becomes a :class:`RankedCandidate`. Specs that
    fail the policy pre-filter carry a ``rejected_reason`` and a delta of 0.0
    (no prediction is run for them). Ordering is: accepted candidates before
    rejected ones, then larger predicted delta first, then spec name. Rejected
    candidates can therefore appear in the result when fewer than ``top_n``
    specs are accepted.
    """

    def _rejection(spec):
        # The high-risk check takes priority over the qualification check.
        if policy.skip_high_risk and spec.safety.tier == "high_risk":
            return "policy.skip_high_risk"
        if spec.safety.requires_qualification_commit and not policy.require_qualification_commit:
            return "safety.requires_qualification_commit"
        return None

    ranked: list[RankedCandidate] = []
    for spec in library:
        why = _rejection(spec)
        score = 0.0 if why is not None else predict_delta(trace, spec)
        ranked.append(RankedCandidate(spec=spec, predicted_delta=score, rejected_reason=why))

    ordered = sorted(
        ranked,
        key=lambda item: (item.rejected_reason is not None, -item.predicted_delta, item.spec.name),
    )
    return ordered[:top_n]
gitm/api.py ADDED
@@ -0,0 +1,37 @@
1
+ """Public embedded API.
2
+
3
+ from gitm import optimize
4
+ optimize(engine, budget="24h", target=0.15)
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from typing import Any
10
+
11
+ from gitm.scheduler import LoopConfig, run_loop
12
+
13
+
14
def optimize(
    engine: Any | None = None,
    *,
    workload: str | None = None,
    budget: str = "24h",
    target: float = 0.15,
    scratch: str | None = None,
) -> dict[str, Any]:
    """Run the autonomous optimization loop and return its report.

    Supply an ``engine`` (e.g. a running vLLM engine handle) for the embedded
    path, or a ``workload`` name (e.g. ``"vllm-decode"``) for the CLI path.
    ``budget`` and ``target`` follow the SKU contract: a verified floor of
    ``target`` fraction improvement within ``budget`` wall time, or a
    qualification-gate diagnostic explaining why the floor was not committed.

    All arguments are forwarded unchanged into a ``LoopConfig``; the return
    value is whatever ``run_loop`` produces for that config.
    """
    settings = {
        "engine": engine,
        "workload": workload,
        "budget": budget,
        "target": target,
        "scratch": scratch,
    }
    return run_loop(LoopConfig(**settings))
gitm/bench/__init__.py ADDED
@@ -0,0 +1,30 @@
1
+ """Shared benchmark systems layer.
2
+
3
+ The benchmark *layer* is deliberately dumb and identical across domains (HFT,
4
+ biotech, edge/robotics) so the runtime layer — planner, deviation monitor,
5
+ causal attribution — does the real work against a heterogeneous workload mix
6
+ without per-benchmark plumbing. Everything domain-specific lives in a single
7
+ ``bench.toml`` per benchmark; everything mechanical lives here and is reused.
8
+
9
+ What this package gives every benchmark pair:
10
+
11
+ * :mod:`gitm.bench.schema` — the canonical data shapes: ``BenchConfig`` (parsed
12
+ ``bench.toml``), ``StallPhase`` (one row of the stall-breakdown table), and
13
+ ``BaselineRun`` (the ``<name>_baseline_N.json`` contract).
14
+ * :mod:`gitm.bench.manifest` — streaming sha256 manifest build + verify, so any
15
+ holder of ``manifest.yaml`` can re-fetch byte-identical TB-scale datasets.
16
+ * :mod:`gitm.bench.baseline` — the two sign-off gates: three seeds agree within
17
+ 2 % (``spread``) and GPU active % stays under the ceiling (``saturation``).
18
+ * :mod:`gitm.bench.profile` — the GITM profiling wrapper around nsys/rocprof +
19
+ py-spy/sar that produces the stall-breakdown table.
20
+ * :mod:`gitm.bench.edge_manifest` — the nuScenes+KITTI ``manifest.jsonl`` builder.
21
+ * :mod:`gitm.bench.results` — renders ``results.md``.
22
+
23
+ Driven from each ``benchmarks/<name>/Makefile`` via ``python -m gitm.bench``.
24
+ """
25
+
26
+ from __future__ import annotations
27
+
28
+ from gitm.bench.schema import BaselineRun, BenchConfig, StallPhase
29
+
30
+ __all__ = ["BaselineRun", "BenchConfig", "StallPhase"]
gitm/bench/__main__.py ADDED
@@ -0,0 +1,10 @@
1
+ """``python -m gitm.bench`` entry point."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import sys
6
+
7
+ from gitm.bench.cli import main
8
+
9
# Run the bench CLI when executed as ``python -m gitm.bench`` and use its
# return value as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
gitm/bench/baseline.py ADDED
@@ -0,0 +1,169 @@
1
+ """Baseline aggregation and the two sign-off gates.
2
+
3
+ A baseline is *locked* when three convergent runs (one per seed) agree on the
4
+ top-line metric within 2 % — the recorded baseline is their mean. Two gates
5
+ decide sign-off, both encoded here so every benchmark is judged identically:
6
+
7
+ * **spread gate** — ``max-min`` over ``mean`` of the three metric values must
8
+ be under ``spread_tolerance`` (default 2 %). Convergence is what makes the
9
+ number trustworthy as an optimization reference.
10
+ * **saturation gate** — wall-clock-weighted GPU active % must stay under
11
+ ``gpu_active_ceiling`` (default 85 %). A saturated benchmark has no residual
12
+ headroom for the runtime to find, so it trips the same-day swap rule.
13
+
14
+ A third, optional check compares the recorded mean against ``baseline_target``
15
+ (e.g. HFT ≥ 25 M events/s) in the configured direction.
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import json
21
+ import statistics
22
+ from dataclasses import dataclass, field
23
+ from pathlib import Path
24
+
25
+ from gitm.bench.schema import BaselineRun, BenchConfig
26
+
27
+
28
def load_runs(paths: list[str | Path]) -> list[BaselineRun]:
    """Parse each baseline-run JSON file into a ``BaselineRun``.

    Files are read explicitly as UTF-8 rather than with the platform's locale
    encoding, so the same file parses identically on every host. Results are
    returned in the order of ``paths``; an empty ``paths`` yields ``[]``.

    Raises whatever ``Path.read_text`` raises for an unreadable file and
    whatever ``BaselineRun.model_validate_json`` raises for invalid content.
    """
    return [
        BaselineRun.model_validate_json(Path(p).read_text(encoding="utf-8"))
        for p in paths
    ]
33
+
34
+
35
@dataclass
class GateResult:
    """Outcome of a single sign-off gate."""

    # Gate identifier, e.g. "count", "spread", "saturation", "target".
    name: str
    # True when the gate's condition held.
    passed: bool
    # Human-readable explanation of the measured value versus the limit.
    detail: str
40
+
41
+
42
@dataclass
class BaselineSummary:
    """Aggregated statistics and gate verdicts for one benchmark's baseline runs."""

    benchmark: str
    metric: str
    n: int
    mean: float
    stddev: float
    spread: float  # (max - min) / mean
    gpu_active_overall: float  # worst (max) across runs
    recorded: float  # the number we publish = mean
    gates: list[GateResult] = field(default_factory=list)
    seeds: list[int] = field(default_factory=list)

    @property
    def passed(self) -> bool:
        """True when every gate passed (vacuously true with no gates)."""
        for gate in self.gates:
            if not gate.passed:
                return False
        return True

    def to_dict(self) -> dict:
        """Return a plain-dict view of the summary, including the overall verdict."""
        payload = {
            "benchmark": self.benchmark,
            "metric": self.metric,
            "n": self.n,
            "seeds": self.seeds,
            "recorded": self.recorded,
            "mean": self.mean,
            "stddev": self.stddev,
            "spread": self.spread,
            "gpu_active_overall": self.gpu_active_overall,
            "passed": self.passed,
        }
        gate_rows = []
        for gate in self.gates:
            gate_rows.append(
                {"name": gate.name, "passed": gate.passed, "detail": gate.detail}
            )
        payload["gates"] = gate_rows
        return payload
76
+
77
+
78
def aggregate(runs: list[BaselineRun], config: BenchConfig) -> BaselineSummary:
    """Aggregate baseline runs and evaluate the sign-off gates.

    Does not assume exactly three runs — fewer is a gate failure, more is fine —
    so a pair can iterate on two and still get a meaningful spread reading.

    Gates appended, in order: ``count`` (at least three runs), ``spread``
    (``(max - min) / |mean|`` within ``config.spread_tolerance``),
    ``saturation`` (worst per-run GPU active fraction strictly below
    ``config.gpu_active_ceiling``) and, only when ``config.baseline_target``
    is set, ``target`` (mean compared against the target in
    ``config.target_direction``; any direction other than ``"ge"`` is treated
    as "less than or equal").

    Raises:
        ValueError: ``runs`` is empty, or a run's benchmark/metric does not
            match ``config``.
    """
    if not runs:
        raise ValueError("no baseline runs to aggregate")

    # Refuse to mix runs from a different benchmark or metric into the mean.
    for r in runs:
        if r.benchmark != config.name:
            raise ValueError(
                f"run benchmark {r.benchmark!r} != config {config.name!r}"
            )
        if r.metric != config.metric:
            raise ValueError(f"run metric {r.metric!r} != config {config.metric!r}")

    values = [r.metric_value for r in runs]
    mean = statistics.fmean(values)
    stddev = statistics.pstdev(values) if len(values) > 1 else 0.0
    # Normalise by |mean|: dividing by a negative mean would yield a negative
    # spread, which would pass the tolerance check no matter how far the runs
    # diverge. A zero mean has no meaningful relative spread, so it fails.
    spread = (max(values) - min(values)) / abs(mean) if mean else float("inf")
    # The worst (highest) run decides the saturation gate.
    gpu_overall = max(r.gpu_active_overall() for r in runs)

    gates: list[GateResult] = []

    # Gate 1: three convergent seeds.
    n_ok = len(runs) >= 3
    spread_ok = spread <= config.spread_tolerance
    gates.append(
        GateResult(
            "count",
            n_ok,
            f"{len(runs)} run(s); need >= 3 convergent seeds",
        )
    )
    gates.append(
        GateResult(
            "spread",
            spread_ok,
            f"spread {spread:.2%} vs tolerance {config.spread_tolerance:.2%}",
        )
    )

    # Gate 2: saturation / swap rule.
    sat_ok = gpu_overall < config.gpu_active_ceiling
    gates.append(
        GateResult(
            "saturation",
            sat_ok,
            f"GPU active {gpu_overall:.1%} vs ceiling {config.gpu_active_ceiling:.0%}"
            + ("" if sat_ok else " — trips swap rule, shard same day"),
        )
    )

    # Gate 3 (optional): metric vs target.
    if config.baseline_target is not None:
        if config.target_direction == "ge":
            tgt_ok = mean >= config.baseline_target
            cmp = ">="
        else:
            tgt_ok = mean <= config.baseline_target
            cmp = "<="
        gates.append(
            GateResult(
                "target",
                tgt_ok,
                f"mean {mean:.4g} {cmp} target {config.baseline_target:.4g}",
            )
        )

    return BaselineSummary(
        benchmark=config.name,
        metric=config.metric,
        n=len(runs),
        mean=mean,
        stddev=stddev,
        spread=spread,
        gpu_active_overall=gpu_overall,
        recorded=mean,
        gates=gates,
        seeds=sorted(r.seed for r in runs),
    )
160
+
161
+
162
def aggregate_files(paths: list[str | Path], config: BenchConfig) -> BaselineSummary:
    """Load the baseline-run files at ``paths`` and aggregate them against ``config``."""
    loaded = load_runs(paths)
    return aggregate(loaded, config)
164
+
165
+
166
def write_summary(summary: BaselineSummary, out: str | Path) -> Path:
    """Write ``summary`` as indented JSON (with a trailing newline) to ``out``.

    Returns the destination as a ``Path``.
    """
    destination = Path(out)
    rendered = json.dumps(summary.to_dict(), indent=2)
    destination.write_text(rendered + "\n")
    return destination