gitm-labs 0.0.2__tar.gz → 0.0.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (176)
  1. gitm_labs-0.0.4/.github/workflows/claude-review.yml +98 -0
  2. gitm_labs-0.0.4/.github/workflows/gemini-pr-review.yml +26 -0
  3. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/PKG-INFO +13 -4
  4. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/README.md +4 -3
  5. gitm_labs-0.0.4/benchmarks/edge/datasets_proposal.md +77 -0
  6. gitm_labs-0.0.4/benchmarks/hft/generate.py +14 -0
  7. gitm_labs-0.0.4/benchmarks/hft/harness.py +17 -0
  8. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/kitti/results.md +19 -15
  9. gitm_labs-0.0.4/benchmarks/kitti/spec.md +109 -0
  10. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/__init__.py +1 -1
  11. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/api.py +8 -0
  12. gitm_labs-0.0.4/gitm/benchmarks/edge/__init__.py +10 -0
  13. gitm_labs-0.0.4/gitm/benchmarks/edge/baseline.py +261 -0
  14. gitm_labs-0.0.4/gitm/benchmarks/edge/workunit.py +240 -0
  15. gitm_labs-0.0.4/gitm/benchmarks/hft/__init__.py +8 -0
  16. gitm_labs-0.0.4/gitm/benchmarks/kitti/baseline.py +405 -0
  17. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/benchmarks/kitti/workunit.py +3 -5
  18. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/cli.py +5 -2
  19. gitm_labs-0.0.4/gitm/planner/kitti_graph.py +261 -0
  20. gitm_labs-0.0.2/scripts/run_under_runtime.py → gitm_labs-0.0.4/gitm/runtime_driver.py +92 -29
  21. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/scheduler/loop.py +92 -3
  22. gitm_labs-0.0.4/gitm/tracer/_cupti/__init__.py +76 -0
  23. gitm_labs-0.0.4/gitm/workloads.py +179 -0
  24. gitm_labs-0.0.4/harness/build_edge_yaml_manifest.py +132 -0
  25. gitm_labs-0.0.4/harness/fill_results.py +301 -0
  26. gitm_labs-0.0.4/harness/pod_setup_and_run.sh +182 -0
  27. gitm_labs-0.0.4/harness/run_baselines.sh +108 -0
  28. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/pyproject.toml +23 -1
  29. gitm_labs-0.0.4/scripts/demo_improve_gpu.py +176 -0
  30. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/scripts/gpu_setup.sh +10 -3
  31. gitm_labs-0.0.4/scripts/run_under_runtime.py +13 -0
  32. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/scripts/verify_infra.sh +1 -1
  33. gitm_labs-0.0.4/skills/gitm-context-store.md +246 -0
  34. gitm_labs-0.0.4/skills/gitm-internal-status-loop.md +266 -0
  35. gitm_labs-0.0.4/tests/test_run_loop_workload.py +94 -0
  36. gitm_labs-0.0.4/tests/test_workload_bootstrap.py +76 -0
  37. gitm_labs-0.0.2/benchmarks/kitti/spec.md +0 -80
  38. gitm_labs-0.0.2/gitm/benchmarks/kitti/baseline.py +0 -249
  39. gitm_labs-0.0.2/gitm/tracer/_cupti/__init__.py +0 -24
  40. gitm_labs-0.0.2/harness/run_baselines.sh +0 -90
  41. gitm_labs-0.0.2/skills/gitm-internal-status-loop.md +0 -178
  42. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/.github/workflows/workflow.yml +0 -0
  43. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/.gitignore +0 -0
  44. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/Dockerfile +0 -0
  45. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/assets/arch.png +0 -0
  46. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/Makefile.common +0 -0
  47. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/README.md +0 -0
  48. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/__init__.py +0 -0
  49. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/_smoke_harness.py +0 -0
  50. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/_templates/datasets.md +0 -0
  51. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/_templates/spec.md +0 -0
  52. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/biotech/Makefile +0 -0
  53. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/biotech/__init__.py +0 -0
  54. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/biotech/bench.smoke.toml +0 -0
  55. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/biotech/bench.toml +0 -0
  56. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/biotech/datasets.md +0 -0
  57. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/biotech/fetch.py +0 -0
  58. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/biotech/harness.py +0 -0
  59. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/biotech/sanity.py +0 -0
  60. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/biotech/spec.md +0 -0
  61. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/edge/Makefile +0 -0
  62. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/edge/__init__.py +0 -0
  63. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/edge/bench.smoke.toml +0 -0
  64. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/edge/bench.toml +0 -0
  65. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/edge/build_manifest.py +0 -0
  66. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/edge/datasets.md +0 -0
  67. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/edge/fetch.py +0 -0
  68. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/edge/harness.py +0 -0
  69. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/edge/kitti_source.py +0 -0
  70. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/edge/nuscenes_source.py +0 -0
  71. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/edge/spec.md +0 -0
  72. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/hft/Makefile +0 -0
  73. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/hft/__init__.py +0 -0
  74. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/hft/bench.smoke.toml +0 -0
  75. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/hft/bench.toml +0 -0
  76. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/hft/datasets.md +0 -0
  77. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/hft/gen_manifest.py +0 -0
  78. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/hft/generator/CMakeLists.txt +0 -0
  79. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/hft/generator/main.cpp +0 -0
  80. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/hft/manifest.yaml +0 -0
  81. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/hft/spec.md +0 -0
  82. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/skeleton/__init__.py +0 -0
  83. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/skeleton/measure_overhead.py +0 -0
  84. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/skeleton/overhead.md +0 -0
  85. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/constraints.txt +0 -0
  86. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/docs/invariants.md +0 -0
  87. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/docs/scoring/input_contract.md +0 -0
  88. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/_paths.py +0 -0
  89. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/agents/__init__.py +0 -0
  90. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/agents/policy.py +0 -0
  91. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/bench/__init__.py +0 -0
  92. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/bench/__main__.py +0 -0
  93. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/bench/baseline.py +0 -0
  94. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/bench/cli.py +0 -0
  95. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/bench/edge_manifest.py +0 -0
  96. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/bench/manifest.py +0 -0
  97. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/bench/profile.py +0 -0
  98. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/bench/reproduce.py +0 -0
  99. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/bench/results.py +0 -0
  100. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/bench/runner.py +0 -0
  101. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/bench/schema.py +0 -0
  102. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/bench/templates/results.md.j2 +0 -0
  103. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/benchmarks/__init__.py +0 -0
  104. {gitm_labs-0.0.2 → gitm_labs-0.0.4/gitm}/benchmarks/hft/generate.py +0 -0
  105. {gitm_labs-0.0.2 → gitm_labs-0.0.4/gitm}/benchmarks/hft/harness.py +0 -0
  106. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/benchmarks/kitti/__init__.py +0 -0
  107. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/doctor.py +0 -0
  108. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/kernels/__init__.py +0 -0
  109. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/kernels/library.py +0 -0
  110. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/kernels/library.yaml +0 -0
  111. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/kernels/spec.py +0 -0
  112. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/optimizer/__init__.py +0 -0
  113. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/optimizer/apply.py +0 -0
  114. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/optimizer/attribution.py +0 -0
  115. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/optimizer/dr.py +0 -0
  116. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/optimizer/headroom_kernel_rank.py +0 -0
  117. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/optimizer/invariants.py +0 -0
  118. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/optimizer/monitor.py +0 -0
  119. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/optimizer/multibasis.py +0 -0
  120. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/optimizer/qualification.py +0 -0
  121. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/optimizer/replay.py +0 -0
  122. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/optimizer/replay_validation.py +0 -0
  123. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/optimizer/report.py +0 -0
  124. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/optimizer/templates/report.md.j2 +0 -0
  125. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/planner/__init__.py +0 -0
  126. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/planner/graph.py +0 -0
  127. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/planner/roofline.py +0 -0
  128. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/routing/__init__.py +0 -0
  129. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/routing/scorer_v0.py +0 -0
  130. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/scheduler/__init__.py +0 -0
  131. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/telemetry/__init__.py +0 -0
  132. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/telemetry/backends/__init__.py +0 -0
  133. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/telemetry/backends/amd.py +0 -0
  134. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/telemetry/backends/base.py +0 -0
  135. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/telemetry/backends/discover.py +0 -0
  136. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/telemetry/backends/nvidia.py +0 -0
  137. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/telemetry/collector.py +0 -0
  138. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/telemetry/schema.py +0 -0
  139. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/telemetry/sinks/__init__.py +0 -0
  140. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/telemetry/sinks/jsonl.py +0 -0
  141. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/telemetry/sinks/otlp.py +0 -0
  142. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/telemetry/sinks/prometheus.py +0 -0
  143. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/telemetry/sinks/s3.py +0 -0
  144. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/tracer/__init__.py +0 -0
  145. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/tracer/_cupti/build.py +0 -0
  146. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/tracer/_cupti/cupti_shim.c +0 -0
  147. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/tracer/_cupti_decode.py +0 -0
  148. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/tracer/capture.py +0 -0
  149. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/tracer/cupti.py +0 -0
  150. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/tracer/schema.py +0 -0
  151. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/harness/gen_kitti_manifest.py +0 -0
  152. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/harness/setup_openpcdet.sh +0 -0
  153. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/harness/smoke_kitti.py +0 -0
  154. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/harness/verify_manifest.py +0 -0
  155. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/scripts/compare_results.py +0 -0
  156. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/scripts/emit_report.py +0 -0
  157. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/scripts/gpu_live_capture.py +0 -0
  158. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/scripts/ship_to_pod.sh +0 -0
  159. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/scripts/w2_on_real_trace.py +0 -0
  160. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/tests/__init__.py +0 -0
  161. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/tests/conftest.py +0 -0
  162. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/tests/golden/report_basic.md +0 -0
  163. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/tests/test_apply_rollback.py +0 -0
  164. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/tests/test_bench.py +0 -0
  165. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/tests/test_bench_datasets.py +0 -0
  166. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/tests/test_cupti.py +0 -0
  167. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/tests/test_framework_harnesses.py +0 -0
  168. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/tests/test_hft_harness.py +0 -0
  169. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/tests/test_kitti_benchmark.py +0 -0
  170. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/tests/test_planner_roofline.py +0 -0
  171. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/tests/test_qualification_fingerprint.py +0 -0
  172. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/tests/test_report_snapshot.py +0 -0
  173. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/tests/test_scorer.py +0 -0
  174. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/tests/test_smoke.py +0 -0
  175. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/tests/test_tracer_jsonl.py +0 -0
  176. {gitm_labs-0.0.2 → gitm_labs-0.0.4}/tests/test_w2_runtime.py +0 -0
@@ -0,0 +1,98 @@
1
+ name: Claude Code Review
2
+
3
+ on:
4
+ pull_request:
5
+ types: [opened, synchronize]
6
+
7
+ jobs:
8
+ review:
9
+ runs-on: ubuntu-latest
10
+ permissions:
11
+ contents: read
12
+ pull-requests: write
13
+
14
+ steps:
15
+ - uses: actions/checkout@v4
16
+ with:
17
+ fetch-depth: 0
18
+
19
+ - name: Get PR diff
20
+ run: |
21
+ git diff origin/${{ github.base_ref }}...HEAD \
22
+ -- '*.py' '*.sh' '*.md' '*.yaml' '*.yml' \
23
+ > pr_diff.txt
24
+ echo "Diff size: $(wc -c < pr_diff.txt) bytes"
25
+
26
+ - name: Run Claude Review
27
+ env:
28
+ ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
29
+ GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
30
+ PR_NUMBER: ${{ github.event.pull_request.number }}
31
+ REPO: ${{ github.repository }}
32
+ run: |
33
+ DIFF_SIZE=$(wc -c < pr_diff.txt)
34
+
35
+ if [ "$DIFF_SIZE" -eq 0 ]; then
36
+ echo "No reviewable changes."
37
+ exit 0
38
+ fi
39
+
40
+ if [ "$DIFF_SIZE" -gt 75000 ]; then
41
+ echo "Diff too large (${DIFF_SIZE} bytes), truncating to first 75KB..."
42
+ head -c 75000 pr_diff.txt > pr_diff_trimmed.txt
43
+ mv pr_diff_trimmed.txt pr_diff.txt
44
+ fi
45
+
46
+ python3 << 'EOF'
47
+ import json, os, subprocess, sys, urllib.request
48
+
49
+ with open("pr_diff.txt") as f:
50
+ diff = f.read()
51
+
52
+ payload = {
53
+ "model": "claude-sonnet-4-6",
54
+ "max_tokens": 16384,
55
+ "messages": [{
56
+ "role": "user",
57
+ "content": (
58
+ "You are reviewing a GPU compute / ML benchmarking repo "
59
+ "(Python, CUDA, CuPy, PyTorch, OpenPCDet, bash). "
60
+ "Review this diff and give concise feedback. Use these sections "
61
+ "(skip any with nothing to report):\n\n"
62
+ "**🐛 Bugs** — logic errors, off-by-ones, wrong assumptions\n"
63
+ "**🔒 Security** — shell injection, hardcoded secrets, unsafe paths\n"
64
+ "**⚡ Performance** — unnecessary CPU↔GPU transfers, missed parallelism\n"
65
+ "**📊 Reproducibility** — seed handling, non-determinism risks\n"
66
+ "**💡 Suggestions** — missing error handling, untested edge cases\n\n"
67
+ f"```diff\n{diff}\n```"
68
+ )
69
+ }]
70
+ }
71
+
72
+ req = urllib.request.Request(
73
+ "https://api.anthropic.com/v1/messages",
74
+ data=json.dumps(payload).encode(),
75
+ headers={
76
+ "x-api-key": os.environ["ANTHROPIC_API_KEY"],
77
+ "anthropic-version": "2023-06-01",
78
+ "content-type": "application/json",
79
+ "anthropic-beta": "output-128k-2025-02-19"
80
+ }
81
+ )
82
+
83
+ try:
84
+ with urllib.request.urlopen(req) as resp:
85
+ data = json.load(resp)
86
+ comment = data["content"][0]["text"]
87
+ except urllib.error.HTTPError as e:
88
+ print(f"API error {e.code}: {e.read().decode()}", file=sys.stderr)
89
+ sys.exit(1)
90
+
91
+ body = f"## 🤖 Claude Code Review\n\n{comment}"
92
+ subprocess.run([
93
+ "gh", "pr", "comment",
94
+ os.environ["PR_NUMBER"],
95
+ "--repo", os.environ["REPO"],
96
+ "--body", body
97
+ ], check=True)
98
+ EOF
@@ -0,0 +1,26 @@
1
+ name: Gemini PR Code Review
2
+
3
+ on:
4
+ pull_request:
5
+ types: [opened, synchronize, reopened]
6
+ workflow_dispatch: # Allows manual triggering
7
+
8
+ jobs:
9
+ gemini-review:
10
+ runs-on: ubuntu-latest
11
+ permissions:
12
+ contents: read
13
+ pull-requests: write
14
+
15
+ steps:
16
+ - name: Checkout code
17
+ uses: actions/checkout@v4
18
+ with:
19
+ fetch-depth: 0 # Fetches full history so git diff can calculate cleanly
20
+
21
+ - name: Run Gemini Code Review
22
+ uses: sshnaidm/gemini-code-review-action@v2
23
+ with:
24
+ gemini-key: ${{ secrets.GEMINI_API_KEY }}
25
+ model: "gemini-2.5-flash"
26
+ context-lines: 10
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: gitm-labs
3
- Version: 0.0.2
3
+ Version: 0.0.4
4
4
  Summary: Autonomous GPU kernel optimizer
5
5
  Author: GITM
6
6
  License: Proprietary
@@ -19,6 +19,14 @@ Requires-Dist: mypy>=1.10; extra == 'dev'
19
19
  Requires-Dist: pytest-cov>=4.1; extra == 'dev'
20
20
  Requires-Dist: pytest>=8.0; extra == 'dev'
21
21
  Requires-Dist: ruff>=0.4; extra == 'dev'
22
+ Provides-Extra: gpu
23
+ Requires-Dist: cudf-cu12>=24.0; (platform_system == 'Linux') and extra == 'gpu'
24
+ Requires-Dist: cupy-cuda12x>=13.0; (platform_system == 'Linux') and extra == 'gpu'
25
+ Requires-Dist: nvidia-cuda-cupti-cu12; (platform_system == 'Linux') and extra == 'gpu'
26
+ Requires-Dist: nvidia-cuda-runtime-cu12; (platform_system == 'Linux') and extra == 'gpu'
27
+ Requires-Dist: pandas>=2.0; extra == 'gpu'
28
+ Requires-Dist: pyarrow>=15; extra == 'gpu'
29
+ Requires-Dist: pynvml>=11.5; extra == 'gpu'
22
30
  Provides-Extra: nvidia
23
31
  Requires-Dist: pynvml>=11.5; extra == 'nvidia'
24
32
  Provides-Extra: otlp
@@ -32,6 +40,9 @@ Description-Content-Type: text/markdown
32
40
 
33
41
  # gitm-labs
34
42
 
43
+ <img width="1062" height="356" alt="image" src="https://github.com/user-attachments/assets/ffee3fc3-c42f-4fe5-9e31-c6a62a245f44" />
44
+
45
+
35
46
  Behavioral compiler + intervention runtime for GPU-intensive workloads. Given a workload and a time budget, gitm-labs autonomously profiles, attributes, and applies kernel-level interventions to hit a target performance improvement — and produces a provenance report showing exactly what it changed and why.
36
47
 
37
48
  ## Install
@@ -115,7 +126,7 @@ The monitor checks observed-vs-predicted against three invariants:
115
126
  2. **Memory-traffic** — per-kernel bytes-moved must match predicted.
116
127
  3. **Stream-concurrency** — predicted-concurrent kernels must overlap.
117
128
 
118
- See [docs/invariants.md](docs/invariants.md).
129
+ See [docs/invariants.md](https://github.com/GitM-Labs/runtime/blob/main/docs/invariants.md).
119
130
 
120
131
  ### Module responsibilities
121
132
 
@@ -133,8 +144,6 @@ See [docs/invariants.md](docs/invariants.md).
133
144
  | `gitm.agents` | Autonomous policy — selects interventions, drives rollback |
134
145
  | `gitm.scheduler` | 24-hour loop phase orchestration |
135
146
 
136
- See [docs/ARCHITECTURE.md](docs/ARCHITECTURE.md) for the full design.
137
-
138
147
  ## Data layout
139
148
 
140
149
  Two environment variables control where data lives:
@@ -1,5 +1,8 @@
1
1
  # gitm-labs
2
2
 
3
+ <img width="1062" height="356" alt="image" src="https://github.com/user-attachments/assets/ffee3fc3-c42f-4fe5-9e31-c6a62a245f44" />
4
+
5
+
3
6
  Behavioral compiler + intervention runtime for GPU-intensive workloads. Given a workload and a time budget, gitm-labs autonomously profiles, attributes, and applies kernel-level interventions to hit a target performance improvement — and produces a provenance report showing exactly what it changed and why.
4
7
 
5
8
  ## Install
@@ -83,7 +86,7 @@ The monitor checks observed-vs-predicted against three invariants:
83
86
  2. **Memory-traffic** — per-kernel bytes-moved must match predicted.
84
87
  3. **Stream-concurrency** — predicted-concurrent kernels must overlap.
85
88
 
86
- See [docs/invariants.md](docs/invariants.md).
89
+ See [docs/invariants.md](https://github.com/GitM-Labs/runtime/blob/main/docs/invariants.md).
87
90
 
88
91
  ### Module responsibilities
89
92
 
@@ -101,8 +104,6 @@ See [docs/invariants.md](docs/invariants.md).
101
104
  | `gitm.agents` | Autonomous policy — selects interventions, drives rollback |
102
105
  | `gitm.scheduler` | 24-hour loop phase orchestration |
103
106
 
104
- See [docs/ARCHITECTURE.md](docs/ARCHITECTURE.md) for the full design.
105
-
106
107
  ## Data layout
107
108
 
108
109
  Two environment variables control where data lives:
@@ -0,0 +1,77 @@
1
+ # Additional edge/robotics datasets — proposal
2
+
3
+ > Author: Karthik — for review by Adit before adding to spec.
4
+
5
+ Current scope: nuScenes v1.0 + KITTI Object (~47k keyframes combined).
6
+ Below are the next candidates ranked by signal value for the Git.M invariants.
7
+
8
+ ---
9
+
10
+ ## Tier 1 — High signal, worth adding
11
+
12
+ ### Waymo Open Dataset (v2.0)
13
+ - **Size:** ~1,000 segments, 200k frames, 5 lidars (top + 4 side)
14
+ - **Why it matters:** Much denser point clouds (64-beam top lidar, like KITTI's, but with wider
15
+ range and higher annotation quality). Significantly harder for the backbone — GPU active % is likely
16
+ higher, which tightens the stream-concurrency signal.
17
+ - **Concern:** Requires a data access agreement (Google form, ~1 week turnaround).
18
+ Also non-commercial only — verify with Adit before committing.
19
+ - **Manifest rows:** ~200k (~4x the current ~47k combined manifest). Build time ~20 min.
20
+ - **Blocker:** License approval.
21
+
22
+ ### Argoverse 2 (Sensor Dataset)
23
+ - **Size:** 1,000 scenarios, ~30k frames, 2x lidar (spinning + forward-facing).
24
+ - **Why it matters:** Two asynchronous lidar streams per frame — interesting for the concurrency
25
+ invariant because merging two streams before voxelization introduces a sync point.
26
+ Good test of whether the stream-concurrency signal carries over to multi-lidar setups.
27
+ - **Download:** Open access via S3 (`s3://argoai-argoverse2/...`). No license gate.
28
+ - **Manifest rows:** ~30k. Adds ~64% to the current ~47k combined manifest.
29
+ - **Blocker:** None. Could add this week.
30
+
31
+ ---
32
+
33
+ ## Tier 2 — Useful if we want breadth
34
+
35
+ ### ONCE (One Million Scenes)
36
+ - **Size:** ~1M frames, single 40-beam lidar.
37
+ - **Why it matters:** Volume — more frames = tighter convergence bounds and better
38
+ steady-state GPU utilization measurements. Useful for validating that the 2%
39
+ convergence requirement holds at scale.
40
+ - **Download:** Open access (Chinese hosting, slow downloads). May need mirror.
41
+ - **Blocker:** Download bandwidth on RunPod. Otherwise no license gate.
42
+
43
+ ### PandaSet
44
+ - **Size:** ~16k frames, dual lidar (mechanical + solid-state).
45
+ - **Why it matters:** Solid-state lidar has a fundamentally different point density
46
+ pattern (non-uniform angular resolution). Tests whether the voxelization step
47
+ behaves differently under non-uniform inputs.
48
+ - **Download:** Open access (free sign-up, direct download).
49
+ - **Blocker:** None.
50
+
51
+ ---
52
+
53
+ ## Tier 3 — Lower priority
54
+
55
+ ### SemanticKITTI (KITTI odometry sequences with semantic labels)
56
+ - **Size:** Same lidar as KITTI Object but sequential (not individual frames).
57
+ 22 sequences, ~43k scans.
58
+ - **Why it matters:** Sequential frames are much more cache-friendly — useful
59
+ as a control condition to isolate the I/O cache locality effect.
60
+ - **Blocker:** None. Builds on top of existing KITTI download.
61
+
62
+ ### nuScenes-lidarseg
63
+ - **Same data as nuScenes v1.0** but with per-point semantic labels.
64
+ No new lidar frames; adds annotation load to the post-processing step.
65
+ - **Why it matters:** Tests sync_stall_pct sensitivity to heavier post-processing.
66
+
67
+ ---
68
+
69
+ ## Recommendation
70
+
71
+ Add **Argoverse 2** first — no license gate, open S3, meaningful new signal
72
+ (multi-lidar sync point). After that, pursue **Waymo** if the license approval
73
+ clears, since it's the most widely used benchmark for 3D detection and having
74
+ it in the manifest would make the benchmark credible to external readers.
75
+
76
+ Skip ONCE for now (download pain), and skip SemanticKITTI/PandaSet unless we need
77
+ more control conditions.
@@ -0,0 +1,14 @@
1
+ """Back-compat shim — the HFT generator moved into the installable package.
2
+
3
+ Canonical location is now :mod:`gitm.benchmarks.hft.generate` (it ships in the
4
+ wheel so the loop can auto-stage a smoke dataset from a pip install). Existing
5
+ imports of ``benchmarks.hft.generate`` keep working via the re-exports below.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from gitm.benchmarks.hft.generate import * # noqa: F401,F403
11
+ from gitm.benchmarks.hft.generate import main # noqa: F401 (used in __main__ below)
12
+
13
+ if __name__ == "__main__":
14
+ raise SystemExit(main())
@@ -0,0 +1,17 @@
1
+ """Back-compat shim — the HFT harness moved into the installable package.
2
+
3
+ Canonical location is now :mod:`gitm.benchmarks.hft.harness` (it ships in the
4
+ wheel; this top-level ``benchmarks/`` tree does not). Existing imports of
5
+ ``benchmarks.hft.harness`` keep working via the re-exports below.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from gitm.benchmarks.hft.harness import * # noqa: F401,F403
11
+
12
+ # `import *` skips underscore names; forward the private helpers explicitly so
13
+ # the old import path stays fully compatible. `main` is also used below.
14
+ from gitm.benchmarks.hft.harness import _gpu_name, _seed_dir, main # noqa: F401
15
+
16
+ if __name__ == "__main__":
17
+ raise SystemExit(main())
@@ -2,17 +2,21 @@
2
2
 
3
3
  ## Baseline measurements
4
4
 
5
- | Run | Seed | fps | GPU active % | Data stall % | Sync % | CPU % |
6
- |-----|------|-----|--------------|-------------|--------|-------|
7
- | Baseline 1 | 42 | TBD | TBD | TBD | TBD | TBD |
8
- | Baseline 2 | 43 | TBD | TBD | TBD | TBD | TBD |
9
- | Baseline 3 | 44 | TBD | TBD | TBD | TBD | TBD |
10
- | Mean | | TBD | TBD | TBD | TBD | TBD |
11
- | Stddev | | TBD | TBD | TBD | TBD | TBD |
12
-
13
- 3-seed fps spread: TBD% within 2%: TBD
14
-
15
- NVML cross-check (mean utilization): TBD%
5
+ | Run | Seed | fps | GPU active % | Data stall % | Sync % | CPU % | Compute headroom % |
6
+ |-----|------|-----|--------------|-------------|--------|-------|-------------------|
7
+ | Baseline 1 | 42 | TBD | TBD | TBD | TBD | TBD | TBD |
8
+ | Baseline 2 | 43 | TBD | TBD | TBD | TBD | TBD | TBD |
9
+ | Baseline 3 | 44 | TBD | TBD | TBD | TBD | TBD | TBD |
10
+ | Baseline 4 | 45 | TBD | TBD | TBD | TBD | TBD | TBD |
11
+ | Baseline 5 | 46 | TBD | TBD | TBD | TBD | TBD | TBD |
12
+ | Baseline 6 | 47 | TBD | TBD | TBD | TBD | TBD | TBD |
13
+ | Mean | -- | TBD | TBD | TBD | TBD | TBD | TBD |
14
+ | Stddev | -- | TBD | TBD | TBD | TBD | TBD | -- |
15
+
16
+ 6-seed fps spread: TBD% -- within 2%: TBD
17
+
18
+ GPU headroom (compute_headroom_pct = 100 - mean NVML util): TBD%
19
+ Memory free at peak: TBD GB
16
20
 
17
21
  ## Stream-concurrency verification
18
22
 
@@ -23,7 +27,7 @@ Host-side voxelization overlaps device-side backbone inference: **TBD**
23
27
  python harness/smoke_kitti.py --cfg $OPENPCDET_CFG --ckpt $OPENPCDET_CKPT --n-frames 200
24
28
  Open the .nsys-rep in Nsight Systems GUI, zoom in on a few consecutive frames,
25
29
  look for CPU voxelization bar overlapping GPU backbone bar.
26
- Screenshot benchmarks/kitti/concurrency_timeline.png
30
+ Screenshot -> benchmarks/kitti/concurrency_timeline.png
27
31
  -->
28
32
 
29
33
  ![nsys concurrency timeline](concurrency_timeline.png)
@@ -36,11 +40,11 @@ invariant has no signal for this workload and the benchmark needs review.
36
40
 
37
41
  ## Notes
38
42
 
39
- - Machine: TBD
43
+ - Machine: RunPod y4xbh7yws2e4tu-64410cb0
40
44
  - GPU: TBD
41
45
  - Driver version: TBD
42
46
  - CUDA version: TBD
43
- - OpenPCDet commit: TBD
44
- - Config sha256: TBD
47
+ - OpenPCDet commit: 233f849829b6ac19afb8af8837a0246890908755
48
+ - Config sha256: 170a9ffe76cfd8509d1044cfbcf1cbd44c5d320fda81bf0089a8d5efaf1c91c8
45
49
  - Checkpoint sha256: 4c83fc0fa02575b9b3e9dec676f698e7a70bb5a795e89f91df8a96b916fa19e2
46
50
  - Date: TBD
@@ -0,0 +1,109 @@
1
+ # KITTI edge benchmark spec
2
+
3
+ ## Section 1: Input definition
4
+
5
+ Datasets used:
6
+ - KITTI 3D Object Detection: 7,481 training frames, Velodyne lidar + calibration + labels
7
+ - Data location: `$GITM_DATA_ROOT/kitti/training/`
8
+ - Directory layout:
9
+ - `velodyne/` -- 000000.bin to 007480.bin (float32 XYZI point clouds)
10
+ - `calib/` -- 000000.txt to 007480.txt (camera-lidar calibration)
11
+ - `label_2/` -- 000000.txt to 007480.txt (3D bounding box annotations)
12
+ - Manifest: `benchmarks/kitti/manifest.yaml`
13
+ - Every file sha256-verified. Pass/fail gated by `python harness/verify_manifest.py`.
14
+ - Generate: `python harness/gen_kitti_manifest.py --root $GITM_DATA_ROOT/kitti/training`
15
+
16
+ ## Section 2: Work unit
17
+
18
+ One frame processed end-to-end through:
19
+
20
+ voxelization -> 3D backbone (PointPillars) -> BEV head -> NMS -> detections
21
+
22
+ Model: OpenPCDet PointPillars (KITTI config)
23
+
24
+ - OpenPCDet commit: `233f849829b6ac19afb8af8837a0246890908755`
25
+ - Config (pointpillar.yaml) sha256: `170a9ffe76cfd8509d1044cfbcf1cbd44c5d320fda81bf0089a8d5efaf1c91c8`
26
+ - Checkpoint: `pointpillar_7728.pth`
27
+ - Checkpoint sha256: `4c83fc0fa02575b9b3e9dec676f698e7a70bb5a795e89f91df8a96b916fa19e2`
28
+
29
+ Stage breakdown per frame:
30
+ 1. Load .bin (np.fromfile) -- CPU / data stall
31
+ 2. Voxelization + H2D copy -- CPU / data stall
32
+ 3. Backbone + BEV head -- GPU active
33
+ 4. NMS + box assembly -- CPU / sync stall
34
+
35
+ Implementation: `gitm.benchmarks.kitti.WorkUnit`
36
+
37
+ ## Section 3: Success metric
38
+
39
+ - Top-line metric: `frames_per_second` (timed warm window)
40
+ - Warm-up: 100 frames discarded before timing begins
41
+ - Disk pre-warm: all frames read once before GPU warmup (eliminates OS page cache
42
+ locality as a seed-ordering confound)
43
+ - Timed window: 7,381 frames (all training frames minus warmup)
44
+ - Convergence: 6 seeds (42-47) must agree within 2% fps spread
45
+ - GPU saturation check: GPU active % must be < 85%
46
+ - Auxiliary: `total_detections` per run (regression sentinel, not a target)
47
+
48
+ Baseline result (fill after running `bash harness/run_baselines.sh`):
49
+
50
+ | Seed | fps | GPU active % | Data stall % | Sync % | CPU % | Compute headroom % |
51
+ |------|-----|--------------|-------------|--------|-------|-------------------|
52
+ | 42 | TBD | TBD | TBD | TBD | TBD | TBD |
53
+ | 43 | TBD | TBD | TBD | TBD | TBD | TBD |
54
+ | 44 | TBD | TBD | TBD | TBD | TBD | TBD |
55
+ | 45 | TBD | TBD | TBD | TBD | TBD | TBD |
56
+ | 46 | TBD | TBD | TBD | TBD | TBD | TBD |
57
+ | 47 | TBD | TBD | TBD | TBD | TBD | TBD |
58
+ | Mean | TBD | TBD | TBD | TBD | TBD | TBD |
59
+ | Stddev | TBD | TBD | TBD | TBD | TBD | -- |
60
+
61
+ 6-seed fps spread: TBD -- within 2%: TBD
62
+
63
+ ## Section 4: Expected stall profile
64
+
65
+ | Category | What it is | Expected % | Measured % |
66
+ |----------|-----------|------------|------------|
67
+ | Data stall | lidar .bin decode + host-side voxelization + H2D copy | 20-35% | TBD |
68
+ | Sync stall | NMS serialization on CPU | 10-20% | TBD |
69
+ | GPU active | backbone + BEV head forward pass | 50-65% | TBD |
70
+ | CPU overhead | Python dispatch, dataloader | ~5% | TBD |
71
+
72
+ **Critical check:** GPU active must be < 85%. If saturated, flag Adit same day
73
+ for 500-frame shard fallback.
74
+
75
+ **Stream-concurrency check (nsys):** host-side voxelization of frame N+1 should
76
+ overlap device-side backbone inference on frame N. Capture nsys timeline and
77
+ commit screenshot to `benchmarks/kitti/results.md`. If overlap is absent, the
78
+ stream-concurrency invariant has no signal -- flag Adit immediately.
79
+
80
+ ## Section 5: GPU headroom (runtime integration)
81
+
82
+ Measured via `gitm.optimizer.headroom_kernel_rank.gpu_headroom()` using NVML
83
+ samples collected at 5 Hz during the timed window.
84
+
85
+ | Metric | Expected | Measured |
86
+ |--------|----------|---------|
87
+ | Compute headroom (100 - mean util) | >35% | TBD |
88
+ | Memory free at peak | >10 GB | TBD |
89
+
90
+ Per-stage spread (p50/p95 latency per stage across all frames):
91
+
92
+ | Stage | mean ms | p50 ms | p95 ms | % of frame |
93
+ |-------|---------|--------|--------|------------|
94
+ | load | TBD | TBD | TBD | TBD |
95
+ | preprocess (voxelize + H2D) | TBD | TBD | TBD | TBD |
96
+ | inference (backbone + BEV + NMS) | TBD | TBD | TBD | TBD |
97
+ | postprocess (D2H) | TBD | TBD | TBD | TBD |
98
+
99
+ Stage spread is emitted as `stage_spread` in each baseline JSON and as
100
+ `stage_spread_report.txt` alongside it.
101
+
102
+ ## Environment
103
+
104
+ - Machine: RunPod y4xbh7yws2e4tu-64410cb0 (2 TB persistent /workspace)
105
+ - GPU: TBD
106
+ - Driver: TBD
107
+ - CUDA: TBD
108
+ - OpenPCDet commit: 233f849829b6ac19afb8af8837a0246890908755
109
+ - Date: TBD
@@ -2,7 +2,7 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
- __version__ = "0.0.1"
5
+ __version__ = "0.0.4"
6
6
 
7
7
  from gitm.api import optimize
8
8
 
@@ -6,6 +6,7 @@
6
6
 
7
7
  from __future__ import annotations
8
8
 
9
+ from collections.abc import Callable
9
10
  from typing import Any
10
11
 
11
12
  from gitm.scheduler import LoopConfig, run_loop
@@ -18,6 +19,7 @@ def optimize(
18
19
  budget: str = "24h",
19
20
  target: float = 0.15,
20
21
  scratch: str | None = None,
22
+ workload_runner: Callable[[], dict[str, Any]] | None = None,
21
23
  ) -> dict[str, Any]:
22
24
  """Run the autonomous 24-hour optimization loop and return a report.
23
25
 
@@ -26,6 +28,11 @@ def optimize(
26
28
  path. ``budget`` and ``target`` follow the SKU contract: a verified floor
27
29
  of ``target`` fraction improvement within ``budget`` wall time, or a
28
30
  qualification-gate diagnostic explaining why the floor was not committed.
31
+
32
+ ``workload_runner`` optionally supplies an explicit zero-arg callable that
33
+ launches the workload's GPU work; it runs inside the capture window. When
34
+ omitted, the loop resolves ``workload`` against the registry in
35
+ :mod:`gitm.workloads`.
29
36
  """
30
37
  cfg = LoopConfig(
31
38
  engine=engine,
@@ -33,5 +40,6 @@ def optimize(
33
40
  budget=budget,
34
41
  target=target,
35
42
  scratch=scratch,
43
+ workload_runner=workload_runner,
36
44
  )
37
45
  return run_loop(cfg)
@@ -0,0 +1,10 @@
1
+ """GITM edge (nuScenes) benchmark — CenterPoint-PointPillar baseline.
2
+
3
+ Mirrors gitm.benchmarks.kitti but for the nuScenes CenterPoint-PointPillar
4
+ (dyn / GPU-voxelization) baseline, with multi-sweep (keyframe + 9 sweeps)
5
+ point accumulation sourced from OpenPCDet's NuScenesDataset.
6
+ """
7
+
8
+ from gitm.benchmarks.edge.workunit import NuScenesWorkUnit, WorkUnitResult
9
+
10
+ __all__ = ["NuScenesWorkUnit", "WorkUnitResult"]