gitm-labs 0.0.2__tar.gz → 0.0.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gitm_labs-0.0.4/.github/workflows/claude-review.yml +98 -0
- gitm_labs-0.0.4/.github/workflows/gemini-pr-review.yml +26 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/PKG-INFO +13 -4
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/README.md +4 -3
- gitm_labs-0.0.4/benchmarks/edge/datasets_proposal.md +77 -0
- gitm_labs-0.0.4/benchmarks/hft/generate.py +14 -0
- gitm_labs-0.0.4/benchmarks/hft/harness.py +17 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/kitti/results.md +19 -15
- gitm_labs-0.0.4/benchmarks/kitti/spec.md +109 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/__init__.py +1 -1
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/api.py +8 -0
- gitm_labs-0.0.4/gitm/benchmarks/edge/__init__.py +10 -0
- gitm_labs-0.0.4/gitm/benchmarks/edge/baseline.py +261 -0
- gitm_labs-0.0.4/gitm/benchmarks/edge/workunit.py +240 -0
- gitm_labs-0.0.4/gitm/benchmarks/hft/__init__.py +8 -0
- gitm_labs-0.0.4/gitm/benchmarks/kitti/baseline.py +405 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/benchmarks/kitti/workunit.py +3 -5
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/cli.py +5 -2
- gitm_labs-0.0.4/gitm/planner/kitti_graph.py +261 -0
- gitm_labs-0.0.2/scripts/run_under_runtime.py → gitm_labs-0.0.4/gitm/runtime_driver.py +92 -29
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/scheduler/loop.py +92 -3
- gitm_labs-0.0.4/gitm/tracer/_cupti/__init__.py +76 -0
- gitm_labs-0.0.4/gitm/workloads.py +179 -0
- gitm_labs-0.0.4/harness/build_edge_yaml_manifest.py +132 -0
- gitm_labs-0.0.4/harness/fill_results.py +301 -0
- gitm_labs-0.0.4/harness/pod_setup_and_run.sh +182 -0
- gitm_labs-0.0.4/harness/run_baselines.sh +108 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/pyproject.toml +23 -1
- gitm_labs-0.0.4/scripts/demo_improve_gpu.py +176 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/scripts/gpu_setup.sh +10 -3
- gitm_labs-0.0.4/scripts/run_under_runtime.py +13 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/scripts/verify_infra.sh +1 -1
- gitm_labs-0.0.4/skills/gitm-context-store.md +246 -0
- gitm_labs-0.0.4/skills/gitm-internal-status-loop.md +266 -0
- gitm_labs-0.0.4/tests/test_run_loop_workload.py +94 -0
- gitm_labs-0.0.4/tests/test_workload_bootstrap.py +76 -0
- gitm_labs-0.0.2/benchmarks/kitti/spec.md +0 -80
- gitm_labs-0.0.2/gitm/benchmarks/kitti/baseline.py +0 -249
- gitm_labs-0.0.2/gitm/tracer/_cupti/__init__.py +0 -24
- gitm_labs-0.0.2/harness/run_baselines.sh +0 -90
- gitm_labs-0.0.2/skills/gitm-internal-status-loop.md +0 -178
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/.github/workflows/workflow.yml +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/.gitignore +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/Dockerfile +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/assets/arch.png +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/Makefile.common +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/README.md +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/__init__.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/_smoke_harness.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/_templates/datasets.md +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/_templates/spec.md +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/biotech/Makefile +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/biotech/__init__.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/biotech/bench.smoke.toml +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/biotech/bench.toml +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/biotech/datasets.md +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/biotech/fetch.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/biotech/harness.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/biotech/sanity.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/biotech/spec.md +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/edge/Makefile +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/edge/__init__.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/edge/bench.smoke.toml +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/edge/bench.toml +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/edge/build_manifest.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/edge/datasets.md +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/edge/fetch.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/edge/harness.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/edge/kitti_source.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/edge/nuscenes_source.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/edge/spec.md +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/hft/Makefile +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/hft/__init__.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/hft/bench.smoke.toml +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/hft/bench.toml +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/hft/datasets.md +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/hft/gen_manifest.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/hft/generator/CMakeLists.txt +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/hft/generator/main.cpp +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/hft/manifest.yaml +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/hft/spec.md +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/skeleton/__init__.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/skeleton/measure_overhead.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/benchmarks/skeleton/overhead.md +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/constraints.txt +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/docs/invariants.md +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/docs/scoring/input_contract.md +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/_paths.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/agents/__init__.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/agents/policy.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/bench/__init__.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/bench/__main__.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/bench/baseline.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/bench/cli.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/bench/edge_manifest.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/bench/manifest.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/bench/profile.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/bench/reproduce.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/bench/results.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/bench/runner.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/bench/schema.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/bench/templates/results.md.j2 +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/benchmarks/__init__.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4/gitm}/benchmarks/hft/generate.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4/gitm}/benchmarks/hft/harness.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/benchmarks/kitti/__init__.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/doctor.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/kernels/__init__.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/kernels/library.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/kernels/library.yaml +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/kernels/spec.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/optimizer/__init__.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/optimizer/apply.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/optimizer/attribution.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/optimizer/dr.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/optimizer/headroom_kernel_rank.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/optimizer/invariants.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/optimizer/monitor.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/optimizer/multibasis.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/optimizer/qualification.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/optimizer/replay.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/optimizer/replay_validation.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/optimizer/report.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/optimizer/templates/report.md.j2 +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/planner/__init__.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/planner/graph.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/planner/roofline.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/routing/__init__.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/routing/scorer_v0.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/scheduler/__init__.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/telemetry/__init__.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/telemetry/backends/__init__.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/telemetry/backends/amd.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/telemetry/backends/base.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/telemetry/backends/discover.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/telemetry/backends/nvidia.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/telemetry/collector.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/telemetry/schema.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/telemetry/sinks/__init__.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/telemetry/sinks/jsonl.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/telemetry/sinks/otlp.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/telemetry/sinks/prometheus.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/telemetry/sinks/s3.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/tracer/__init__.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/tracer/_cupti/build.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/tracer/_cupti/cupti_shim.c +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/tracer/_cupti_decode.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/tracer/capture.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/tracer/cupti.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/gitm/tracer/schema.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/harness/gen_kitti_manifest.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/harness/setup_openpcdet.sh +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/harness/smoke_kitti.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/harness/verify_manifest.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/scripts/compare_results.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/scripts/emit_report.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/scripts/gpu_live_capture.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/scripts/ship_to_pod.sh +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/scripts/w2_on_real_trace.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/tests/__init__.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/tests/conftest.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/tests/golden/report_basic.md +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/tests/test_apply_rollback.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/tests/test_bench.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/tests/test_bench_datasets.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/tests/test_cupti.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/tests/test_framework_harnesses.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/tests/test_hft_harness.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/tests/test_kitti_benchmark.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/tests/test_planner_roofline.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/tests/test_qualification_fingerprint.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/tests/test_report_snapshot.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/tests/test_scorer.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/tests/test_smoke.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/tests/test_tracer_jsonl.py +0 -0
- {gitm_labs-0.0.2 → gitm_labs-0.0.4}/tests/test_w2_runtime.py +0 -0
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
name: Claude Code Review
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
pull_request:
|
|
5
|
+
types: [opened, synchronize]
|
|
6
|
+
|
|
7
|
+
jobs:
|
|
8
|
+
review:
|
|
9
|
+
runs-on: ubuntu-latest
|
|
10
|
+
permissions:
|
|
11
|
+
contents: read
|
|
12
|
+
pull-requests: write
|
|
13
|
+
|
|
14
|
+
steps:
|
|
15
|
+
- uses: actions/checkout@v4
|
|
16
|
+
with:
|
|
17
|
+
fetch-depth: 0
|
|
18
|
+
|
|
19
|
+
- name: Get PR diff
|
|
20
|
+
run: |
|
|
21
|
+
git diff origin/${{ github.base_ref }}...HEAD \
|
|
22
|
+
-- '*.py' '*.sh' '*.md' '*.yaml' '*.yml' \
|
|
23
|
+
> pr_diff.txt
|
|
24
|
+
echo "Diff size: $(wc -c < pr_diff.txt) bytes"
|
|
25
|
+
|
|
26
|
+
- name: Run Claude Review
|
|
27
|
+
env:
|
|
28
|
+
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
|
|
29
|
+
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
|
30
|
+
PR_NUMBER: ${{ github.event.pull_request.number }}
|
|
31
|
+
REPO: ${{ github.repository }}
|
|
32
|
+
run: |
|
|
33
|
+
DIFF_SIZE=$(wc -c < pr_diff.txt)
|
|
34
|
+
|
|
35
|
+
if [ "$DIFF_SIZE" -eq 0 ]; then
|
|
36
|
+
echo "No reviewable changes."
|
|
37
|
+
exit 0
|
|
38
|
+
fi
|
|
39
|
+
|
|
40
|
+
if [ "$DIFF_SIZE" -gt 75000 ]; then
|
|
41
|
+
echo "Diff too large (${DIFF_SIZE} bytes), truncating to first 75KB..."
|
|
42
|
+
head -c 75000 pr_diff.txt > pr_diff_trimmed.txt
|
|
43
|
+
mv pr_diff_trimmed.txt pr_diff.txt
|
|
44
|
+
fi
|
|
45
|
+
|
|
46
|
+
python3 << 'EOF'
|
|
47
|
+
import json, os, subprocess, sys, urllib.request
|
|
48
|
+
|
|
49
|
+
with open("pr_diff.txt") as f:
|
|
50
|
+
diff = f.read()
|
|
51
|
+
|
|
52
|
+
payload = {
|
|
53
|
+
"model": "claude-sonnet-4-6",
|
|
54
|
+
"max_tokens": 16384,
|
|
55
|
+
"messages": [{
|
|
56
|
+
"role": "user",
|
|
57
|
+
"content": (
|
|
58
|
+
"You are reviewing a GPU compute / ML benchmarking repo "
|
|
59
|
+
"(Python, CUDA, CuPy, PyTorch, OpenPCDet, bash). "
|
|
60
|
+
"Review this diff and give concise feedback. Use these sections "
|
|
61
|
+
"(skip any with nothing to report):\n\n"
|
|
62
|
+
"**🐛 Bugs** — logic errors, off-by-ones, wrong assumptions\n"
|
|
63
|
+
"**🔒 Security** — shell injection, hardcoded secrets, unsafe paths\n"
|
|
64
|
+
"**⚡ Performance** — unnecessary CPU↔GPU transfers, missed parallelism\n"
|
|
65
|
+
"**📊 Reproducibility** — seed handling, non-determinism risks\n"
|
|
66
|
+
"**💡 Suggestions** — missing error handling, untested edge cases\n\n"
|
|
67
|
+
f"```diff\n{diff}\n```"
|
|
68
|
+
)
|
|
69
|
+
}]
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
req = urllib.request.Request(
|
|
73
|
+
"https://api.anthropic.com/v1/messages",
|
|
74
|
+
data=json.dumps(payload).encode(),
|
|
75
|
+
headers={
|
|
76
|
+
"x-api-key": os.environ["ANTHROPIC_API_KEY"],
|
|
77
|
+
"anthropic-version": "2023-06-01",
|
|
78
|
+
"content-type": "application/json",
|
|
79
|
+
"anthropic-beta": "output-128k-2025-02-19"
|
|
80
|
+
}
|
|
81
|
+
)
|
|
82
|
+
|
|
83
|
+
try:
|
|
84
|
+
with urllib.request.urlopen(req) as resp:
|
|
85
|
+
data = json.load(resp)
|
|
86
|
+
comment = data["content"][0]["text"]
|
|
87
|
+
except urllib.error.HTTPError as e:
|
|
88
|
+
print(f"API error {e.code}: {e.read().decode()}", file=sys.stderr)
|
|
89
|
+
sys.exit(1)
|
|
90
|
+
|
|
91
|
+
body = f"## 🤖 Claude Code Review\n\n{comment}"
|
|
92
|
+
subprocess.run([
|
|
93
|
+
"gh", "pr", "comment",
|
|
94
|
+
os.environ["PR_NUMBER"],
|
|
95
|
+
"--repo", os.environ["REPO"],
|
|
96
|
+
"--body", body
|
|
97
|
+
], check=True)
|
|
98
|
+
EOF
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
name: Gemini PR Code Review
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
pull_request:
|
|
5
|
+
types: [opened, synchronize, reopened]
|
|
6
|
+
workflow_dispatch: # Allows manual triggering
|
|
7
|
+
|
|
8
|
+
jobs:
|
|
9
|
+
gemini-review:
|
|
10
|
+
runs-on: ubuntu-latest
|
|
11
|
+
permissions:
|
|
12
|
+
contents: read
|
|
13
|
+
pull-requests: write
|
|
14
|
+
|
|
15
|
+
steps:
|
|
16
|
+
- name: Checkout code
|
|
17
|
+
uses: actions/checkout@v4
|
|
18
|
+
with:
|
|
19
|
+
fetch-depth: 0 # Fetches full history so git diff can calculate cleanly
|
|
20
|
+
|
|
21
|
+
- name: Run Gemini Code Review
|
|
22
|
+
uses: sshnaidm/gemini-code-review-action@v2
|
|
23
|
+
with:
|
|
24
|
+
gemini-key: ${{ secrets.GEMINI_API_KEY }}
|
|
25
|
+
model: "gemini-2.5-flash"
|
|
26
|
+
context-lines: 10
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: gitm-labs
|
|
3
|
-
Version: 0.0.2
|
|
3
|
+
Version: 0.0.4
|
|
4
4
|
Summary: Autonomous GPU kernel optimizer
|
|
5
5
|
Author: GITM
|
|
6
6
|
License: Proprietary
|
|
@@ -19,6 +19,14 @@ Requires-Dist: mypy>=1.10; extra == 'dev'
|
|
|
19
19
|
Requires-Dist: pytest-cov>=4.1; extra == 'dev'
|
|
20
20
|
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
21
21
|
Requires-Dist: ruff>=0.4; extra == 'dev'
|
|
22
|
+
Provides-Extra: gpu
|
|
23
|
+
Requires-Dist: cudf-cu12>=24.0; (platform_system == 'Linux') and extra == 'gpu'
|
|
24
|
+
Requires-Dist: cupy-cuda12x>=13.0; (platform_system == 'Linux') and extra == 'gpu'
|
|
25
|
+
Requires-Dist: nvidia-cuda-cupti-cu12; (platform_system == 'Linux') and extra == 'gpu'
|
|
26
|
+
Requires-Dist: nvidia-cuda-runtime-cu12; (platform_system == 'Linux') and extra == 'gpu'
|
|
27
|
+
Requires-Dist: pandas>=2.0; extra == 'gpu'
|
|
28
|
+
Requires-Dist: pyarrow>=15; extra == 'gpu'
|
|
29
|
+
Requires-Dist: pynvml>=11.5; extra == 'gpu'
|
|
22
30
|
Provides-Extra: nvidia
|
|
23
31
|
Requires-Dist: pynvml>=11.5; extra == 'nvidia'
|
|
24
32
|
Provides-Extra: otlp
|
|
@@ -32,6 +40,9 @@ Description-Content-Type: text/markdown
|
|
|
32
40
|
|
|
33
41
|
# gitm-labs
|
|
34
42
|
|
|
43
|
+
<img width="1062" height="356" alt="image" src="https://github.com/user-attachments/assets/ffee3fc3-c42f-4fe5-9e31-c6a62a245f44" />
|
|
44
|
+
|
|
45
|
+
|
|
35
46
|
Behavioral compiler + intervention runtime for GPU-intensive workloads. Given a workload and a time budget, gitm-labs autonomously profiles, attributes, and applies kernel-level interventions to hit a target performance improvement — and produces a provenance report showing exactly what it changed and why.
|
|
36
47
|
|
|
37
48
|
## Install
|
|
@@ -115,7 +126,7 @@ The monitor checks observed-vs-predicted against three invariants:
|
|
|
115
126
|
2. **Memory-traffic** — per-kernel bytes-moved must match predicted.
|
|
116
127
|
3. **Stream-concurrency** — predicted-concurrent kernels must overlap.
|
|
117
128
|
|
|
118
|
-
See [docs/invariants.md](docs/invariants.md).
|
|
129
|
+
See [docs/invariants.md](https://github.com/GitM-Labs/runtime/blob/main/docs/invariants.md).
|
|
119
130
|
|
|
120
131
|
### Module responsibilities
|
|
121
132
|
|
|
@@ -133,8 +144,6 @@ See [docs/invariants.md](docs/invariants.md).
|
|
|
133
144
|
| `gitm.agents` | Autonomous policy — selects interventions, drives rollback |
|
|
134
145
|
| `gitm.scheduler` | 24-hour loop phase orchestration |
|
|
135
146
|
|
|
136
|
-
See [docs/ARCHITECTURE.md](docs/ARCHITECTURE.md) for the full design.
|
|
137
|
-
|
|
138
147
|
## Data layout
|
|
139
148
|
|
|
140
149
|
Two environment variables control where data lives:
|
|
@@ -1,5 +1,8 @@
|
|
|
1
1
|
# gitm-labs
|
|
2
2
|
|
|
3
|
+
<img width="1062" height="356" alt="image" src="https://github.com/user-attachments/assets/ffee3fc3-c42f-4fe5-9e31-c6a62a245f44" />
|
|
4
|
+
|
|
5
|
+
|
|
3
6
|
Behavioral compiler + intervention runtime for GPU-intensive workloads. Given a workload and a time budget, gitm-labs autonomously profiles, attributes, and applies kernel-level interventions to hit a target performance improvement — and produces a provenance report showing exactly what it changed and why.
|
|
4
7
|
|
|
5
8
|
## Install
|
|
@@ -83,7 +86,7 @@ The monitor checks observed-vs-predicted against three invariants:
|
|
|
83
86
|
2. **Memory-traffic** — per-kernel bytes-moved must match predicted.
|
|
84
87
|
3. **Stream-concurrency** — predicted-concurrent kernels must overlap.
|
|
85
88
|
|
|
86
|
-
See [docs/invariants.md](docs/invariants.md).
|
|
89
|
+
See [docs/invariants.md](https://github.com/GitM-Labs/runtime/blob/main/docs/invariants.md).
|
|
87
90
|
|
|
88
91
|
### Module responsibilities
|
|
89
92
|
|
|
@@ -101,8 +104,6 @@ See [docs/invariants.md](docs/invariants.md).
|
|
|
101
104
|
| `gitm.agents` | Autonomous policy — selects interventions, drives rollback |
|
|
102
105
|
| `gitm.scheduler` | 24-hour loop phase orchestration |
|
|
103
106
|
|
|
104
|
-
See [docs/ARCHITECTURE.md](docs/ARCHITECTURE.md) for the full design.
|
|
105
|
-
|
|
106
107
|
## Data layout
|
|
107
108
|
|
|
108
109
|
Two environment variables control where data lives:
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
# Additional edge/robotics datasets — proposal
|
|
2
|
+
|
|
3
|
+
> Author: Karthik — for review by Adit before adding to spec.
|
|
4
|
+
|
|
5
|
+
Current scope: nuScenes v1.0 + KITTI Object (~47k keyframes combined).
|
|
6
|
+
Below are the next candidates ranked by signal value for the Git.M invariants.
|
|
7
|
+
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
## Tier 1 — High signal, worth adding
|
|
11
|
+
|
|
12
|
+
### Waymo Open Dataset (v2.0)
|
|
13
|
+
- **Size:** ~1,000 segments, 200k frames, 5 lidar sensors (1 top + 4 side)
|
|
14
|
+
- **Why it matters:** Much denser point clouds (64-beam top lidar, the same beam count as KITTI's, but wider
|
|
15
|
+
range + higher annotation quality). Significantly harder for the backbone — GPU active % likely
|
|
16
|
+
higher, which tightens the stream-concurrency signal.
|
|
17
|
+
- **Concern:** Requires a data access agreement (Google form, ~1 week turnaround).
|
|
18
|
+
Also non-commercial only — verify with Adit before committing.
|
|
19
|
+
- **Manifest rows:** ~200k (5x current KITTI). Build time ~20 min.
|
|
20
|
+
- **Blocker:** License approval.
|
|
21
|
+
|
|
22
|
+
### Argoverse 2 (Sensor Dataset)
|
|
23
|
+
- **Size:** 1,000 scenarios, ~30k frames, 2x lidar (spinning + forward-facing).
|
|
24
|
+
- **Why it matters:** Two asynchronous lidar streams per frame — interesting for concurrency
|
|
25
|
+
invariant because merging two streams before voxelization introduces a sync point.
|
|
26
|
+
Good test of whether stream-concurrency signal carries to multi-lidar setups.
|
|
27
|
+
- **Download:** Open access via S3 (`s3://argoai-argoverse2/...`). No license gate.
|
|
28
|
+
- **Manifest rows:** ~30k. Adds ~30% to current combined manifest.
|
|
29
|
+
- **Blocker:** None. Could add this week.
|
|
30
|
+
|
|
31
|
+
---
|
|
32
|
+
|
|
33
|
+
## Tier 2 — Useful if we want breadth
|
|
34
|
+
|
|
35
|
+
### ONCE (One Million Scenes)
|
|
36
|
+
- **Size:** ~1M frames, single 40-beam lidar.
|
|
37
|
+
- **Why it matters:** Volume — more frames = tighter convergence bounds and better
|
|
38
|
+
steady-state GPU utilization measurements. Useful for validating that the 2%
|
|
39
|
+
convergence requirement holds at scale.
|
|
40
|
+
- **Download:** Open access (Chinese hosting, slow downloads). May need mirror.
|
|
41
|
+
- **Blocker:** Download bandwidth on RunPod. Otherwise no license gate.
|
|
42
|
+
|
|
43
|
+
### PandaSet
|
|
44
|
+
- **Size:** ~16k frames, dual lidar (mechanical + solid-state).
|
|
45
|
+
- **Why it matters:** Solid-state lidar has a fundamentally different point density
|
|
46
|
+
pattern (non-uniform angular resolution). Tests whether the voxelization step
|
|
47
|
+
behaves differently under non-uniform inputs.
|
|
48
|
+
- **Download:** Open access (free sign-up, direct download).
|
|
49
|
+
- **Blocker:** None.
|
|
50
|
+
|
|
51
|
+
---
|
|
52
|
+
|
|
53
|
+
## Tier 3 — Lower priority
|
|
54
|
+
|
|
55
|
+
### SemanticKITTI (KITTI odometry sequences with semantic labels)
|
|
56
|
+
- **Size:** Same lidar as KITTI Object but sequential (not individual frames).
|
|
57
|
+
22 sequences, ~43k scans.
|
|
58
|
+
- **Why it matters:** Sequential frames are much more cache-friendly — useful
|
|
59
|
+
as a control condition to isolate the I/O cache locality effect.
|
|
60
|
+
- **Blocker:** None. Builds on top of existing KITTI download.
|
|
61
|
+
|
|
62
|
+
### nuScenes-lidarseg
|
|
63
|
+
- **Same data as nuScenes v1.0** but with per-point semantic labels.
|
|
64
|
+
No new lidar frames; adds annotation load to the post-processing step.
|
|
65
|
+
- **Why it matters:** Tests sync_stall_pct sensitivity to heavier post-processing.
|
|
66
|
+
|
|
67
|
+
---
|
|
68
|
+
|
|
69
|
+
## Recommendation
|
|
70
|
+
|
|
71
|
+
Add **Argoverse 2** first — no license gate, open S3, meaningful new signal
|
|
72
|
+
(multi-lidar sync point). After that, pursue **Waymo** if the license approval
|
|
73
|
+
clears, since it's the most widely used benchmark for 3D detection and having
|
|
74
|
+
it in the manifest would make the benchmark credible to external readers.
|
|
75
|
+
|
|
76
|
+
Skip ONCE for now (download pain) and SemanticKITTI/PandaSet unless we need
|
|
77
|
+
more control conditions.
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
"""Back-compat shim — the HFT generator moved into the installable package.
|
|
2
|
+
|
|
3
|
+
Canonical location is now :mod:`gitm.benchmarks.hft.generate` (it ships in the
|
|
4
|
+
wheel so the loop can auto-stage a smoke dataset from a pip install). Existing
|
|
5
|
+
imports of ``benchmarks.hft.generate`` keep working via the re-exports below.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from gitm.benchmarks.hft.generate import * # noqa: F401,F403
|
|
11
|
+
from gitm.benchmarks.hft.generate import main # noqa: F401 (used in __main__ below)
|
|
12
|
+
|
|
13
|
+
if __name__ == "__main__":
|
|
14
|
+
raise SystemExit(main())
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"""Back-compat shim — the HFT harness moved into the installable package.
|
|
2
|
+
|
|
3
|
+
Canonical location is now :mod:`gitm.benchmarks.hft.harness` (it ships in the
|
|
4
|
+
wheel; this top-level ``benchmarks/`` tree does not). Existing imports of
|
|
5
|
+
``benchmarks.hft.harness`` keep working via the re-exports below.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from gitm.benchmarks.hft.harness import * # noqa: F401,F403
|
|
11
|
+
|
|
12
|
+
# `import *` skips underscore names; forward the private helpers explicitly so
|
|
13
|
+
# the old import path stays fully compatible. `main` is also used below.
|
|
14
|
+
from gitm.benchmarks.hft.harness import _gpu_name, _seed_dir, main # noqa: F401
|
|
15
|
+
|
|
16
|
+
if __name__ == "__main__":
|
|
17
|
+
raise SystemExit(main())
|
|
@@ -2,17 +2,21 @@
|
|
|
2
2
|
|
|
3
3
|
## Baseline measurements
|
|
4
4
|
|
|
5
|
-
| Run | Seed | fps | GPU active % | Data stall % | Sync % | CPU % |
|
|
6
|
-
|
|
7
|
-
| Baseline 1 | 42 | TBD | TBD | TBD | TBD | TBD |
|
|
8
|
-
| Baseline 2 | 43 | TBD | TBD | TBD | TBD | TBD |
|
|
9
|
-
| Baseline 3 | 44 | TBD | TBD | TBD | TBD | TBD |
|
|
10
|
-
|
|
|
11
|
-
|
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
5
|
+
| Run | Seed | fps | GPU active % | Data stall % | Sync % | CPU % | Compute headroom % |
|
|
6
|
+
|-----|------|-----|--------------|-------------|--------|-------|-------------------|
|
|
7
|
+
| Baseline 1 | 42 | TBD | TBD | TBD | TBD | TBD | TBD |
|
|
8
|
+
| Baseline 2 | 43 | TBD | TBD | TBD | TBD | TBD | TBD |
|
|
9
|
+
| Baseline 3 | 44 | TBD | TBD | TBD | TBD | TBD | TBD |
|
|
10
|
+
| Baseline 4 | 45 | TBD | TBD | TBD | TBD | TBD | TBD |
|
|
11
|
+
| Baseline 5 | 46 | TBD | TBD | TBD | TBD | TBD | TBD |
|
|
12
|
+
| Baseline 6 | 47 | TBD | TBD | TBD | TBD | TBD | TBD |
|
|
13
|
+
| Mean | -- | TBD | TBD | TBD | TBD | TBD | TBD |
|
|
14
|
+
| Stddev | -- | TBD | TBD | TBD | TBD | TBD | -- |
|
|
15
|
+
|
|
16
|
+
6-seed fps spread: TBD% -- within 2%: TBD
|
|
17
|
+
|
|
18
|
+
GPU headroom (compute_headroom_pct = 100 - mean NVML util): TBD%
|
|
19
|
+
Memory free at peak: TBD GB
|
|
16
20
|
|
|
17
21
|
## Stream-concurrency verification
|
|
18
22
|
|
|
@@ -23,7 +27,7 @@ Host-side voxelization overlaps device-side backbone inference: **TBD**
|
|
|
23
27
|
python harness/smoke_kitti.py --cfg $OPENPCDET_CFG --ckpt $OPENPCDET_CKPT --n-frames 200
|
|
24
28
|
Open the .nsys-rep in Nsight Systems GUI, zoom in on a few consecutive frames,
|
|
25
29
|
look for CPU voxelization bar overlapping GPU backbone bar.
|
|
26
|
-
Screenshot
|
|
30
|
+
Screenshot -> benchmarks/kitti/concurrency_timeline.png
|
|
27
31
|
-->
|
|
28
32
|
|
|
29
33
|

|
|
@@ -36,11 +40,11 @@ invariant has no signal for this workload and the benchmark needs review.
|
|
|
36
40
|
|
|
37
41
|
## Notes
|
|
38
42
|
|
|
39
|
-
- Machine:
|
|
43
|
+
- Machine: RunPod y4xbh7yws2e4tu-64410cb0
|
|
40
44
|
- GPU: TBD
|
|
41
45
|
- Driver version: TBD
|
|
42
46
|
- CUDA version: TBD
|
|
43
|
-
- OpenPCDet commit:
|
|
44
|
-
- Config sha256:
|
|
47
|
+
- OpenPCDet commit: 233f849829b6ac19afb8af8837a0246890908755
|
|
48
|
+
- Config sha256: 170a9ffe76cfd8509d1044cfbcf1cbd44c5d320fda81bf0089a8d5efaf1c91c8
|
|
45
49
|
- Checkpoint sha256: 4c83fc0fa02575b9b3e9dec676f698e7a70bb5a795e89f91df8a96b916fa19e2
|
|
46
50
|
- Date: TBD
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
# KITTI edge benchmark spec
|
|
2
|
+
|
|
3
|
+
## Section 1: Input definition
|
|
4
|
+
|
|
5
|
+
Datasets used:
|
|
6
|
+
- KITTI 3D Object Detection: 7,481 training frames, Velodyne lidar + calibration + labels
|
|
7
|
+
- Data location: `$GITM_DATA_ROOT/kitti/training/`
|
|
8
|
+
- Directory layout:
|
|
9
|
+
- `velodyne/` -- 000000.bin to 007480.bin (float32 XYZI point clouds)
|
|
10
|
+
- `calib/` -- 000000.txt to 007480.txt (camera-lidar calibration)
|
|
11
|
+
- `label_2/` -- 000000.txt to 007480.txt (3D bounding box annotations)
|
|
12
|
+
- Manifest: `benchmarks/kitti/manifest.yaml`
|
|
13
|
+
- Every file sha256-verified. Pass/fail gated by `python harness/verify_manifest.py`.
|
|
14
|
+
- Generate: `python harness/gen_kitti_manifest.py --root $GITM_DATA_ROOT/kitti/training`
|
|
15
|
+
|
|
16
|
+
## Section 2: Work unit
|
|
17
|
+
|
|
18
|
+
One frame processed end-to-end through:
|
|
19
|
+
|
|
20
|
+
voxelization -> 3D backbone (PointPillars) -> BEV head -> NMS -> detections
|
|
21
|
+
|
|
22
|
+
Model: OpenPCDet PointPillars (KITTI config)
|
|
23
|
+
|
|
24
|
+
- OpenPCDet commit: `233f849829b6ac19afb8af8837a0246890908755`
|
|
25
|
+
- Config (pointpillar.yaml) sha256: `170a9ffe76cfd8509d1044cfbcf1cbd44c5d320fda81bf0089a8d5efaf1c91c8`
|
|
26
|
+
- Checkpoint: `pointpillar_7728.pth`
|
|
27
|
+
- Checkpoint sha256: `4c83fc0fa02575b9b3e9dec676f698e7a70bb5a795e89f91df8a96b916fa19e2`
|
|
28
|
+
|
|
29
|
+
Stage breakdown per frame:
|
|
30
|
+
1. Load .bin (np.fromfile) -- CPU / data stall
|
|
31
|
+
2. Voxelization + H2D copy -- CPU / data stall
|
|
32
|
+
3. Backbone + BEV head -- GPU active
|
|
33
|
+
4. NMS + box assembly -- CPU / sync stall
|
|
34
|
+
|
|
35
|
+
Implementation: `gitm.benchmarks.kitti.WorkUnit`
|
|
36
|
+
|
|
37
|
+
## Section 3: Success metric
|
|
38
|
+
|
|
39
|
+
- Top-line metric: `frames_per_second` (timed warm window)
|
|
40
|
+
- Warm-up: 100 frames discarded before timing begins
|
|
41
|
+
- Disk pre-warm: all frames read once before GPU warmup (eliminates OS page cache
|
|
42
|
+
locality as a seed-ordering confound)
|
|
43
|
+
- Timed window: 7,381 frames (all training frames minus warmup)
|
|
44
|
+
- Convergence: 6 seeds (42-47) must agree within 2% fps spread
|
|
45
|
+
- GPU saturation check: GPU active % must be < 85%
|
|
46
|
+
- Auxiliary: `total_detections` per run (regression sentinel, not a target)
|
|
47
|
+
|
|
48
|
+
Baseline result (fill after running `bash harness/run_baselines.sh`):
|
|
49
|
+
|
|
50
|
+
| Seed | fps | GPU active % | Data stall % | Sync % | CPU % | Compute headroom % |
|
|
51
|
+
|------|-----|--------------|-------------|--------|-------|-------------------|
|
|
52
|
+
| 42 | TBD | TBD | TBD | TBD | TBD | TBD |
|
|
53
|
+
| 43 | TBD | TBD | TBD | TBD | TBD | TBD |
|
|
54
|
+
| 44 | TBD | TBD | TBD | TBD | TBD | TBD |
|
|
55
|
+
| 45 | TBD | TBD | TBD | TBD | TBD | TBD |
|
|
56
|
+
| 46 | TBD | TBD | TBD | TBD | TBD | TBD |
|
|
57
|
+
| 47 | TBD | TBD | TBD | TBD | TBD | TBD |
|
|
58
|
+
| Mean | TBD | TBD | TBD | TBD | TBD | TBD |
|
|
59
|
+
| Stddev | TBD | TBD | TBD | TBD | TBD | -- |
|
|
60
|
+
|
|
61
|
+
6-seed fps spread: TBD% -- within 2%: TBD
|
|
62
|
+
|
|
63
|
+
## Section 4: Expected stall profile
|
|
64
|
+
|
|
65
|
+
| Category | What it is | Expected % | Measured % |
|
|
66
|
+
|----------|-----------|------------|------------|
|
|
67
|
+
| Data stall | lidar .bin decode + host-side voxelization + H2D copy | 20-35% | TBD |
|
|
68
|
+
| Sync stall | NMS serialization on CPU | 10-20% | TBD |
|
|
69
|
+
| GPU active | backbone + BEV head forward pass | 50-65% | TBD |
|
|
70
|
+
| CPU overhead | Python dispatch, dataloader | ~5% | TBD |
|
|
71
|
+
|
|
72
|
+
**Critical check:** GPU active must be < 85%. If it is saturated (>= 85%), flag Adit the same day
|
|
73
|
+
for 500-frame shard fallback.
|
|
74
|
+
|
|
75
|
+
**Stream-concurrency check (nsys):** host-side voxelization of frame N+1 should
|
|
76
|
+
overlap device-side backbone inference on frame N. Capture nsys timeline and
|
|
77
|
+
commit the screenshot as `benchmarks/kitti/concurrency_timeline.png`, referenced from `benchmarks/kitti/results.md`. If overlap is absent, the
|
|
78
|
+
stream-concurrency invariant has no signal -- flag Adit immediately.
|
|
79
|
+
|
|
80
|
+
## Section 5: GPU headroom (runtime integration)
|
|
81
|
+
|
|
82
|
+
Measured via `gitm.optimizer.headroom_kernel_rank.gpu_headroom()` using NVML
|
|
83
|
+
samples collected at 5 Hz during the timed window.
|
|
84
|
+
|
|
85
|
+
| Metric | Expected | Measured |
|
|
86
|
+
|--------|----------|---------|
|
|
87
|
+
| Compute headroom (100 - mean util) | >35% | TBD |
|
|
88
|
+
| Memory free at peak | >10 GB | TBD |
|
|
89
|
+
|
|
90
|
+
Per-stage spread (p50/p95 latency per stage across all frames):
|
|
91
|
+
|
|
92
|
+
| Stage | mean ms | p50 ms | p95 ms | % of frame |
|
|
93
|
+
|-------|---------|--------|--------|------------|
|
|
94
|
+
| load | TBD | TBD | TBD | TBD |
|
|
95
|
+
| preprocess (voxelize + H2D) | TBD | TBD | TBD | TBD |
|
|
96
|
+
| inference (backbone + BEV + NMS) | TBD | TBD | TBD | TBD |
|
|
97
|
+
| postprocess (D2H) | TBD | TBD | TBD | TBD |
|
|
98
|
+
|
|
99
|
+
Stage spread is emitted as `stage_spread` in each baseline JSON and as
|
|
100
|
+
`stage_spread_report.txt` alongside it.
|
|
101
|
+
|
|
102
|
+
## Environment
|
|
103
|
+
|
|
104
|
+
- Machine: RunPod y4xbh7yws2e4tu-64410cb0 (2 TB persistent /workspace)
|
|
105
|
+
- GPU: TBD
|
|
106
|
+
- Driver: TBD
|
|
107
|
+
- CUDA: TBD
|
|
108
|
+
- OpenPCDet commit: 233f849829b6ac19afb8af8837a0246890908755
|
|
109
|
+
- Date: TBD
|
|
@@ -6,6 +6,7 @@
|
|
|
6
6
|
|
|
7
7
|
from __future__ import annotations
|
|
8
8
|
|
|
9
|
+
from collections.abc import Callable
|
|
9
10
|
from typing import Any
|
|
10
11
|
|
|
11
12
|
from gitm.scheduler import LoopConfig, run_loop
|
|
@@ -18,6 +19,7 @@ def optimize(
|
|
|
18
19
|
budget: str = "24h",
|
|
19
20
|
target: float = 0.15,
|
|
20
21
|
scratch: str | None = None,
|
|
22
|
+
workload_runner: Callable[[], dict[str, Any]] | None = None,
|
|
21
23
|
) -> dict[str, Any]:
|
|
22
24
|
"""Run the autonomous 24-hour optimization loop and return a report.
|
|
23
25
|
|
|
@@ -26,6 +28,11 @@ def optimize(
|
|
|
26
28
|
path. ``budget`` and ``target`` follow the SKU contract: a verified floor
|
|
27
29
|
of ``target`` fraction improvement within ``budget`` wall time, or a
|
|
28
30
|
qualification-gate diagnostic explaining why the floor was not committed.
|
|
31
|
+
|
|
32
|
+
``workload_runner`` optionally supplies an explicit zero-arg callable that
|
|
33
|
+
launches the workload's GPU work; it runs inside the capture window. When
|
|
34
|
+
omitted, the loop resolves ``workload`` against the registry in
|
|
35
|
+
:mod:`gitm.workloads`.
|
|
29
36
|
"""
|
|
30
37
|
cfg = LoopConfig(
|
|
31
38
|
engine=engine,
|
|
@@ -33,5 +40,6 @@ def optimize(
|
|
|
33
40
|
budget=budget,
|
|
34
41
|
target=target,
|
|
35
42
|
scratch=scratch,
|
|
43
|
+
workload_runner=workload_runner,
|
|
36
44
|
)
|
|
37
45
|
return run_loop(cfg)
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
"""GITM edge (nuScenes) benchmark — CenterPoint-PointPillar baseline.
|
|
2
|
+
|
|
3
|
+
Mirrors gitm.benchmarks.kitti but for the nuScenes CenterPoint-PointPillar
|
|
4
|
+
(dyn / GPU-voxelization) baseline, with multi-sweep (keyframe + 9 sweeps)
|
|
5
|
+
point accumulation sourced from OpenPCDet's NuScenesDataset.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from gitm.benchmarks.edge.workunit import NuScenesWorkUnit, WorkUnitResult
|
|
9
|
+
|
|
10
|
+
__all__ = ["NuScenesWorkUnit", "WorkUnitResult"]
|