hotpath 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136) hide show
  1. hotpath-0.2.0/.github/workflows/ci.yml +41 -0
  2. hotpath-0.2.0/.github/workflows/gpu-smoke.yml +36 -0
  3. hotpath-0.2.0/.github/workflows/release.yml +72 -0
  4. hotpath-0.2.0/.gitignore +25 -0
  5. hotpath-0.2.0/.smoke-bench-a.json +1 -0
  6. hotpath-0.2.0/.smoke-bench-b.json +1 -0
  7. hotpath-0.2.0/CHANGELOG.md +49 -0
  8. hotpath-0.2.0/CMakeLists.txt +355 -0
  9. hotpath-0.2.0/PKG-INFO +159 -0
  10. hotpath-0.2.0/PLAN.MD +683 -0
  11. hotpath-0.2.0/README.md +131 -0
  12. hotpath-0.2.0/cpp_tests/fixtures/smoke_prefix_traffic.jsonl +10 -0
  13. hotpath-0.2.0/cpp_tests/fixtures/smoke_traffic.jsonl +10 -0
  14. hotpath-0.2.0/cpp_tests/fixtures/traffic.jsonl +5 -0
  15. hotpath-0.2.0/cpp_tests/test_aggregate.cpp +98 -0
  16. hotpath-0.2.0/cpp_tests/test_artifacts.cpp +73 -0
  17. hotpath-0.2.0/cpp_tests/test_attach.cpp +63 -0
  18. hotpath-0.2.0/cpp_tests/test_audit.cpp +2913 -0
  19. hotpath-0.2.0/cpp_tests/test_batch_analyzer.cpp +74 -0
  20. hotpath-0.2.0/cpp_tests/test_bench.cpp +79 -0
  21. hotpath-0.2.0/cpp_tests/test_bench_json.cpp +95 -0
  22. hotpath-0.2.0/cpp_tests/test_cache_analyzer.cpp +93 -0
  23. hotpath-0.2.0/cpp_tests/test_categorizer.cpp +101 -0
  24. hotpath-0.2.0/cpp_tests/test_cli.cpp +80 -0
  25. hotpath-0.2.0/cpp_tests/test_clock_control.cpp +64 -0
  26. hotpath-0.2.0/cpp_tests/test_diff.cpp +97 -0
  27. hotpath-0.2.0/cpp_tests/test_disagg_model.cpp +96 -0
  28. hotpath-0.2.0/cpp_tests/test_doctor.cpp +111 -0
  29. hotpath-0.2.0/cpp_tests/test_export.cpp +124 -0
  30. hotpath-0.2.0/cpp_tests/test_interactive.cpp +223 -0
  31. hotpath-0.2.0/cpp_tests/test_log_parser.cpp +232 -0
  32. hotpath-0.2.0/cpp_tests/test_otlp_export.cpp +97 -0
  33. hotpath-0.2.0/cpp_tests/test_parser.cpp +257 -0
  34. hotpath-0.2.0/cpp_tests/test_phase_analyzer.cpp +104 -0
  35. hotpath-0.2.0/cpp_tests/test_prefix_analyzer.cpp +97 -0
  36. hotpath-0.2.0/cpp_tests/test_recommender.cpp +94 -0
  37. hotpath-0.2.0/cpp_tests/test_remote.cpp +88 -0
  38. hotpath-0.2.0/cpp_tests/test_report.cpp +185 -0
  39. hotpath-0.2.0/cpp_tests/test_request_trace.cpp +116 -0
  40. hotpath-0.2.0/cpp_tests/test_serve_report.cpp +195 -0
  41. hotpath-0.2.0/cpp_tests/test_server.cpp +182 -0
  42. hotpath-0.2.0/cpp_tests/test_sglang_metrics.cpp +89 -0
  43. hotpath-0.2.0/cpp_tests/test_stability.cpp +139 -0
  44. hotpath-0.2.0/cpp_tests/test_store.cpp +134 -0
  45. hotpath-0.2.0/cpp_tests/test_targets.cpp +86 -0
  46. hotpath-0.2.0/cpp_tests/test_traffic.cpp +61 -0
  47. hotpath-0.2.0/cpp_tests/test_traffic_replayer.cpp +171 -0
  48. hotpath-0.2.0/cpp_tests/test_validate.cpp +180 -0
  49. hotpath-0.2.0/cpp_tests/test_vllm_metrics.cpp +225 -0
  50. hotpath-0.2.0/cpp_tests/test_workload_classifier.cpp +101 -0
  51. hotpath-0.2.0/hotpath_py/__init__.py +5 -0
  52. hotpath-0.2.0/hotpath_py/bench_cuda.py +510 -0
  53. hotpath-0.2.0/hotpath_py/bench_cuda_kernels.py +243 -0
  54. hotpath-0.2.0/hotpath_py/cli.py +16 -0
  55. hotpath-0.2.0/include/hotpath/aggregate.h +12 -0
  56. hotpath-0.2.0/include/hotpath/artifacts.h +26 -0
  57. hotpath-0.2.0/include/hotpath/batch_analyzer.h +33 -0
  58. hotpath-0.2.0/include/hotpath/bench/kernels/rms_norm.h +7 -0
  59. hotpath-0.2.0/include/hotpath/bench/kernels/rotary_emb.h +7 -0
  60. hotpath-0.2.0/include/hotpath/bench/kernels/silu_mul.h +7 -0
  61. hotpath-0.2.0/include/hotpath/bench/registry.h +30 -0
  62. hotpath-0.2.0/include/hotpath/bench/runner.h +93 -0
  63. hotpath-0.2.0/include/hotpath/cache_analyzer.h +30 -0
  64. hotpath-0.2.0/include/hotpath/clock_control.h +33 -0
  65. hotpath-0.2.0/include/hotpath/diff.h +33 -0
  66. hotpath-0.2.0/include/hotpath/disagg_model.h +45 -0
  67. hotpath-0.2.0/include/hotpath/doctor.h +53 -0
  68. hotpath-0.2.0/include/hotpath/export.h +13 -0
  69. hotpath-0.2.0/include/hotpath/kv_config.h +25 -0
  70. hotpath-0.2.0/include/hotpath/log_parser.h +22 -0
  71. hotpath-0.2.0/include/hotpath/ops.h +19 -0
  72. hotpath-0.2.0/include/hotpath/otlp_export.h +19 -0
  73. hotpath-0.2.0/include/hotpath/phase_analyzer.h +42 -0
  74. hotpath-0.2.0/include/hotpath/prefix_analyzer.h +26 -0
  75. hotpath-0.2.0/include/hotpath/profiler/attach.h +47 -0
  76. hotpath-0.2.0/include/hotpath/profiler/categorizer.h +26 -0
  77. hotpath-0.2.0/include/hotpath/profiler/kernel_record.h +20 -0
  78. hotpath-0.2.0/include/hotpath/profiler/parser.h +24 -0
  79. hotpath-0.2.0/include/hotpath/profiler/runner.h +58 -0
  80. hotpath-0.2.0/include/hotpath/profiler/server.h +46 -0
  81. hotpath-0.2.0/include/hotpath/profiler/vllm_metrics.h +38 -0
  82. hotpath-0.2.0/include/hotpath/recommender.h +14 -0
  83. hotpath-0.2.0/include/hotpath/remote.h +40 -0
  84. hotpath-0.2.0/include/hotpath/report.h +99 -0
  85. hotpath-0.2.0/include/hotpath/request_trace.h +35 -0
  86. hotpath-0.2.0/include/hotpath/serve_profiler.h +55 -0
  87. hotpath-0.2.0/include/hotpath/sglang_metrics.h +28 -0
  88. hotpath-0.2.0/include/hotpath/stability.h +33 -0
  89. hotpath-0.2.0/include/hotpath/store.h +60 -0
  90. hotpath-0.2.0/include/hotpath/targets.h +25 -0
  91. hotpath-0.2.0/include/hotpath/traffic.h +64 -0
  92. hotpath-0.2.0/include/hotpath/traffic_replayer.h +60 -0
  93. hotpath-0.2.0/include/hotpath/validate.h +27 -0
  94. hotpath-0.2.0/include/hotpath/workload_classifier.h +45 -0
  95. hotpath-0.2.0/pyproject.toml +89 -0
  96. hotpath-0.2.0/src/advisor/disagg_model.cpp +176 -0
  97. hotpath-0.2.0/src/advisor/kv_config.cpp +116 -0
  98. hotpath-0.2.0/src/advisor/recommender.cpp +146 -0
  99. hotpath-0.2.0/src/advisor/workload_classifier.cpp +44 -0
  100. hotpath-0.2.0/src/aggregate.cpp +133 -0
  101. hotpath-0.2.0/src/artifacts.cpp +136 -0
  102. hotpath-0.2.0/src/bench/kernels/rms_norm.cpp +105 -0
  103. hotpath-0.2.0/src/bench/kernels/rotary_emb.cpp +108 -0
  104. hotpath-0.2.0/src/bench/kernels/silu_mul.cpp +127 -0
  105. hotpath-0.2.0/src/bench/registry.cpp +43 -0
  106. hotpath-0.2.0/src/bench/runner.cpp +728 -0
  107. hotpath-0.2.0/src/clock_control.cpp +216 -0
  108. hotpath-0.2.0/src/diff.cpp +214 -0
  109. hotpath-0.2.0/src/doctor.cpp +389 -0
  110. hotpath-0.2.0/src/export/otlp_export.cpp +160 -0
  111. hotpath-0.2.0/src/export.cpp +258 -0
  112. hotpath-0.2.0/src/interactive.cpp +726 -0
  113. hotpath-0.2.0/src/interactive.h +77 -0
  114. hotpath-0.2.0/src/main.cpp +3091 -0
  115. hotpath-0.2.0/src/ops.cpp +176 -0
  116. hotpath-0.2.0/src/profiler/attach.cpp +386 -0
  117. hotpath-0.2.0/src/profiler/categorizer.cpp +87 -0
  118. hotpath-0.2.0/src/profiler/parser.cpp +449 -0
  119. hotpath-0.2.0/src/profiler/runner.cpp +1422 -0
  120. hotpath-0.2.0/src/profiler/server.cpp +522 -0
  121. hotpath-0.2.0/src/profiler/sglang_metrics.cpp +90 -0
  122. hotpath-0.2.0/src/profiler/vllm_metrics.cpp +262 -0
  123. hotpath-0.2.0/src/remote.cpp +115 -0
  124. hotpath-0.2.0/src/report.cpp +814 -0
  125. hotpath-0.2.0/src/serving/batch_analyzer.cpp +65 -0
  126. hotpath-0.2.0/src/serving/cache_analyzer.cpp +90 -0
  127. hotpath-0.2.0/src/serving/log_parser.cpp +414 -0
  128. hotpath-0.2.0/src/serving/phase_analyzer.cpp +75 -0
  129. hotpath-0.2.0/src/serving/prefix_analyzer.cpp +136 -0
  130. hotpath-0.2.0/src/serving/serve_profiler.cpp +1610 -0
  131. hotpath-0.2.0/src/serving/traffic_replayer.cpp +455 -0
  132. hotpath-0.2.0/src/stability.cpp +259 -0
  133. hotpath-0.2.0/src/store.cpp +628 -0
  134. hotpath-0.2.0/src/targets.cpp +187 -0
  135. hotpath-0.2.0/src/traffic.cpp +322 -0
  136. hotpath-0.2.0/src/validate.cpp +238 -0
@@ -0,0 +1,41 @@
1
+ name: ci
2
+
3
+ on:
4
+ push:
5
+ pull_request:
6
+
7
+ jobs:
8
+ build-and-test:
9
+ runs-on: ubuntu-latest
10
+ steps:
11
+ - uses: actions/checkout@v4
12
+
13
+ - name: Install build dependencies
14
+ run: |
15
+ sudo apt-get update
16
+ sudo apt-get install -y cmake g++ libsqlite3-dev
17
+
18
+ - name: Configure
19
+ run: cmake -S . -B build
20
+
21
+ - name: Build
22
+ run: cmake --build build --parallel
23
+
24
+ - name: Test
25
+ run: ctest --test-dir build --output-on-failure
26
+
27
+ - name: CLI smoke
28
+ run: |
29
+ ./build/hotpath help
30
+ ./build/hotpath version
31
+ ./build/hotpath doctor
32
+ ./build/hotpath target --help
33
+ ./build/hotpath export --help
34
+ ./build/hotpath artifacts --help
35
+ ./build/hotpath trace --help
36
+ ./build/hotpath validate --help
37
+ ./build/hotpath manifest --help
38
+ ./build/hotpath cleanup --help
39
+ ./build/hotpath cluster-profile --help
40
+ ./build/hotpath soak-profile --help
41
+ ./build/hotpath completion bash >/tmp/hotpath.bash
@@ -0,0 +1,36 @@
1
+ name: gpu-smoke
2
+
3
+ on:
4
+ workflow_dispatch:
5
+ schedule:
6
+ - cron: '0 9 * * 1'
7
+
8
+ jobs:
9
+ gpu-smoke:
10
+ runs-on: [self-hosted, gpu]
11
+ steps:
12
+ - uses: actions/checkout@v4
13
+
14
+ - name: Configure
15
+ run: cmake -S . -B build
16
+
17
+ - name: Build
18
+ run: cmake --build build --parallel
19
+
20
+ - name: Unit tests
21
+ run: ctest --test-dir build --output-on-failure
22
+
23
+ - name: Doctor
24
+ run: ./build/hotpath doctor
25
+
26
+ - name: Bench smoke
27
+ run: ./build/hotpath bench --kernel silu_and_mul --shapes 64x4096 --warmup 10 --n-iter 50 --repeats 3 --output auto
28
+
29
+ - name: Optional profile smoke
30
+ shell: bash
31
+ run: |
32
+ if [[ -n "${RLPROF_SMOKE_MODEL:-}" ]]; then
33
+ ./build/hotpath profile --model "$RLPROF_SMOKE_MODEL" --prompts 1 --rollouts 1 --min-tokens 8 --max-tokens 8 --input-len 16 --output .hotpath/ci_smoke
34
+ else
35
+ echo "RLPROF_SMOKE_MODEL not set; skipping profile smoke"
36
+ fi
@@ -0,0 +1,72 @@
1
+ name: release
2
+
3
+ on:
4
+ pull_request:
5
+ push:
6
+ tags:
7
+ - "v*"
8
+ release:
9
+ types:
10
+ - published
11
+ workflow_dispatch:
12
+
13
+ jobs:
14
+ build_sdist:
15
+ name: build sdist
16
+ runs-on: ubuntu-latest
17
+ steps:
18
+ - uses: actions/checkout@v4
19
+
20
+ - uses: actions/setup-python@v5
21
+ with:
22
+ python-version: "3.12"
23
+
24
+ - name: Build sdist
25
+ run: |
26
+ python -m pip install -U build
27
+ python -m build --sdist
28
+
29
+ - uses: actions/upload-artifact@v4
30
+ with:
31
+ name: dist-sdist
32
+ path: dist/*.tar.gz
33
+ if-no-files-found: error
34
+
35
+ build_linux_wheels:
36
+ name: build linux wheels
37
+ runs-on: ubuntu-latest
38
+ steps:
39
+ - uses: actions/checkout@v4
40
+
41
+ - name: Build wheels
42
+ uses: pypa/cibuildwheel@v3.1.4
43
+
44
+ - uses: actions/upload-artifact@v4
45
+ with:
46
+ name: dist-wheels-linux
47
+ path: wheelhouse/*.whl
48
+ if-no-files-found: error
49
+
50
+ publish_pypi:
51
+ name: publish to PyPI
52
+ needs:
53
+ - build_sdist
54
+ - build_linux_wheels
55
+ if: github.event_name == 'release' && github.event.action == 'published'
56
+ runs-on: ubuntu-latest
57
+ environment: pypi
58
+ permissions:
59
+ id-token: write
60
+ steps:
61
+ - uses: actions/download-artifact@v4
62
+ with:
63
+ name: dist-sdist
64
+ path: dist
65
+
66
+ - uses: actions/download-artifact@v4
67
+ with:
68
+ name: dist-wheels-linux
69
+ path: dist
70
+
71
+ - name: Publish package distributions to PyPI
72
+ uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,25 @@
1
+ .venv/
2
+ .publish-venv/
3
+ build/
4
+ dist/
5
+ .rlprof/
6
+ .hotpath/
7
+ __pycache__/
8
+ *.pyc
9
+ *.pyo
10
+ .pytest_cache/
11
+ .mypy_cache/
12
+ .cache/
13
+ *.db
14
+ *.sqlite
15
+ *.log
16
+ targets.cfg
17
+ *_meta.csv
18
+ *_kernels.csv
19
+ *_vllm_metrics.csv
20
+ *_vllm_metrics_summary.csv
21
+ *_traffic_stats.csv
22
+ AGENTS.md
23
+ logs.md
24
+ ._*
25
+ .DS_Store
@@ -0,0 +1 @@
1
+ {"gpu": {"name": "NVIDIA A10G", "driver_version": "580.126.16", "sm_clock_mhz": 1710.0, "mem_clock_mhz": 6251.0, "temp_c": 28.0, "power_draw_w": 61.76, "power_limit_w": 300.0}, "results": [{"kernel": "silu_and_mul", "implementation": "vllm-cuda", "shape": "64x4096", "dtype": "bf16", "avg_us": 12.847930257876195, "stddev_us": 0.0, "cv_pct": 0.0, "min_us": 12.78640553842183, "p50_us": 12.870378743589313, "p99_us": 12.88700649161744, "bandwidth_gb_s": 122.42158607887708, "valid": true, "validation_max_abs_error": 0.0, "deterministic": true, "determinism_max_abs_error": 0.0, "timing_warning": false, "environment_warning": true, "unstable": true, "batch_invocations": 306, "cuda_graph_replay": false}, {"kernel": "silu_and_mul", "implementation": "torch-compile", "shape": "64x4096", "dtype": "bf16", "avg_us": 51.581309722350525, "stddev_us": 0.0, "cv_pct": 0.0, "min_us": 50.67416354342624, "p50_us": 50.86789260039458, "p99_us": 53.201873023230746, "bandwidth_gb_s": 30.49290544319908, "valid": true, "validation_max_abs_error": 0.0625, "deterministic": true, "determinism_max_abs_error": 0.0, "timing_warning": false, "environment_warning": false, "unstable": false, "batch_invocations": 111, "cuda_graph_replay": false}, {"kernel": "silu_and_mul", "implementation": "torch-eager", "shape": "64x4096", "dtype": "bf16", "avg_us": 23.435274759928387, "stddev_us": 0.0, "cv_pct": 0.0, "min_us": 23.273543307655736, "p50_us": 23.30498318923147, "p99_us": 23.72729778289795, "bandwidth_gb_s": 67.11523616055126, "valid": true, "validation_max_abs_error": 0.0, "deterministic": true, "determinism_max_abs_error": 0.0, "timing_warning": false, "environment_warning": false, "unstable": false, "batch_invocations": 228, "cuda_graph_replay": false}], "correctness_failures": [], "timing_warnings": [], "environment_warnings": ["silu_and_mul vllm-cuda 64x4096: power cap throttling observed"]}
@@ -0,0 +1 @@
1
+ {"gpu": {"name": "NVIDIA A10G", "driver_version": "580.126.16", "sm_clock_mhz": 1710.0, "mem_clock_mhz": 6251.0, "temp_c": 29.0, "power_draw_w": 62.45, "power_limit_w": 300.0}, "results": [{"kernel": "silu_and_mul", "implementation": "vllm-cuda", "shape": "64x4096", "dtype": "bf16", "avg_us": 13.006222468835336, "stddev_us": 0.0, "cv_pct": 0.0, "min_us": 12.952889005343119, "p50_us": 12.988444831636217, "p99_us": 13.077333569526672, "bandwidth_gb_s": 120.93165434996936, "valid": true, "validation_max_abs_error": 0.0, "deterministic": true, "determinism_max_abs_error": 0.0, "timing_warning": false, "environment_warning": true, "unstable": true, "batch_invocations": 288, "cuda_graph_replay": false}, {"kernel": "silu_and_mul", "implementation": "torch-compile", "shape": "64x4096", "dtype": "bf16", "avg_us": 51.29658831699002, "stddev_us": 0.0, "cv_pct": 0.0, "min_us": 50.579149892011024, "p50_us": 51.280629916453925, "p99_us": 52.029985142505076, "bandwidth_gb_s": 30.66215613171782, "valid": true, "validation_max_abs_error": 0.03125, "deterministic": true, "determinism_max_abs_error": 0.0, "timing_warning": false, "environment_warning": false, "unstable": false, "batch_invocations": 127, "cuda_graph_replay": false}, {"kernel": "silu_and_mul", "implementation": "torch-eager", "shape": "64x4096", "dtype": "bf16", "avg_us": 23.796132595070674, "stddev_us": 0.0, "cv_pct": 0.0, "min_us": 23.68084800164431, "p50_us": 23.714755544599317, "p99_us": 23.9927942389684, "bandwidth_gb_s": 66.09746326282516, "valid": true, "validation_max_abs_error": 0.0, "deterministic": true, "determinism_max_abs_error": 0.0, "timing_warning": false, "environment_warning": false, "unstable": false, "batch_invocations": 151, "cuda_graph_replay": false}], "correctness_failures": [], "timing_warnings": [], "environment_warnings": ["silu_and_mul vllm-cuda 64x4096: power cap throttling observed", "silu_and_mul vllm-cuda 64x4096: thermal throttling observed"]}
@@ -0,0 +1,49 @@
1
+ # Changelog
2
+
3
+ ## v0.2.0 - 2026-04-05
4
+
5
+ Serving analysis, interactive TUI, and numerical hardening.
6
+
7
+ Highlights:
8
+
9
+ - `serve-profile` -- live dashboard with in-place redraws during traffic replay, `--concurrency N` for parallel in-flight requests, Prometheus metrics polled at 1 Hz with batch size / queue depth / KV cache tracking
10
+ - `serve-report` -- latency percentile table (TTFB, TTFT, decode per token, e2e), KV cache hit rate and eviction counts, prefix sharing analysis, disaggregation recommendation with estimated throughput improvement
11
+ - `disagg-config` -- deployment configs for disaggregated prefill/decode targeting vLLM, llm-d, and Dynamo
12
+ - Interactive arrow-key menus using DEC cursor save/restore (ESC 7/8) for reliable in-place redraws across all terminal types
13
+ - KV bytes auto-detection from HuggingFace `config.json` with full GQA support (`num_key_value_heads`, `head_dim`, dtype)
14
+ - Clock detection fallback for GPUs where current SM clock equals hardware max (A10G and similar cloud instances)
15
+ - Numerical fixes: cache hit rate clamped to [0, 1], eviction and preemption counts floored at 0, disaggregation throughput percentage uses `(improvement - 1) * 100` with rounding
16
+ - JSON injection protection in traffic replayer, temp file open/write failure detection
17
+ - `.gitignore` updated to exclude `.hotpath/` run artifacts, `*.log`, and `targets.cfg`
18
+ - README rewritten for production use
19
+
20
+ ## v0.1.2 - 2026-04-01
21
+
22
+ Packaging and release automation update.
23
+
24
+ Highlights:
25
+
26
+ - PyPI source distribution cleanup so local virtualenvs, build outputs, and macOS metadata files do not leak into published artifacts
27
+ - Linux wheel build configuration via `cibuildwheel` for CPython 3.10, 3.11, and 3.12 on `manylinux_2_28`
28
+ - GitHub Actions release workflow for building `sdist` plus Linux wheels and publishing to PyPI via trusted publishing on GitHub Releases
29
+
30
+ ## v0.1.0 - 2026-03-31
31
+
32
+ Initial public release.
33
+
34
+ Highlights:
35
+
36
+ - local and remote `profile`, `report`, `diff`, `export`, `validate`, `artifacts`, `trace`, and `doctor`
37
+ - interactive prompt flows alongside the existing flag-based CLI
38
+ - real GPU `bench` backed by `torch.cuda.Event`, archived JSON output, and `bench-compare`
39
+ - SSH target registry, bootstrap, recover, and remote single-host profiling
40
+ - profile stability mode, clock helpers, manifest generation, cleanup, soak runs, and cluster rollup
41
+ - non-GPU CI and self-hosted GPU smoke workflow
42
+ - controller-verified remote workflow using saved SSH targets, `target bootstrap`, `bench --target`, `profile --target`, and `recover`
43
+ - `profile --attach ... --attach-pid ...` support that now uses the best available host-local tracing path: native PID attach when `nsys` exposes it, clone-under-trace when a second local copy fits, and replace-and-restore when a single GPU cannot hold both copies
44
+ - attach-by-process fast reuse for `profile --repeat N` and `soak-profile`, so repeated attach runs reuse one traced replacement lifecycle instead of relaunching the fallback per iteration
45
+ - stricter `cluster-profile` host handling with distinct-host enforcement by default and explicit `--allow-duplicate-hosts` loopback testing
46
+ - fast reuse path for `profile --repeat N` and `soak-profile` when `hotpath` launches the server, with one server startup and one `nsys` session reused across iterations
47
+ - local managed warm-server workflow: `server start/list/show/stop` and `profile --server NAME`, reusing one loaded `vllm serve` and one `nsys` session across separate commands
48
+ - managed-server hardening: explicit `--max-model-len`, stale-state pruning, stale-lock recovery, and listener PID fallback from `lsof` to `ss`
49
+ - publish-ready package metadata for the `pip install hotpath` release surface, plus a rewritten README aligned to the controller-verified local/remote workflows
@@ -0,0 +1,355 @@
1
+ cmake_minimum_required(VERSION 3.28)
2
+
3
+ project(hotpath VERSION 0.2.0 LANGUAGES CXX)  # keep in step with the released package version (v0.2.0 in CHANGELOG.md)
4
+
5
+ set(CMAKE_CXX_STANDARD 20)
6
+ set(CMAKE_CXX_STANDARD_REQUIRED ON)
7
+ set(CMAKE_CXX_EXTENSIONS OFF)
8
+
9
+ find_package(SQLite3 REQUIRED)
10
+
11
+ add_library(
12
+ hotpath_core
13
+ src/bench/kernels/rms_norm.cpp
14
+ src/bench/kernels/rotary_emb.cpp
15
+ src/bench/kernels/silu_mul.cpp
16
+ src/bench/registry.cpp
17
+ src/bench/runner.cpp
18
+ src/aggregate.cpp
19
+ src/artifacts.cpp
20
+ src/clock_control.cpp
21
+ src/diff.cpp
22
+ src/doctor.cpp
23
+ src/export.cpp
24
+ src/ops.cpp
25
+ src/remote.cpp
26
+ src/stability.cpp
27
+ src/targets.cpp
28
+ src/traffic.cpp
29
+ src/validate.cpp
30
+ src/profiler/categorizer.cpp
31
+ src/profiler/attach.cpp
32
+ src/profiler/parser.cpp
33
+ src/profiler/runner.cpp
34
+ src/profiler/server.cpp
35
+ src/profiler/vllm_metrics.cpp
36
+ src/report.cpp
37
+ src/store.cpp
38
+ src/serving/log_parser.cpp
39
+ src/serving/phase_analyzer.cpp
40
+ src/serving/batch_analyzer.cpp
41
+ src/serving/cache_analyzer.cpp
42
+ src/serving/prefix_analyzer.cpp
43
+ src/advisor/workload_classifier.cpp
44
+ src/advisor/disagg_model.cpp
45
+ src/advisor/recommender.cpp
46
+ src/advisor/kv_config.cpp
47
+ src/serving/serve_profiler.cpp
48
+ src/serving/traffic_replayer.cpp
49
+ src/profiler/sglang_metrics.cpp
50
+ src/export/otlp_export.cpp
51
+ )
52
+
53
+ target_include_directories(
54
+ hotpath_core
55
+ PUBLIC
56
+ ${PROJECT_SOURCE_DIR}/include
57
+ )
58
+
59
+ target_link_libraries(
60
+ hotpath_core
61
+ PUBLIC
62
+ SQLite::SQLite3
63
+ )
64
+
65
+ add_library(
66
+ hotpath_interactive
67
+ src/interactive.cpp
68
+ )
69
+
70
+ target_include_directories(
71
+ hotpath_interactive
72
+ PUBLIC
73
+ ${PROJECT_SOURCE_DIR}/include
74
+ ${PROJECT_SOURCE_DIR}/src
75
+ )
76
+
77
+ target_link_libraries(
78
+ hotpath_interactive
79
+ PUBLIC
80
+ hotpath_core
81
+ )
82
+
83
+ include(CTest)
84
+
85
+ if(BUILD_TESTING)
86
+ add_executable(
87
+ test_categorizer
88
+ cpp_tests/test_categorizer.cpp
89
+ )
90
+ target_link_libraries(test_categorizer PRIVATE hotpath_core)
91
+ add_test(NAME test_categorizer COMMAND test_categorizer)
92
+
93
+ add_executable(
94
+ test_attach
95
+ cpp_tests/test_attach.cpp
96
+ )
97
+ target_link_libraries(test_attach PRIVATE hotpath_core)
98
+ add_test(NAME test_attach COMMAND test_attach)
99
+
100
+ add_executable(
101
+ test_parser
102
+ cpp_tests/test_parser.cpp
103
+ )
104
+ target_link_libraries(test_parser PRIVATE hotpath_core)
105
+ add_test(NAME test_parser COMMAND test_parser)
106
+
107
+ add_executable(
108
+ test_report
109
+ cpp_tests/test_report.cpp
110
+ )
111
+ target_link_libraries(test_report PRIVATE hotpath_core)
112
+ add_test(NAME test_report COMMAND test_report)
113
+
114
+ add_executable(
115
+ test_store
116
+ cpp_tests/test_store.cpp
117
+ )
118
+ target_link_libraries(test_store PRIVATE hotpath_core)
119
+ add_test(NAME test_store COMMAND test_store)
120
+
121
+ add_executable(
122
+ test_vllm_metrics
123
+ cpp_tests/test_vllm_metrics.cpp
124
+ )
125
+ target_link_libraries(test_vllm_metrics PRIVATE hotpath_core)
126
+ add_test(NAME test_vllm_metrics COMMAND test_vllm_metrics)
127
+
128
+ add_executable(
129
+ test_diff
130
+ cpp_tests/test_diff.cpp
131
+ )
132
+ target_link_libraries(test_diff PRIVATE hotpath_core)
133
+ add_test(NAME test_diff COMMAND test_diff)
134
+
135
+ add_executable(
136
+ test_bench
137
+ cpp_tests/test_bench.cpp
138
+ )
139
+ target_link_libraries(test_bench PRIVATE hotpath_core)
140
+ add_test(NAME test_bench COMMAND test_bench)
141
+
142
+ add_executable(
143
+ test_bench_json
144
+ cpp_tests/test_bench_json.cpp
145
+ )
146
+ target_link_libraries(test_bench_json PRIVATE hotpath_core)
147
+ add_test(NAME test_bench_json COMMAND test_bench_json)
148
+
149
+ add_executable(
150
+ test_stability
151
+ cpp_tests/test_stability.cpp
152
+ )
153
+ target_link_libraries(test_stability PRIVATE hotpath_core)
154
+ add_test(NAME test_stability COMMAND test_stability)
155
+
156
+ add_executable(
157
+ test_clock_control
158
+ cpp_tests/test_clock_control.cpp
159
+ )
160
+ target_link_libraries(test_clock_control PRIVATE hotpath_core)
161
+ add_test(NAME test_clock_control COMMAND test_clock_control)
162
+
163
+ add_executable(
164
+ test_export
165
+ cpp_tests/test_export.cpp
166
+ )
167
+ target_link_libraries(test_export PRIVATE hotpath_core)
168
+ add_test(NAME test_export COMMAND test_export)
169
+
170
+ add_executable(
171
+ test_aggregate
172
+ cpp_tests/test_aggregate.cpp
173
+ )
174
+ target_link_libraries(test_aggregate PRIVATE hotpath_core)
175
+ add_test(NAME test_aggregate COMMAND test_aggregate)
176
+
177
+ add_executable(
178
+ test_artifacts
179
+ cpp_tests/test_artifacts.cpp
180
+ )
181
+ target_link_libraries(test_artifacts PRIVATE hotpath_core)
182
+ add_test(NAME test_artifacts COMMAND test_artifacts)
183
+
184
+ add_executable(
185
+ test_doctor
186
+ cpp_tests/test_doctor.cpp
187
+ )
188
+ target_link_libraries(test_doctor PRIVATE hotpath_core)
189
+ add_test(NAME test_doctor COMMAND test_doctor)
190
+
191
+ add_executable(
192
+ test_validate
193
+ cpp_tests/test_validate.cpp
194
+ )
195
+ target_link_libraries(test_validate PRIVATE hotpath_core)
196
+ add_test(NAME test_validate COMMAND test_validate)
197
+
198
+ add_executable(
199
+ test_traffic
200
+ cpp_tests/test_traffic.cpp
201
+ )
202
+ target_link_libraries(test_traffic PRIVATE hotpath_core)
203
+ add_test(NAME test_traffic COMMAND test_traffic)
204
+
205
+ add_executable(
206
+ test_remote
207
+ cpp_tests/test_remote.cpp
208
+ )
209
+ target_link_libraries(test_remote PRIVATE hotpath_core)
210
+ add_test(NAME test_remote COMMAND test_remote)
211
+
212
+ add_executable(
213
+ test_targets
214
+ cpp_tests/test_targets.cpp
215
+ )
216
+ target_link_libraries(test_targets PRIVATE hotpath_core)
217
+ add_test(NAME test_targets COMMAND test_targets)
218
+
219
+ add_executable(
220
+ test_server
221
+ cpp_tests/test_server.cpp
222
+ )
223
+ target_link_libraries(test_server PRIVATE hotpath_core)
224
+ add_test(NAME test_server COMMAND test_server)
225
+
226
+ add_executable(
227
+ test_request_trace
228
+ cpp_tests/test_request_trace.cpp
229
+ )
230
+ target_link_libraries(test_request_trace PRIVATE hotpath_core)
231
+ add_test(NAME test_request_trace COMMAND test_request_trace)
232
+
233
+ add_executable(
234
+ test_log_parser
235
+ cpp_tests/test_log_parser.cpp
236
+ )
237
+ target_link_libraries(test_log_parser PRIVATE hotpath_core)
238
+ add_test(NAME test_log_parser COMMAND test_log_parser)
239
+
240
+ add_executable(
241
+ test_phase_analyzer
242
+ cpp_tests/test_phase_analyzer.cpp
243
+ )
244
+ target_link_libraries(test_phase_analyzer PRIVATE hotpath_core)
245
+ add_test(NAME test_phase_analyzer COMMAND test_phase_analyzer)
246
+
247
+ add_executable(
248
+ test_batch_analyzer
249
+ cpp_tests/test_batch_analyzer.cpp
250
+ )
251
+ target_link_libraries(test_batch_analyzer PRIVATE hotpath_core)
252
+ add_test(NAME test_batch_analyzer COMMAND test_batch_analyzer)
253
+
254
+ add_executable(
255
+ test_cache_analyzer
256
+ cpp_tests/test_cache_analyzer.cpp
257
+ )
258
+ target_link_libraries(test_cache_analyzer PRIVATE hotpath_core)
259
+ add_test(NAME test_cache_analyzer COMMAND test_cache_analyzer)
260
+
261
+ add_executable(
262
+ test_prefix_analyzer
263
+ cpp_tests/test_prefix_analyzer.cpp
264
+ )
265
+ target_link_libraries(test_prefix_analyzer PRIVATE hotpath_core)
266
+ add_test(NAME test_prefix_analyzer COMMAND test_prefix_analyzer)
267
+
268
+ add_executable(
269
+ test_workload_classifier
270
+ cpp_tests/test_workload_classifier.cpp
271
+ )
272
+ target_link_libraries(test_workload_classifier PRIVATE hotpath_core)
273
+ add_test(NAME test_workload_classifier COMMAND test_workload_classifier)
274
+
275
+ add_executable(
276
+ test_disagg_model
277
+ cpp_tests/test_disagg_model.cpp
278
+ )
279
+ target_link_libraries(test_disagg_model PRIVATE hotpath_core)
280
+ add_test(NAME test_disagg_model COMMAND test_disagg_model)
281
+
282
+ add_executable(
283
+ test_recommender
284
+ cpp_tests/test_recommender.cpp
285
+ )
286
+ target_link_libraries(test_recommender PRIVATE hotpath_core)
287
+ add_test(NAME test_recommender COMMAND test_recommender)
288
+
289
+ add_executable(
290
+ test_serve_report
291
+ cpp_tests/test_serve_report.cpp
292
+ )
293
+ target_link_libraries(test_serve_report PRIVATE hotpath_core)
294
+ add_test(NAME test_serve_report COMMAND test_serve_report)
295
+
296
+ add_executable(
297
+ test_traffic_replayer
298
+ cpp_tests/test_traffic_replayer.cpp
299
+ )
300
+ target_link_libraries(test_traffic_replayer PRIVATE hotpath_core)
301
+ add_test(NAME test_traffic_replayer COMMAND test_traffic_replayer)
302
+
303
+ add_executable(
304
+ test_sglang_metrics
305
+ cpp_tests/test_sglang_metrics.cpp
306
+ )
307
+ target_link_libraries(test_sglang_metrics PRIVATE hotpath_core)
308
+ add_test(NAME test_sglang_metrics COMMAND test_sglang_metrics)
309
+
310
+ add_executable(
311
+ test_otlp_export
312
+ cpp_tests/test_otlp_export.cpp
313
+ )
314
+ target_link_libraries(test_otlp_export PRIVATE hotpath_core)
315
+ add_test(NAME test_otlp_export COMMAND test_otlp_export)
316
+
317
+ add_executable(
318
+ test_audit
319
+ cpp_tests/test_audit.cpp
320
+ )
321
+ target_link_libraries(test_audit PRIVATE hotpath_core)
322
+ add_test(NAME test_audit COMMAND test_audit)
323
+
324
+ add_executable(
325
+ test_interactive
326
+ cpp_tests/test_interactive.cpp
327
+ )
328
+ target_link_libraries(test_interactive PRIVATE hotpath_interactive)
329
+ add_test(NAME test_interactive COMMAND test_interactive)
330
+
331
+ add_executable(  # CLI test driver; unlike the other tests it links none of the hotpath libraries
332
+ test_cli
333
+ cpp_tests/test_cli.cpp
334
+ )
335
+ add_dependencies(test_cli hotpath)  # build-order edge only (nothing is linked): the hotpath executable is built before test_cli; presumably the test launches that binary -- confirm in cpp_tests/test_cli.cpp
336
+ add_test(NAME test_cli COMMAND test_cli)
337
+ endif()
338
+
339
+ add_executable(  # the hotpath command-line binary; defined outside the BUILD_TESTING guard, so it is built even when tests are disabled
340
+ hotpath
341
+ src/main.cpp
342
+ )
343
+ target_include_directories(  # NOTE(review): hotpath_interactive already exports the src directory as a PUBLIC include path, so this PRIVATE entry looks redundant
344
+ hotpath
345
+ PRIVATE
346
+ ${PROJECT_SOURCE_DIR}/src
347
+ )
348
+ target_link_libraries(hotpath PRIVATE hotpath_interactive)  # hotpath_core comes in transitively through hotpath_interactive's PUBLIC link
349
+
350
+ include(GNUInstallDirs)  # NOTE(review): no CMAKE_INSTALL_* variable is referenced in this file; the install() rule uses a literal destination instead
351
+
352
+ install(  # installs only the hotpath executable; hotpath_core and hotpath_interactive are not installed
353
+ TARGETS hotpath  # NOTE(review): the libraries declare no STATIC/SHARED, so a BUILD_SHARED_LIBS=ON build would install a binary without the shared libraries it needs
354
+ RUNTIME DESTINATION hotpath_py/bin  # relative to the install prefix; presumably places the binary inside the hotpath_py Python package for wheel builds -- confirm against pyproject.toml
355
+ )