hotpath 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hotpath-0.2.0/.github/workflows/ci.yml +41 -0
- hotpath-0.2.0/.github/workflows/gpu-smoke.yml +36 -0
- hotpath-0.2.0/.github/workflows/release.yml +72 -0
- hotpath-0.2.0/.gitignore +25 -0
- hotpath-0.2.0/.smoke-bench-a.json +1 -0
- hotpath-0.2.0/.smoke-bench-b.json +1 -0
- hotpath-0.2.0/CHANGELOG.md +49 -0
- hotpath-0.2.0/CMakeLists.txt +355 -0
- hotpath-0.2.0/PKG-INFO +159 -0
- hotpath-0.2.0/PLAN.MD +683 -0
- hotpath-0.2.0/README.md +131 -0
- hotpath-0.2.0/cpp_tests/fixtures/smoke_prefix_traffic.jsonl +10 -0
- hotpath-0.2.0/cpp_tests/fixtures/smoke_traffic.jsonl +10 -0
- hotpath-0.2.0/cpp_tests/fixtures/traffic.jsonl +5 -0
- hotpath-0.2.0/cpp_tests/test_aggregate.cpp +98 -0
- hotpath-0.2.0/cpp_tests/test_artifacts.cpp +73 -0
- hotpath-0.2.0/cpp_tests/test_attach.cpp +63 -0
- hotpath-0.2.0/cpp_tests/test_audit.cpp +2913 -0
- hotpath-0.2.0/cpp_tests/test_batch_analyzer.cpp +74 -0
- hotpath-0.2.0/cpp_tests/test_bench.cpp +79 -0
- hotpath-0.2.0/cpp_tests/test_bench_json.cpp +95 -0
- hotpath-0.2.0/cpp_tests/test_cache_analyzer.cpp +93 -0
- hotpath-0.2.0/cpp_tests/test_categorizer.cpp +101 -0
- hotpath-0.2.0/cpp_tests/test_cli.cpp +80 -0
- hotpath-0.2.0/cpp_tests/test_clock_control.cpp +64 -0
- hotpath-0.2.0/cpp_tests/test_diff.cpp +97 -0
- hotpath-0.2.0/cpp_tests/test_disagg_model.cpp +96 -0
- hotpath-0.2.0/cpp_tests/test_doctor.cpp +111 -0
- hotpath-0.2.0/cpp_tests/test_export.cpp +124 -0
- hotpath-0.2.0/cpp_tests/test_interactive.cpp +223 -0
- hotpath-0.2.0/cpp_tests/test_log_parser.cpp +232 -0
- hotpath-0.2.0/cpp_tests/test_otlp_export.cpp +97 -0
- hotpath-0.2.0/cpp_tests/test_parser.cpp +257 -0
- hotpath-0.2.0/cpp_tests/test_phase_analyzer.cpp +104 -0
- hotpath-0.2.0/cpp_tests/test_prefix_analyzer.cpp +97 -0
- hotpath-0.2.0/cpp_tests/test_recommender.cpp +94 -0
- hotpath-0.2.0/cpp_tests/test_remote.cpp +88 -0
- hotpath-0.2.0/cpp_tests/test_report.cpp +185 -0
- hotpath-0.2.0/cpp_tests/test_request_trace.cpp +116 -0
- hotpath-0.2.0/cpp_tests/test_serve_report.cpp +195 -0
- hotpath-0.2.0/cpp_tests/test_server.cpp +182 -0
- hotpath-0.2.0/cpp_tests/test_sglang_metrics.cpp +89 -0
- hotpath-0.2.0/cpp_tests/test_stability.cpp +139 -0
- hotpath-0.2.0/cpp_tests/test_store.cpp +134 -0
- hotpath-0.2.0/cpp_tests/test_targets.cpp +86 -0
- hotpath-0.2.0/cpp_tests/test_traffic.cpp +61 -0
- hotpath-0.2.0/cpp_tests/test_traffic_replayer.cpp +171 -0
- hotpath-0.2.0/cpp_tests/test_validate.cpp +180 -0
- hotpath-0.2.0/cpp_tests/test_vllm_metrics.cpp +225 -0
- hotpath-0.2.0/cpp_tests/test_workload_classifier.cpp +101 -0
- hotpath-0.2.0/hotpath_py/__init__.py +5 -0
- hotpath-0.2.0/hotpath_py/bench_cuda.py +510 -0
- hotpath-0.2.0/hotpath_py/bench_cuda_kernels.py +243 -0
- hotpath-0.2.0/hotpath_py/cli.py +16 -0
- hotpath-0.2.0/include/hotpath/aggregate.h +12 -0
- hotpath-0.2.0/include/hotpath/artifacts.h +26 -0
- hotpath-0.2.0/include/hotpath/batch_analyzer.h +33 -0
- hotpath-0.2.0/include/hotpath/bench/kernels/rms_norm.h +7 -0
- hotpath-0.2.0/include/hotpath/bench/kernels/rotary_emb.h +7 -0
- hotpath-0.2.0/include/hotpath/bench/kernels/silu_mul.h +7 -0
- hotpath-0.2.0/include/hotpath/bench/registry.h +30 -0
- hotpath-0.2.0/include/hotpath/bench/runner.h +93 -0
- hotpath-0.2.0/include/hotpath/cache_analyzer.h +30 -0
- hotpath-0.2.0/include/hotpath/clock_control.h +33 -0
- hotpath-0.2.0/include/hotpath/diff.h +33 -0
- hotpath-0.2.0/include/hotpath/disagg_model.h +45 -0
- hotpath-0.2.0/include/hotpath/doctor.h +53 -0
- hotpath-0.2.0/include/hotpath/export.h +13 -0
- hotpath-0.2.0/include/hotpath/kv_config.h +25 -0
- hotpath-0.2.0/include/hotpath/log_parser.h +22 -0
- hotpath-0.2.0/include/hotpath/ops.h +19 -0
- hotpath-0.2.0/include/hotpath/otlp_export.h +19 -0
- hotpath-0.2.0/include/hotpath/phase_analyzer.h +42 -0
- hotpath-0.2.0/include/hotpath/prefix_analyzer.h +26 -0
- hotpath-0.2.0/include/hotpath/profiler/attach.h +47 -0
- hotpath-0.2.0/include/hotpath/profiler/categorizer.h +26 -0
- hotpath-0.2.0/include/hotpath/profiler/kernel_record.h +20 -0
- hotpath-0.2.0/include/hotpath/profiler/parser.h +24 -0
- hotpath-0.2.0/include/hotpath/profiler/runner.h +58 -0
- hotpath-0.2.0/include/hotpath/profiler/server.h +46 -0
- hotpath-0.2.0/include/hotpath/profiler/vllm_metrics.h +38 -0
- hotpath-0.2.0/include/hotpath/recommender.h +14 -0
- hotpath-0.2.0/include/hotpath/remote.h +40 -0
- hotpath-0.2.0/include/hotpath/report.h +99 -0
- hotpath-0.2.0/include/hotpath/request_trace.h +35 -0
- hotpath-0.2.0/include/hotpath/serve_profiler.h +55 -0
- hotpath-0.2.0/include/hotpath/sglang_metrics.h +28 -0
- hotpath-0.2.0/include/hotpath/stability.h +33 -0
- hotpath-0.2.0/include/hotpath/store.h +60 -0
- hotpath-0.2.0/include/hotpath/targets.h +25 -0
- hotpath-0.2.0/include/hotpath/traffic.h +64 -0
- hotpath-0.2.0/include/hotpath/traffic_replayer.h +60 -0
- hotpath-0.2.0/include/hotpath/validate.h +27 -0
- hotpath-0.2.0/include/hotpath/workload_classifier.h +45 -0
- hotpath-0.2.0/pyproject.toml +89 -0
- hotpath-0.2.0/src/advisor/disagg_model.cpp +176 -0
- hotpath-0.2.0/src/advisor/kv_config.cpp +116 -0
- hotpath-0.2.0/src/advisor/recommender.cpp +146 -0
- hotpath-0.2.0/src/advisor/workload_classifier.cpp +44 -0
- hotpath-0.2.0/src/aggregate.cpp +133 -0
- hotpath-0.2.0/src/artifacts.cpp +136 -0
- hotpath-0.2.0/src/bench/kernels/rms_norm.cpp +105 -0
- hotpath-0.2.0/src/bench/kernels/rotary_emb.cpp +108 -0
- hotpath-0.2.0/src/bench/kernels/silu_mul.cpp +127 -0
- hotpath-0.2.0/src/bench/registry.cpp +43 -0
- hotpath-0.2.0/src/bench/runner.cpp +728 -0
- hotpath-0.2.0/src/clock_control.cpp +216 -0
- hotpath-0.2.0/src/diff.cpp +214 -0
- hotpath-0.2.0/src/doctor.cpp +389 -0
- hotpath-0.2.0/src/export/otlp_export.cpp +160 -0
- hotpath-0.2.0/src/export.cpp +258 -0
- hotpath-0.2.0/src/interactive.cpp +726 -0
- hotpath-0.2.0/src/interactive.h +77 -0
- hotpath-0.2.0/src/main.cpp +3091 -0
- hotpath-0.2.0/src/ops.cpp +176 -0
- hotpath-0.2.0/src/profiler/attach.cpp +386 -0
- hotpath-0.2.0/src/profiler/categorizer.cpp +87 -0
- hotpath-0.2.0/src/profiler/parser.cpp +449 -0
- hotpath-0.2.0/src/profiler/runner.cpp +1422 -0
- hotpath-0.2.0/src/profiler/server.cpp +522 -0
- hotpath-0.2.0/src/profiler/sglang_metrics.cpp +90 -0
- hotpath-0.2.0/src/profiler/vllm_metrics.cpp +262 -0
- hotpath-0.2.0/src/remote.cpp +115 -0
- hotpath-0.2.0/src/report.cpp +814 -0
- hotpath-0.2.0/src/serving/batch_analyzer.cpp +65 -0
- hotpath-0.2.0/src/serving/cache_analyzer.cpp +90 -0
- hotpath-0.2.0/src/serving/log_parser.cpp +414 -0
- hotpath-0.2.0/src/serving/phase_analyzer.cpp +75 -0
- hotpath-0.2.0/src/serving/prefix_analyzer.cpp +136 -0
- hotpath-0.2.0/src/serving/serve_profiler.cpp +1610 -0
- hotpath-0.2.0/src/serving/traffic_replayer.cpp +455 -0
- hotpath-0.2.0/src/stability.cpp +259 -0
- hotpath-0.2.0/src/store.cpp +628 -0
- hotpath-0.2.0/src/targets.cpp +187 -0
- hotpath-0.2.0/src/traffic.cpp +322 -0
- hotpath-0.2.0/src/validate.cpp +238 -0
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
name: ci
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
pull_request:
|
|
6
|
+
|
|
7
|
+
jobs:
|
|
8
|
+
build-and-test:
|
|
9
|
+
runs-on: ubuntu-latest
|
|
10
|
+
steps:
|
|
11
|
+
- uses: actions/checkout@v4
|
|
12
|
+
|
|
13
|
+
- name: Install build dependencies
|
|
14
|
+
run: |
|
|
15
|
+
sudo apt-get update
|
|
16
|
+
sudo apt-get install -y cmake g++ libsqlite3-dev
|
|
17
|
+
|
|
18
|
+
- name: Configure
|
|
19
|
+
run: cmake -S . -B build
|
|
20
|
+
|
|
21
|
+
- name: Build
|
|
22
|
+
run: cmake --build build --parallel
|
|
23
|
+
|
|
24
|
+
- name: Test
|
|
25
|
+
run: ctest --test-dir build --output-on-failure
|
|
26
|
+
|
|
27
|
+
- name: CLI smoke
|
|
28
|
+
run: |
|
|
29
|
+
./build/hotpath help
|
|
30
|
+
./build/hotpath version
|
|
31
|
+
./build/hotpath doctor
|
|
32
|
+
./build/hotpath target --help
|
|
33
|
+
./build/hotpath export --help
|
|
34
|
+
./build/hotpath artifacts --help
|
|
35
|
+
./build/hotpath trace --help
|
|
36
|
+
./build/hotpath validate --help
|
|
37
|
+
./build/hotpath manifest --help
|
|
38
|
+
./build/hotpath cleanup --help
|
|
39
|
+
./build/hotpath cluster-profile --help
|
|
40
|
+
./build/hotpath soak-profile --help
|
|
41
|
+
./build/hotpath completion bash >/tmp/hotpath.bash
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
name: gpu-smoke
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
workflow_dispatch:
|
|
5
|
+
schedule:
|
|
6
|
+
- cron: '0 9 * * 1'
|
|
7
|
+
|
|
8
|
+
jobs:
|
|
9
|
+
gpu-smoke:
|
|
10
|
+
runs-on: [self-hosted, gpu]
|
|
11
|
+
steps:
|
|
12
|
+
- uses: actions/checkout@v4
|
|
13
|
+
|
|
14
|
+
- name: Configure
|
|
15
|
+
run: cmake -S . -B build
|
|
16
|
+
|
|
17
|
+
- name: Build
|
|
18
|
+
run: cmake --build build --parallel
|
|
19
|
+
|
|
20
|
+
- name: Unit tests
|
|
21
|
+
run: ctest --test-dir build --output-on-failure
|
|
22
|
+
|
|
23
|
+
- name: Doctor
|
|
24
|
+
run: ./build/hotpath doctor
|
|
25
|
+
|
|
26
|
+
- name: Bench smoke
|
|
27
|
+
run: ./build/hotpath bench --kernel silu_and_mul --shapes 64x4096 --warmup 10 --n-iter 50 --repeats 3 --output auto
|
|
28
|
+
|
|
29
|
+
- name: Optional profile smoke
|
|
30
|
+
shell: bash
|
|
31
|
+
run: |
|
|
32
|
+
if [[ -n "${RLPROF_SMOKE_MODEL:-}" ]]; then
|
|
33
|
+
./build/hotpath profile --model "$RLPROF_SMOKE_MODEL" --prompts 1 --rollouts 1 --min-tokens 8 --max-tokens 8 --input-len 16 --output .hotpath/ci_smoke
|
|
34
|
+
else
|
|
35
|
+
echo "RLPROF_SMOKE_MODEL not set; skipping profile smoke"
|
|
36
|
+
fi
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
name: release
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
pull_request:
|
|
5
|
+
push:
|
|
6
|
+
tags:
|
|
7
|
+
- "v*"
|
|
8
|
+
release:
|
|
9
|
+
types:
|
|
10
|
+
- published
|
|
11
|
+
workflow_dispatch:
|
|
12
|
+
|
|
13
|
+
jobs:
|
|
14
|
+
build_sdist:
|
|
15
|
+
name: build sdist
|
|
16
|
+
runs-on: ubuntu-latest
|
|
17
|
+
steps:
|
|
18
|
+
- uses: actions/checkout@v4
|
|
19
|
+
|
|
20
|
+
- uses: actions/setup-python@v5
|
|
21
|
+
with:
|
|
22
|
+
python-version: "3.12"
|
|
23
|
+
|
|
24
|
+
- name: Build sdist
|
|
25
|
+
run: |
|
|
26
|
+
python -m pip install -U build
|
|
27
|
+
python -m build --sdist
|
|
28
|
+
|
|
29
|
+
- uses: actions/upload-artifact@v4
|
|
30
|
+
with:
|
|
31
|
+
name: dist-sdist
|
|
32
|
+
path: dist/*.tar.gz
|
|
33
|
+
if-no-files-found: error
|
|
34
|
+
|
|
35
|
+
build_linux_wheels:
|
|
36
|
+
name: build linux wheels
|
|
37
|
+
runs-on: ubuntu-latest
|
|
38
|
+
steps:
|
|
39
|
+
- uses: actions/checkout@v4
|
|
40
|
+
|
|
41
|
+
- name: Build wheels
|
|
42
|
+
uses: pypa/cibuildwheel@v3.1.4
|
|
43
|
+
|
|
44
|
+
- uses: actions/upload-artifact@v4
|
|
45
|
+
with:
|
|
46
|
+
name: dist-wheels-linux
|
|
47
|
+
path: wheelhouse/*.whl
|
|
48
|
+
if-no-files-found: error
|
|
49
|
+
|
|
50
|
+
publish_pypi:
|
|
51
|
+
name: publish to PyPI
|
|
52
|
+
needs:
|
|
53
|
+
- build_sdist
|
|
54
|
+
- build_linux_wheels
|
|
55
|
+
if: github.event_name == 'release' && github.event.action == 'published'
|
|
56
|
+
runs-on: ubuntu-latest
|
|
57
|
+
environment: pypi
|
|
58
|
+
permissions:
|
|
59
|
+
id-token: write
|
|
60
|
+
steps:
|
|
61
|
+
- uses: actions/download-artifact@v4
|
|
62
|
+
with:
|
|
63
|
+
name: dist-sdist
|
|
64
|
+
path: dist
|
|
65
|
+
|
|
66
|
+
- uses: actions/download-artifact@v4
|
|
67
|
+
with:
|
|
68
|
+
name: dist-wheels-linux
|
|
69
|
+
path: dist
|
|
70
|
+
|
|
71
|
+
- name: Publish package distributions to PyPI
|
|
72
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
hotpath-0.2.0/.gitignore
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
.venv/
|
|
2
|
+
.publish-venv/
|
|
3
|
+
build/
|
|
4
|
+
dist/
|
|
5
|
+
.rlprof/
|
|
6
|
+
.hotpath/
|
|
7
|
+
__pycache__/
|
|
8
|
+
*.pyc
|
|
9
|
+
*.pyo
|
|
10
|
+
.pytest_cache/
|
|
11
|
+
.mypy_cache/
|
|
12
|
+
.cache/
|
|
13
|
+
*.db
|
|
14
|
+
*.sqlite
|
|
15
|
+
*.log
|
|
16
|
+
targets.cfg
|
|
17
|
+
*_meta.csv
|
|
18
|
+
*_kernels.csv
|
|
19
|
+
*_vllm_metrics.csv
|
|
20
|
+
*_vllm_metrics_summary.csv
|
|
21
|
+
*_traffic_stats.csv
|
|
22
|
+
AGENTS.md
|
|
23
|
+
logs.md
|
|
24
|
+
._*
|
|
25
|
+
.DS_Store
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"gpu": {"name": "NVIDIA A10G", "driver_version": "580.126.16", "sm_clock_mhz": 1710.0, "mem_clock_mhz": 6251.0, "temp_c": 28.0, "power_draw_w": 61.76, "power_limit_w": 300.0}, "results": [{"kernel": "silu_and_mul", "implementation": "vllm-cuda", "shape": "64x4096", "dtype": "bf16", "avg_us": 12.847930257876195, "stddev_us": 0.0, "cv_pct": 0.0, "min_us": 12.78640553842183, "p50_us": 12.870378743589313, "p99_us": 12.88700649161744, "bandwidth_gb_s": 122.42158607887708, "valid": true, "validation_max_abs_error": 0.0, "deterministic": true, "determinism_max_abs_error": 0.0, "timing_warning": false, "environment_warning": true, "unstable": true, "batch_invocations": 306, "cuda_graph_replay": false}, {"kernel": "silu_and_mul", "implementation": "torch-compile", "shape": "64x4096", "dtype": "bf16", "avg_us": 51.581309722350525, "stddev_us": 0.0, "cv_pct": 0.0, "min_us": 50.67416354342624, "p50_us": 50.86789260039458, "p99_us": 53.201873023230746, "bandwidth_gb_s": 30.49290544319908, "valid": true, "validation_max_abs_error": 0.0625, "deterministic": true, "determinism_max_abs_error": 0.0, "timing_warning": false, "environment_warning": false, "unstable": false, "batch_invocations": 111, "cuda_graph_replay": false}, {"kernel": "silu_and_mul", "implementation": "torch-eager", "shape": "64x4096", "dtype": "bf16", "avg_us": 23.435274759928387, "stddev_us": 0.0, "cv_pct": 0.0, "min_us": 23.273543307655736, "p50_us": 23.30498318923147, "p99_us": 23.72729778289795, "bandwidth_gb_s": 67.11523616055126, "valid": true, "validation_max_abs_error": 0.0, "deterministic": true, "determinism_max_abs_error": 0.0, "timing_warning": false, "environment_warning": false, "unstable": false, "batch_invocations": 228, "cuda_graph_replay": false}], "correctness_failures": [], "timing_warnings": [], "environment_warnings": ["silu_and_mul vllm-cuda 64x4096: power cap throttling observed"]}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"gpu": {"name": "NVIDIA A10G", "driver_version": "580.126.16", "sm_clock_mhz": 1710.0, "mem_clock_mhz": 6251.0, "temp_c": 29.0, "power_draw_w": 62.45, "power_limit_w": 300.0}, "results": [{"kernel": "silu_and_mul", "implementation": "vllm-cuda", "shape": "64x4096", "dtype": "bf16", "avg_us": 13.006222468835336, "stddev_us": 0.0, "cv_pct": 0.0, "min_us": 12.952889005343119, "p50_us": 12.988444831636217, "p99_us": 13.077333569526672, "bandwidth_gb_s": 120.93165434996936, "valid": true, "validation_max_abs_error": 0.0, "deterministic": true, "determinism_max_abs_error": 0.0, "timing_warning": false, "environment_warning": true, "unstable": true, "batch_invocations": 288, "cuda_graph_replay": false}, {"kernel": "silu_and_mul", "implementation": "torch-compile", "shape": "64x4096", "dtype": "bf16", "avg_us": 51.29658831699002, "stddev_us": 0.0, "cv_pct": 0.0, "min_us": 50.579149892011024, "p50_us": 51.280629916453925, "p99_us": 52.029985142505076, "bandwidth_gb_s": 30.66215613171782, "valid": true, "validation_max_abs_error": 0.03125, "deterministic": true, "determinism_max_abs_error": 0.0, "timing_warning": false, "environment_warning": false, "unstable": false, "batch_invocations": 127, "cuda_graph_replay": false}, {"kernel": "silu_and_mul", "implementation": "torch-eager", "shape": "64x4096", "dtype": "bf16", "avg_us": 23.796132595070674, "stddev_us": 0.0, "cv_pct": 0.0, "min_us": 23.68084800164431, "p50_us": 23.714755544599317, "p99_us": 23.9927942389684, "bandwidth_gb_s": 66.09746326282516, "valid": true, "validation_max_abs_error": 0.0, "deterministic": true, "determinism_max_abs_error": 0.0, "timing_warning": false, "environment_warning": false, "unstable": false, "batch_invocations": 151, "cuda_graph_replay": false}], "correctness_failures": [], "timing_warnings": [], "environment_warnings": ["silu_and_mul vllm-cuda 64x4096: power cap throttling observed", "silu_and_mul vllm-cuda 64x4096: thermal throttling observed"]}
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
## v0.2.0 - 2026-04-05
|
|
4
|
+
|
|
5
|
+
Serving analysis, interactive TUI, and numerical hardening.
|
|
6
|
+
|
|
7
|
+
Highlights:
|
|
8
|
+
|
|
9
|
+
- `serve-profile` -- live dashboard with in-place redraws during traffic replay, `--concurrency N` for parallel in-flight requests, Prometheus metrics polled at 1 Hz with batch size / queue depth / KV cache tracking
|
|
10
|
+
- `serve-report` -- latency percentile table (TTFB, TTFT, decode per token, e2e), KV cache hit rate and eviction counts, prefix sharing analysis, disaggregation recommendation with estimated throughput improvement
|
|
11
|
+
- `disagg-config` -- deployment configs for disaggregated prefill/decode targeting vLLM, llm-d, and Dynamo
|
|
12
|
+
- Interactive arrow-key menus using DEC cursor save/restore (ESC 7/8) for reliable in-place redraws across all terminal types
|
|
13
|
+
- KV bytes auto-detection from HuggingFace `config.json` with full GQA support (`num_key_value_heads`, `head_dim`, dtype)
|
|
14
|
+
- Clock detection fallback for GPUs where current SM clock equals hardware max (A10G and similar cloud instances)
|
|
15
|
+
- Numerical fixes: cache hit rate clamped to [0, 1], eviction and preemption counts floored at 0, disaggregation throughput percentage uses `(improvement - 1) * 100` with rounding
|
|
16
|
+
- JSON injection protection in traffic replayer, temp file open/write failure detection
|
|
17
|
+
- `.gitignore` updated to exclude `.hotpath/` run artifacts, `*.log`, and `targets.cfg`
|
|
18
|
+
- README rewritten for production use
|
|
19
|
+
|
|
20
|
+
## v0.1.2 - 2026-04-01
|
|
21
|
+
|
|
22
|
+
Packaging and release automation update.
|
|
23
|
+
|
|
24
|
+
Highlights:
|
|
25
|
+
|
|
26
|
+
- PyPI source distribution cleanup so local virtualenvs, build outputs, and macOS metadata files do not leak into published artifacts
|
|
27
|
+
- Linux wheel build configuration via `cibuildwheel` for CPython 3.10, 3.11, and 3.12 on `manylinux_2_28`
|
|
28
|
+
- GitHub Actions release workflow for building `sdist` plus Linux wheels and publishing to PyPI via trusted publishing on GitHub Releases
|
|
29
|
+
|
|
30
|
+
## v0.1.0 - 2026-03-31
|
|
31
|
+
|
|
32
|
+
Initial public release.
|
|
33
|
+
|
|
34
|
+
Highlights:
|
|
35
|
+
|
|
36
|
+
- local and remote `profile`, `report`, `diff`, `export`, `validate`, `artifacts`, `trace`, and `doctor`
|
|
37
|
+
- interactive prompt flows alongside the existing flag-based CLI
|
|
38
|
+
- real GPU `bench` backed by `torch.cuda.Event`, archived JSON output, and `bench-compare`
|
|
39
|
+
- SSH target registry, bootstrap, recover, and remote single-host profiling
|
|
40
|
+
- profile stability mode, clock helpers, manifest generation, cleanup, soak runs, and cluster rollup
|
|
41
|
+
- non-GPU CI and self-hosted GPU smoke workflow
|
|
42
|
+
- controller-verified remote workflow using saved SSH targets, `target bootstrap`, `bench --target`, `profile --target`, and `recover`
|
|
43
|
+
- `profile --attach ... --attach-pid ...` support that now uses the best available host-local tracing path: native PID attach when `nsys` exposes it, clone-under-trace when a second local copy fits, and replace-and-restore when a single GPU cannot hold both copies
|
|
44
|
+
- attach-by-process fast reuse for `profile --repeat N` and `soak-profile`, so repeated attach runs reuse one traced replacement lifecycle instead of relaunching the fallback per iteration
|
|
45
|
+
- stricter `cluster-profile` host handling with distinct-host enforcement by default and explicit `--allow-duplicate-hosts` loopback testing
|
|
46
|
+
- fast reuse path for `profile --repeat N` and `soak-profile` when `hotpath` launches the server, with one server startup and one `nsys` session reused across iterations
|
|
47
|
+
- local managed warm-server workflow: `server start/list/show/stop` and `profile --server NAME`, reusing one loaded `vllm serve` and one `nsys` session across separate commands
|
|
48
|
+
- managed-server hardening: explicit `--max-model-len`, stale-state pruning, stale-lock recovery, and listener PID fallback from `lsof` to `ss`
|
|
49
|
+
- publish-ready package metadata for the `pip install hotpath` release surface, plus a rewritten README aligned to the controller-verified local/remote workflows
|
|
@@ -0,0 +1,355 @@
|
|
|
1
|
+
cmake_minimum_required(VERSION 3.28)
|
|
2
|
+
|
|
3
|
+
# Project version tracks the released package version (CHANGELOG.md: v0.2.0).
project(hotpath VERSION 0.2.0 LANGUAGES CXX)
|
|
4
|
+
|
|
5
|
+
set(CMAKE_CXX_STANDARD 20)
|
|
6
|
+
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
|
7
|
+
set(CMAKE_CXX_EXTENSIONS OFF)
|
|
8
|
+
|
|
9
|
+
find_package(SQLite3 REQUIRED)
|
|
10
|
+
|
|
11
|
+
add_library(
|
|
12
|
+
hotpath_core
|
|
13
|
+
src/bench/kernels/rms_norm.cpp
|
|
14
|
+
src/bench/kernels/rotary_emb.cpp
|
|
15
|
+
src/bench/kernels/silu_mul.cpp
|
|
16
|
+
src/bench/registry.cpp
|
|
17
|
+
src/bench/runner.cpp
|
|
18
|
+
src/aggregate.cpp
|
|
19
|
+
src/artifacts.cpp
|
|
20
|
+
src/clock_control.cpp
|
|
21
|
+
src/diff.cpp
|
|
22
|
+
src/doctor.cpp
|
|
23
|
+
src/export.cpp
|
|
24
|
+
src/ops.cpp
|
|
25
|
+
src/remote.cpp
|
|
26
|
+
src/stability.cpp
|
|
27
|
+
src/targets.cpp
|
|
28
|
+
src/traffic.cpp
|
|
29
|
+
src/validate.cpp
|
|
30
|
+
src/profiler/categorizer.cpp
|
|
31
|
+
src/profiler/attach.cpp
|
|
32
|
+
src/profiler/parser.cpp
|
|
33
|
+
src/profiler/runner.cpp
|
|
34
|
+
src/profiler/server.cpp
|
|
35
|
+
src/profiler/vllm_metrics.cpp
|
|
36
|
+
src/report.cpp
|
|
37
|
+
src/store.cpp
|
|
38
|
+
src/serving/log_parser.cpp
|
|
39
|
+
src/serving/phase_analyzer.cpp
|
|
40
|
+
src/serving/batch_analyzer.cpp
|
|
41
|
+
src/serving/cache_analyzer.cpp
|
|
42
|
+
src/serving/prefix_analyzer.cpp
|
|
43
|
+
src/advisor/workload_classifier.cpp
|
|
44
|
+
src/advisor/disagg_model.cpp
|
|
45
|
+
src/advisor/recommender.cpp
|
|
46
|
+
src/advisor/kv_config.cpp
|
|
47
|
+
src/serving/serve_profiler.cpp
|
|
48
|
+
src/serving/traffic_replayer.cpp
|
|
49
|
+
src/profiler/sglang_metrics.cpp
|
|
50
|
+
src/export/otlp_export.cpp
|
|
51
|
+
)
|
|
52
|
+
|
|
53
|
+
target_include_directories(
|
|
54
|
+
hotpath_core
|
|
55
|
+
PUBLIC
|
|
56
|
+
${PROJECT_SOURCE_DIR}/include
|
|
57
|
+
)
|
|
58
|
+
|
|
59
|
+
target_link_libraries(
|
|
60
|
+
hotpath_core
|
|
61
|
+
PUBLIC
|
|
62
|
+
SQLite::SQLite3
|
|
63
|
+
)
|
|
64
|
+
|
|
65
|
+
add_library(
|
|
66
|
+
hotpath_interactive
|
|
67
|
+
src/interactive.cpp
|
|
68
|
+
)
|
|
69
|
+
|
|
70
|
+
target_include_directories(
|
|
71
|
+
hotpath_interactive
|
|
72
|
+
PUBLIC
|
|
73
|
+
${PROJECT_SOURCE_DIR}/include
|
|
74
|
+
${PROJECT_SOURCE_DIR}/src
|
|
75
|
+
)
|
|
76
|
+
|
|
77
|
+
target_link_libraries(
|
|
78
|
+
hotpath_interactive
|
|
79
|
+
PUBLIC
|
|
80
|
+
hotpath_core
|
|
81
|
+
)
|
|
82
|
+
|
|
83
|
+
include(CTest)
|
|
84
|
+
|
|
85
|
+
if(BUILD_TESTING)
  # Unit tests that link only against hotpath_core. Each name <t> maps to the
  # source file cpp_tests/<t>.cpp, an executable target <t>, and a CTest entry
  # <t>. The list order is the CTest registration order.
  foreach(
    test_name
    IN ITEMS
    test_categorizer
    test_attach
    test_parser
    test_report
    test_store
    test_vllm_metrics
    test_diff
    test_bench
    test_bench_json
    test_stability
    test_clock_control
    test_export
    test_aggregate
    test_artifacts
    test_doctor
    test_validate
    test_traffic
    test_remote
    test_targets
    test_server
    test_request_trace
    test_log_parser
    test_phase_analyzer
    test_batch_analyzer
    test_cache_analyzer
    test_prefix_analyzer
    test_workload_classifier
    test_disagg_model
    test_recommender
    test_serve_report
    test_traffic_replayer
    test_sglang_metrics
    test_otlp_export
    test_audit
  )
    add_executable(
      ${test_name}
      cpp_tests/${test_name}.cpp
    )
    target_link_libraries(${test_name} PRIVATE hotpath_core)
    add_test(NAME ${test_name} COMMAND ${test_name})
  endforeach()

  # The interactive test links hotpath_interactive, which in turn links
  # hotpath_core PUBLIC and exposes the src/ include directory.
  add_executable(
    test_interactive
    cpp_tests/test_interactive.cpp
  )
  target_link_libraries(test_interactive PRIVATE hotpath_interactive)
  add_test(NAME test_interactive COMMAND test_interactive)

  # test_cli links no project library; add_dependencies only orders the build
  # so that the hotpath executable is built before test_cli.
  add_executable(
    test_cli
    cpp_tests/test_cli.cpp
  )
  add_dependencies(test_cli hotpath)
  add_test(NAME test_cli COMMAND test_cli)
endif()
|
|
338
|
+
|
|
339
|
+
add_executable(
|
|
340
|
+
hotpath
|
|
341
|
+
src/main.cpp
|
|
342
|
+
)
|
|
343
|
+
target_include_directories(
|
|
344
|
+
hotpath
|
|
345
|
+
PRIVATE
|
|
346
|
+
${PROJECT_SOURCE_DIR}/src
|
|
347
|
+
)
|
|
348
|
+
target_link_libraries(hotpath PRIVATE hotpath_interactive)
|
|
349
|
+
|
|
350
|
+
include(GNUInstallDirs)
|
|
351
|
+
|
|
352
|
+
install(
|
|
353
|
+
TARGETS hotpath
|
|
354
|
+
RUNTIME DESTINATION hotpath_py/bin
|
|
355
|
+
)
|