flashspec 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91) hide show
  1. flashspec-0.1.0/.github/workflows/benchmark.yml +76 -0
  2. flashspec-0.1.0/.github/workflows/ci.yml +139 -0
  3. flashspec-0.1.0/.github/workflows/gpu_tests.yml +54 -0
  4. flashspec-0.1.0/.gitignore +80 -0
  5. flashspec-0.1.0/.readthedocs.yaml +26 -0
  6. flashspec-0.1.0/AGENTS.md +928 -0
  7. flashspec-0.1.0/CHANGELOG.md +125 -0
  8. flashspec-0.1.0/CONTRIBUTING.md +170 -0
  9. flashspec-0.1.0/LICENSE +117 -0
  10. flashspec-0.1.0/Makefile +48 -0
  11. flashspec-0.1.0/PKG-INFO +331 -0
  12. flashspec-0.1.0/README.md +172 -0
  13. flashspec-0.1.0/benchmarks/README.md +127 -0
  14. flashspec-0.1.0/benchmarks/__init__.py +0 -0
  15. flashspec-0.1.0/benchmarks/baselines.py +104 -0
  16. flashspec-0.1.0/benchmarks/benchmark_kernels.py +327 -0
  17. flashspec-0.1.0/benchmarks/compare_baselines.py +168 -0
  18. flashspec-0.1.0/benchmarks/configs/llama3_70b.yaml +26 -0
  19. flashspec-0.1.0/benchmarks/configs/llama3_8b.yaml +30 -0
  20. flashspec-0.1.0/benchmarks/configs/mistral_7b.yaml +25 -0
  21. flashspec-0.1.0/benchmarks/results/.gitkeep +0 -0
  22. flashspec-0.1.0/benchmarks/results/baseline.json +26 -0
  23. flashspec-0.1.0/benchmarks/run_all.py +335 -0
  24. flashspec-0.1.0/benchmarks/sweep_draft_sizes.py +159 -0
  25. flashspec-0.1.0/benchmarks/sweep_gamma.py +93 -0
  26. flashspec-0.1.0/deploy/Dockerfile +44 -0
  27. flashspec-0.1.0/deploy/docker-compose.yml +23 -0
  28. flashspec-0.1.0/deploy/k8s/flashspec-deployment.yaml +47 -0
  29. flashspec-0.1.0/docs/api/index.md +14 -0
  30. flashspec-0.1.0/docs/architecture.md +142 -0
  31. flashspec-0.1.0/docs/bandit.md +146 -0
  32. flashspec-0.1.0/docs/benchmarks.md +70 -0
  33. flashspec-0.1.0/docs/index.md +20 -0
  34. flashspec-0.1.0/docs/kernels.md +84 -0
  35. flashspec-0.1.0/docs/mkdocs.yml +5 -0
  36. flashspec-0.1.0/docs/requirements.txt +5 -0
  37. flashspec-0.1.0/flashspec/__init__.py +43 -0
  38. flashspec-0.1.0/flashspec/bandit/__init__.py +14 -0
  39. flashspec-0.1.0/flashspec/bandit/base.py +402 -0
  40. flashspec-0.1.0/flashspec/bandit/oracle.py +181 -0
  41. flashspec-0.1.0/flashspec/bandit/thompson.py +178 -0
  42. flashspec-0.1.0/flashspec/bandit/ucb.py +175 -0
  43. flashspec-0.1.0/flashspec/engine/__init__.py +15 -0
  44. flashspec-0.1.0/flashspec/engine/drafter.py +247 -0
  45. flashspec-0.1.0/flashspec/engine/speculative.py +257 -0
  46. flashspec-0.1.0/flashspec/engine/verifier.py +205 -0
  47. flashspec-0.1.0/flashspec/export/__init__.py +5 -0
  48. flashspec-0.1.0/flashspec/export/onnx.py +113 -0
  49. flashspec-0.1.0/flashspec/kernels/__init__.py +18 -0
  50. flashspec-0.1.0/flashspec/kernels/_reference.py +196 -0
  51. flashspec-0.1.0/flashspec/kernels/gather_kernel.py +136 -0
  52. flashspec-0.1.0/flashspec/kernels/verify_kernel.py +228 -0
  53. flashspec-0.1.0/flashspec/metrics/__init__.py +11 -0
  54. flashspec-0.1.0/flashspec/metrics/acceptance.py +175 -0
  55. flashspec-0.1.0/flashspec/metrics/latency.py +234 -0
  56. flashspec-0.1.0/flashspec/metrics/throughput.py +249 -0
  57. flashspec-0.1.0/flashspec/py.typed +0 -0
  58. flashspec-0.1.0/flashspec/sampling/__init__.py +9 -0
  59. flashspec-0.1.0/flashspec/sampling/rejection.py +235 -0
  60. flashspec-0.1.0/flashspec/sampling/typical.py +138 -0
  61. flashspec-0.1.0/flashspec/utils/__init__.py +20 -0
  62. flashspec-0.1.0/flashspec/utils/config.py +159 -0
  63. flashspec-0.1.0/flashspec/utils/device.py +165 -0
  64. flashspec-0.1.0/flashspec/utils/logging.py +117 -0
  65. flashspec-0.1.0/mkdocs.yml +74 -0
  66. flashspec-0.1.0/notebooks/01_quickstart.py +81 -0
  67. flashspec-0.1.0/paper/Makefile +12 -0
  68. flashspec-0.1.0/paper/figures/.gitkeep +7 -0
  69. flashspec-0.1.0/paper/flashspec.bib +77 -0
  70. flashspec-0.1.0/paper/flashspec.tex +328 -0
  71. flashspec-0.1.0/pyproject.toml +136 -0
  72. flashspec-0.1.0/scripts/__init__.py +0 -0
  73. flashspec-0.1.0/scripts/check_regression.py +197 -0
  74. flashspec-0.1.0/scripts/download_models.py +128 -0
  75. flashspec-0.1.0/scripts/export_draft.py +186 -0
  76. flashspec-0.1.0/scripts/profile_kernel.py +208 -0
  77. flashspec-0.1.0/scripts/update_benchmark_results.py +181 -0
  78. flashspec-0.1.0/tests/__init__.py +0 -0
  79. flashspec-0.1.0/tests/chaos/__init__.py +0 -0
  80. flashspec-0.1.0/tests/chaos/test_bandit_adversarial.py +85 -0
  81. flashspec-0.1.0/tests/conftest.py +116 -0
  82. flashspec-0.1.0/tests/integration/__init__.py +0 -0
  83. flashspec-0.1.0/tests/integration/test_e2e_greedy.py +136 -0
  84. flashspec-0.1.0/tests/integration/test_e2e_sampling.py +250 -0
  85. flashspec-0.1.0/tests/integration/test_onnx_parity.py +149 -0
  86. flashspec-0.1.0/tests/unit/__init__.py +0 -0
  87. flashspec-0.1.0/tests/unit/test_acceptance.py +90 -0
  88. flashspec-0.1.0/tests/unit/test_bandit.py +327 -0
  89. flashspec-0.1.0/tests/unit/test_config.py +151 -0
  90. flashspec-0.1.0/tests/unit/test_sampling.py +271 -0
  91. flashspec-0.1.0/tests/unit/test_verify_kernel.py +370 -0
@@ -0,0 +1,76 @@
1
+ name: Performance Benchmark
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ schedule:
7
+ - cron: "0 4 * * *" # Nightly at 04:00 UTC
8
+ workflow_dispatch:
9
+
10
+ jobs:
11
+ # ── Job: bench-full ──────────────────────────────────────────────────────────
12
+ bench-full:
13
+ name: bench-full
14
+ runs-on: [self-hosted, gpu, H100]
15
+ timeout-minutes: 240
16
+ steps:
17
+ - uses: actions/checkout@v4
18
+ with:
19
+ # Need push access for committing results back to main.
20
+ token: ${{ secrets.BENCHMARK_COMMIT_TOKEN }}
21
+
22
+ - uses: actions/setup-python@v5
23
+ with:
24
+ python-version: "3.11"
25
+ cache: pip
26
+
27
+ - name: Install bench dependencies
28
+ run: pip install -e ".[bench]"
29
+
30
+ - name: Build Docker image
31
+ run: docker build -t flashspec:latest -f deploy/Dockerfile .
32
+
33
+ - name: Trivy image scan — CRITICAL CVEs block build (§15)
34
+ uses: aquasecurity/trivy-action@master
35
+ with:
36
+ image-ref: "flashspec:latest"
37
+ format: "table"
38
+ exit-code: "1"
39
+ severity: "CRITICAL"
40
+ ignore-unfixed: true
41
+
42
+ - name: Run full benchmark suite (make bench)
43
+ run: make bench
44
+ env:
45
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
46
+
47
+ - name: Validate and commit results
48
+ run: python scripts/update_benchmark_results.py
49
+ env:
50
+ GIT_AUTHOR_NAME: "FlashSpec Benchmark Bot"
51
+ GIT_AUTHOR_EMAIL: "ci@flashspec"
52
+ GIT_COMMITTER_NAME: "FlashSpec Benchmark Bot"
53
+ GIT_COMMITTER_EMAIL: "ci@flashspec"
54
+
55
+ - name: Check performance regression
56
+ run: python scripts/check_regression.py --threshold 0.05
57
+
58
+ - name: Upload result artifacts
59
+ if: always()
60
+ uses: actions/upload-artifact@v4
61
+ with:
62
+ name: benchmark-results-${{ github.sha }}
63
+ path: benchmarks/results/
64
+ retention-days: 90
65
+
66
+ - name: Notify on failure
67
+ if: failure()
68
+ uses: slackapi/slack-github-action@v1.26.0
69
+ with:
70
+ payload: |
71
+ {
72
+ "text": ":red_circle: FlashSpec nightly benchmark FAILED on `${{ github.sha }}`.\n<${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|View run>"
73
+ }
74
+ env:
75
+ SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}
76
+ SLACK_WEBHOOK_TYPE: INCOMING_WEBHOOK
@@ -0,0 +1,139 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main, dev]
6
+ pull_request:
7
+ branches: [main, dev]
8
+
9
+ concurrency:
10
+ group: ci-${{ github.ref }}
11
+ cancel-in-progress: true
12
+
13
+ jobs:
14
+ # ── Job 1: lint ──────────────────────────────────────────────────────────────
15
+ lint:
16
+ name: lint
17
+ runs-on: ubuntu-22.04
18
+ steps:
19
+ - uses: actions/checkout@v4
20
+ - uses: actions/setup-python@v5
21
+ with:
22
+ python-version: "3.11"
23
+ cache: pip
24
+ - name: Install dev dependencies
25
+ run: pip install -e ".[dev]" --extra-index-url https://download.pytorch.org/whl/cpu
26
+ - name: make lint
27
+ run: make lint
28
+ - name: pip-audit (fail on CRITICAL CVE — §15)
29
+ run: |
30
+ pip install pip-audit
31
+ pip-audit --strict --vulnerability-service pypi -r <(pip freeze) 2>&1 | tee pip-audit.txt
32
+ # Exit non-zero only on CRITICAL severity
33
+ grep -q "CRITICAL" pip-audit.txt && exit 1 || true
34
+
35
+ # ── Job 2: test-cpu ──────────────────────────────────────────────────────────
36
+ test-cpu:
37
+ name: test-cpu
38
+ runs-on: ubuntu-22.04
39
+ needs: lint
40
+ env:
41
+ TRITON_CPU_BACKEND: "1"
42
+ steps:
43
+ - uses: actions/checkout@v4
44
+ - uses: actions/setup-python@v5
45
+ with:
46
+ python-version: "3.11"
47
+ cache: pip
48
+ - name: Install
49
+ run: pip install -e ".[dev]" --extra-index-url https://download.pytorch.org/whl/cpu
50
+ - name: make test
51
+ run: make test
52
+ - name: Upload coverage XML
53
+ uses: actions/upload-artifact@v4
54
+ with:
55
+ name: coverage-cpu
56
+ path: coverage.xml
57
+ retention-days: 7
58
+
59
+ # ── Job 3: test-chaos ────────────────────────────────────────────────────────
60
+ test-chaos:
61
+ name: test-chaos
62
+ runs-on: ubuntu-22.04
63
+ needs: lint
64
+ env:
65
+ TRITON_CPU_BACKEND: "1"
66
+ steps:
67
+ - uses: actions/checkout@v4
68
+ - uses: actions/setup-python@v5
69
+ with:
70
+ python-version: "3.11"
71
+ cache: pip
72
+ - name: Install
73
+ run: pip install -e ".[dev]" --extra-index-url https://download.pytorch.org/whl/cpu
74
+ - name: make test-chaos
75
+ run: make test-chaos
76
+
77
+ # ── Job 4: coverage ──────────────────────────────────────────────────────────
78
+ coverage:
79
+ name: coverage
80
+ runs-on: ubuntu-22.04
81
+ needs: test-cpu
82
+ steps:
83
+ - uses: actions/checkout@v4
84
+ - uses: actions/setup-python@v5
85
+ with:
86
+ python-version: "3.11"
87
+ cache: pip
88
+ - name: Install
89
+ run: pip install -e ".[dev]" --extra-index-url https://download.pytorch.org/whl/cpu
90
+ - name: Download coverage artifact
91
+ uses: actions/download-artifact@v4
92
+ with:
93
+ name: coverage-cpu
94
+ - name: Fail if coverage < 95%
95
+ run: |
96
+ pip install coverage --break-system-packages 2>/dev/null || pip install coverage
97
+ python -m coverage report --fail-under=95 --rcfile=pyproject.toml
98
+ - name: Upload to Codecov
99
+ uses: codecov/codecov-action@v4
100
+ with:
101
+ files: coverage.xml
102
+ flags: cpu
103
+ fail_ci_if_error: false
104
+
105
+ # ── Job 5: onnx-parity ───────────────────────────────────────────────────────
106
+ onnx-parity:
107
+ name: onnx-parity
108
+ runs-on: ubuntu-22.04
109
+ needs: test-cpu
110
+ env:
111
+ TRITON_CPU_BACKEND: "1"
112
+ steps:
113
+ - uses: actions/checkout@v4
114
+ - uses: actions/setup-python@v5
115
+ with:
116
+ python-version: "3.11"
117
+ cache: pip
118
+ - name: Install with ONNX extras
119
+ run: |
120
+ pip install -e ".[dev,onnx]" \
121
+ --extra-index-url https://download.pytorch.org/whl/cpu
122
+ - name: Run ONNX parity tests
123
+ run: pytest tests/integration/test_onnx_parity.py -v
124
+
125
+ # ── Job 6: docs-build ────────────────────────────────────────────────────────
126
+ docs-build:
127
+ name: docs-build
128
+ runs-on: ubuntu-22.04
129
+ needs: lint
130
+ steps:
131
+ - uses: actions/checkout@v4
132
+ - uses: actions/setup-python@v5
133
+ with:
134
+ python-version: "3.11"
135
+ cache: pip
136
+ - name: Install docs dependencies
137
+ run: pip install -e ".[docs]" --extra-index-url https://download.pytorch.org/whl/cpu
138
+ - name: make docs
139
+ run: make docs
@@ -0,0 +1,54 @@
1
+ name: GPU Tests
2
+
3
+ on:
4
+ pull_request:
5
+ branches: [main]
6
+ schedule:
7
+ - cron: "0 2 * * *" # Nightly at 02:00 UTC
8
+ workflow_dispatch:
9
+
10
+ jobs:
11
+ # ── Job 1: test-gpu ──────────────────────────────────────────────────────────
12
+ test-gpu:
13
+ name: test-gpu
14
+ runs-on: [self-hosted, gpu, A10G]
15
+ timeout-minutes: 90
16
+ steps:
17
+ - uses: actions/checkout@v4
18
+ - uses: actions/setup-python@v5
19
+ with:
20
+ python-version: "3.11"
21
+ cache: pip
22
+ - name: Install
23
+ run: pip install -e ".[dev]"
24
+ - name: make test-gpu
25
+ run: make test-gpu
26
+ - name: Run slow distribution tests (N=10,000 KS gate)
27
+ run: pytest tests/integration/test_e2e_sampling.py -m slow -x -v
28
+ - name: Upload coverage
29
+ uses: codecov/codecov-action@v4
30
+ with:
31
+ files: coverage.xml
32
+ flags: gpu
33
+ fail_ci_if_error: false
34
+
35
+ # ── Job 2: bench-quick ───────────────────────────────────────────────────────
36
+ bench-quick:
37
+ name: bench-quick
38
+ runs-on: [self-hosted, gpu, A10G]
39
+ needs: test-gpu
40
+ timeout-minutes: 30
41
+ steps:
42
+ - uses: actions/checkout@v4
43
+ - uses: actions/setup-python@v5
44
+ with:
45
+ python-version: "3.11"
46
+ cache: pip
47
+ - name: Install bench dependencies
48
+ run: pip install -e ".[bench]"
49
+ - name: make bench-quick
50
+ run: make bench-quick
51
+ - name: Assert throughput ≥ 1.5× AR
52
+ # The toy benchmark prints pass/fail; exit code 1 on failure.
53
+ run: |
54
+ python benchmarks/run_all.py --config benchmarks/configs/ --toy
@@ -0,0 +1,80 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *.pyo
5
+ *.pyd
6
+ .Python
7
+ *.egg-info/
8
+ dist/
9
+ build/
10
+ *.egg
11
+ *.whl
12
+
13
+ # Virtual environments
14
+ .venv/
15
+ venv/
16
+ env/
17
+ .env
18
+
19
+ # Testing & coverage
20
+ .pytest_cache/
21
+ .coverage
22
+ htmlcov/
23
+ coverage.xml
24
+ *.cover
25
+
26
+ # Mypy
27
+ .mypy_cache/
28
+
29
+ # Ruff
30
+ .ruff_cache/
31
+
32
+ # MkDocs build output (never commit)
33
+ site/
34
+
35
+ # Triton cache
36
+ .triton/
37
+ triton_cache/
38
+
39
+ # Model weights (NEVER commit)
40
+ *.bin
41
+ *.safetensors
42
+ *.pt
43
+ *.pth
44
+ *.gguf
45
+ *.ggml
46
+ hf_cache/
47
+ models/
48
+
49
+ # Benchmark results (committed selectively)
50
+ benchmarks/results/*.json
51
+ !benchmarks/results/baseline.json
52
+ !benchmarks/results/.gitkeep
53
+
54
+ # Jupyter
55
+ *.ipynb
56
+ .ipynb_checkpoints/
57
+
58
+ # Paper build artefacts
59
+ paper/*.aux
60
+ paper/*.bbl
61
+ paper/*.blg
62
+ paper/*.log
63
+ paper/*.out
64
+ paper/*.pdf
65
+ paper/figures/*.pdf
66
+ paper/figures/*.png
67
+
68
+ # IDE
69
+ .idea/
70
+ .vscode/
71
+ *.swp
72
+ *.swo
73
+ .DS_Store
74
+ Thumbs.db
75
+
76
+ # Secrets
77
+ .env
78
+ *.key
79
+ *.pem
80
+ secrets/
@@ -0,0 +1,26 @@
1
+ # Read the Docs configuration file
2
+ # Required for all projects since September 2023.
3
+ # See https://docs.readthedocs.com/platform/stable/config-file/index.html
4
+
5
+ version: 2
6
+
7
+ build:
8
+ os: ubuntu-22.04
9
+ tools:
10
+ python: "3.11"
11
+ jobs:
12
+ pre_install:
13
+ # Install MkDocs Material and mkdocstrings before the main install step.
14
+ - pip install mkdocs-material mkdocstrings[python] pymdown-extensions
15
+
16
+ # MkDocs configuration: point to the file at the repo root.
17
+ mkdocs:
18
+ configuration: mkdocs.yml
19
+
20
+ # Install the flashspec package so mkdocstrings can import it for API docs.
21
+ python:
22
+ install:
23
+ - method: pip
24
+ path: .
25
+ extra_requirements:
26
+ - docs