freesolo-chalk 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. freesolo_chalk-0.1.0/.github/workflows/ci.yml +43 -0
  2. freesolo_chalk-0.1.0/.github/workflows/main-source-guard.yml +20 -0
  3. freesolo_chalk-0.1.0/.gitignore +28 -0
  4. freesolo_chalk-0.1.0/.pre-commit-config.yaml +10 -0
  5. freesolo_chalk-0.1.0/LICENSE +25 -0
  6. freesolo_chalk-0.1.0/Makefile +41 -0
  7. freesolo_chalk-0.1.0/NOTICE +26 -0
  8. freesolo_chalk-0.1.0/PKG-INFO +104 -0
  9. freesolo_chalk-0.1.0/README.md +56 -0
  10. freesolo_chalk-0.1.0/benchmark/README.md +20 -0
  11. freesolo_chalk-0.1.0/benchmark/__init__.py +0 -0
  12. freesolo_chalk-0.1.0/benchmark/scripts/.gitkeep +0 -0
  13. freesolo_chalk-0.1.0/dev/fmt-requirements.txt +1 -0
  14. freesolo_chalk-0.1.0/pyproject.toml +84 -0
  15. freesolo_chalk-0.1.0/setup.cfg +4 -0
  16. freesolo_chalk-0.1.0/setup.py +102 -0
  17. freesolo_chalk-0.1.0/src/chalk/__init__.py +0 -0
  18. freesolo_chalk-0.1.0/src/chalk/ops/__init__.py +12 -0
  19. freesolo_chalk-0.1.0/src/chalk/ops/embedding.py +353 -0
  20. freesolo_chalk-0.1.0/src/chalk/ops/fp8_base.py +349 -0
  21. freesolo_chalk-0.1.0/src/chalk/ops/lora.py +608 -0
  22. freesolo_chalk-0.1.0/src/chalk/ops/mlp.py +947 -0
  23. freesolo_chalk-0.1.0/src/chalk/ops/qkv.py +636 -0
  24. freesolo_chalk-0.1.0/src/chalk/ops/rope.py +455 -0
  25. freesolo_chalk-0.1.0/src/chalk/transformers/__init__.py +38 -0
  26. freesolo_chalk-0.1.0/src/chalk/transformers/apply.py +160 -0
  27. freesolo_chalk-0.1.0/src/chalk/transformers/embedding.py +5 -0
  28. freesolo_chalk-0.1.0/src/chalk/transformers/fp8_base.py +5 -0
  29. freesolo_chalk-0.1.0/src/chalk/transformers/lora.py +5 -0
  30. freesolo_chalk-0.1.0/src/chalk/transformers/mlp.py +6 -0
  31. freesolo_chalk-0.1.0/src/chalk/transformers/qkv.py +5 -0
  32. freesolo_chalk-0.1.0/src/chalk/transformers/rope.py +5 -0
  33. freesolo_chalk-0.1.0/src/chalk/utils.py +35 -0
  34. freesolo_chalk-0.1.0/src/freesolo_chalk.egg-info/PKG-INFO +104 -0
  35. freesolo_chalk-0.1.0/src/freesolo_chalk.egg-info/SOURCES.txt +57 -0
  36. freesolo_chalk-0.1.0/src/freesolo_chalk.egg-info/dependency_links.txt +1 -0
  37. freesolo_chalk-0.1.0/src/freesolo_chalk.egg-info/requires.txt +11 -0
  38. freesolo_chalk-0.1.0/src/freesolo_chalk.egg-info/top_level.txt +1 -0
  39. freesolo_chalk-0.1.0/test/__init__.py +0 -0
  40. freesolo_chalk-0.1.0/test/conftest.py +31 -0
  41. freesolo_chalk-0.1.0/test/ops/__init__.py +0 -0
  42. freesolo_chalk-0.1.0/test/ops/test_embedding.py +16 -0
  43. freesolo_chalk-0.1.0/test/ops/test_fp8_base.py +81 -0
  44. freesolo_chalk-0.1.0/test/ops/test_lora.py +82 -0
  45. freesolo_chalk-0.1.0/test/ops/test_mlp.py +337 -0
  46. freesolo_chalk-0.1.0/test/ops/test_ops_contract.py +23 -0
  47. freesolo_chalk-0.1.0/test/ops/test_qkv.py +16 -0
  48. freesolo_chalk-0.1.0/test/ops/test_rope.py +105 -0
  49. freesolo_chalk-0.1.0/test/test_helpers.py +67 -0
  50. freesolo_chalk-0.1.0/test/test_package.py +68 -0
  51. freesolo_chalk-0.1.0/test/test_smoke.py +38 -0
  52. freesolo_chalk-0.1.0/test/test_utils.py +86 -0
  53. freesolo_chalk-0.1.0/test/transformers/__init__.py +0 -0
  54. freesolo_chalk-0.1.0/test/transformers/test_apply_aggregator.py +220 -0
  55. freesolo_chalk-0.1.0/test/transformers/test_installer_noop.py +63 -0
  56. freesolo_chalk-0.1.0/test/transformers/test_kernel_install.py +97 -0
  57. freesolo_chalk-0.1.0/test/transformers/test_rope_install.py +75 -0
  58. freesolo_chalk-0.1.0/test/transformers/test_transformers_contract.py +33 -0
  59. freesolo_chalk-0.1.0/test/utils.py +170 -0
@@ -0,0 +1,43 @@
1
+ name: ci
2
+
3
+ on:
4
+ workflow_dispatch:
5
+ push:
6
+ branches: [main, dev]
7
+ pull_request:
8
+ branches: [main, dev]
9
+
10
+ jobs:
11
+ checkstyle:
12
+ runs-on: ubuntu-latest
13
+ steps:
14
+ - uses: actions/checkout@v4
15
+ - uses: actions/setup-python@v5
16
+ with:
17
+ python-version: "3.11"
18
+ - name: Install ruff
19
+ run: pip install -r dev/fmt-requirements.txt
20
+ - name: Lint
21
+ run: ruff check --output-format=concise .
22
+ - name: Format check
23
+ run: ruff format --check --diff .
24
+
25
+ test:
26
+ runs-on: ubuntu-latest
27
+ strategy:
28
+ matrix:
29
+ python-version: ["3.10", "3.11", "3.12"]
30
+ steps:
31
+ - uses: actions/checkout@v4
32
+ - uses: actions/setup-python@v5
33
+ with:
34
+ python-version: ${{ matrix.python-version }}
35
+ # CPU runner: install the package (setup.py adds torch/triton for the "cpu" platform)
36
+ # plus pytest. GPU-only kernel correctness tests self-skip when CUDA is unavailable.
37
+ - name: Install
38
+ run: |
39
+ python -m pip install --upgrade pip
40
+ pip install -e .
41
+ pip install pytest pytest-cov
42
+ - name: Test (CPU)
43
+ run: python -m pytest --disable-warnings test/
@@ -0,0 +1,20 @@
1
+ name: Main source guard
2
+ on:
3
+ pull_request:
4
+ branches: [main]
5
+ permissions:
6
+ contents: read
7
+ jobs:
8
+ source-is-dev:
9
+ name: Source branch is dev
10
+ runs-on: ubuntu-latest
11
+ steps:
12
+ - name: Require PRs into main to originate from dev
13
+ env:
14
+ HEAD_REF: ${{ github.head_ref }}
15
+ run: |
16
+ if [ "$HEAD_REF" != "dev" ]; then
17
+ echo "::error::PRs into main must come from 'dev' (got '$HEAD_REF'). Merge into dev, then promote dev -> main."
18
+ exit 1
19
+ fi
20
+ echo "Source branch '$HEAD_REF' is allowed."
@@ -0,0 +1,28 @@
1
+ __pycache__/
2
+ *.egg-info/
3
+ site/
4
+ .cache/
5
+ .venv/
6
+ venv/
7
+ .ipynb_checkpoints/
8
+ .vscode/
9
+ .idea/
10
+
11
+ # Misc
12
+ .DS_Store
13
+
14
+ # Build
15
+ build/
16
+ dist/
17
+
18
+ # Lockfiles
19
+ uv.lock
20
+
21
+ # Benchmark images
22
+ benchmark/visualizations
23
+
24
+ # Coverage
25
+ .coverage
26
+ htmlcov/
27
+ .pytest_cache/
28
+ .ruff_cache/
@@ -0,0 +1,10 @@
1
+ repos:
2
+ - repo: https://github.com/astral-sh/ruff-pre-commit
3
+ # Ruff version.
4
+ rev: v0.14.11
5
+ hooks:
6
+ # Run the linter.
7
+ - id: ruff-check
8
+ args: [ --fix ]
9
+ # Run the formatter.
10
+ - id: ruff-format
@@ -0,0 +1,25 @@
1
+ BSD 2-CLAUSE LICENSE
2
+
3
+ Copyright (c) 2026 Freesolo, Inc.
4
+ All rights reserved.
5
+
6
+ Redistribution and use in source and binary forms, with or without
7
+ modification, are permitted provided that the following conditions are met:
8
+
9
+ 1. Redistributions of source code must retain the above copyright notice, this
10
+ list of conditions and the following disclaimer.
11
+
12
+ 2. Redistributions in binary form must reproduce the above copyright notice,
13
+ this list of conditions and the following disclaimer in the documentation
14
+ and/or other materials provided with the distribution.
15
+
16
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
20
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@@ -0,0 +1,41 @@
1
+ .PHONY: test checkstyle coverage run-benchmarks all
2
+
3
+ all: checkstyle test
4
+
5
+ # Command to run pytest for correctness tests
6
+ test:
7
+ python -m pytest --disable-warnings \
8
+ --cov=src/chalk \
9
+ --cov-report=term-missing \
10
+ test/
11
+
12
+ # Command to run coverage report
13
+ coverage:
14
+ coverage report -m
15
+
16
+ # Command to run ruff for linting and formatting code
17
+ checkstyle:
18
+ ruff check --output-format=concise .; ruff_check_status=$$?; \
19
+ ruff format --check --diff .; ruff_format_status=$$?; \
20
+ ruff check . --fix; \
21
+ ruff format .; \
22
+ if [ $$ruff_check_status -ne 0 ] || [ $$ruff_format_status -ne 0 ]; then \
23
+ exit 1; \
24
+ fi
25
+
26
+ # Command to run all benchmark scripts and update benchmarking data file
27
+ # By default this doesn't overwrite existing data for the same benchmark experiment
28
+ # run with `make run-benchmarks OVERWRITE=1` to overwrite existing benchmark data
29
+ BENCHMARK_DIR = benchmark/scripts
30
+ BENCHMARK_SCRIPTS = $(wildcard $(BENCHMARK_DIR)/benchmark_*.py)
31
+ OVERWRITE ?= 0
32
+
33
+ run-benchmarks:
34
+ @for script in $(BENCHMARK_SCRIPTS); do \
35
+ echo "Running benchmark: $$script"; \
36
+ if [ $(OVERWRITE) -eq 1 ]; then \
37
+ python $$script --overwrite; \
38
+ else \
39
+ python $$script; \
40
+ fi; \
41
+ done
@@ -0,0 +1,26 @@
1
+ Copyright 2026 Freesolo, Inc.
2
+ All Rights Reserved.
3
+
4
+ Licensed under the BSD 2-Clause License (the "License"). See LICENSE in the
5
+ project root for license information.
6
+
7
+ Chalk provides custom Triton/CUDA kernels designed to complement Liger Kernel
8
+ (https://github.com/linkedin/Liger-Kernel, BSD 2-Clause). Its repository layout,
9
+ tooling, and conventions intentionally mirror Liger Kernel.
10
+
11
+ This product may contain code derived from the following open source projects:
12
+
13
+ 1. Liger Kernel
14
+ Copyright (c) 2024 LinkedIn Corporation
15
+ Licensed under the BSD 2-Clause License
16
+ Source: https://github.com/linkedin/Liger-Kernel
17
+
18
+ Repository structure (src/ layout, ops/transformers split), benchmarking
19
+ harness conventions, and test scaffolding were referenced from this project.
20
+
21
+ 2. Triton
22
+ Copyright (c) 2023 OpenAI
23
+ Licensed under the MIT License
24
+ Source: https://github.com/openai/triton
25
+
26
+ For full license texts, please refer to the respective project repositories.
@@ -0,0 +1,104 @@
1
+ Metadata-Version: 2.4
2
+ Name: freesolo-chalk
3
+ Version: 0.1.0
4
+ Summary: Custom Triton/CUDA kernels that complement Liger Kernel for LLM post-training
5
+ License: BSD 2-CLAUSE LICENSE
6
+
7
+ Copyright (c) 2026 Freesolo, Inc.
8
+ All rights reserved.
9
+
10
+ Redistribution and use in source and binary forms, with or without
11
+ modification, are permitted provided that the following conditions are met:
12
+
13
+ 1. Redistributions of source code must retain the above copyright notice, this
14
+ list of conditions and the following disclaimer.
15
+
16
+ 2. Redistributions in binary form must reproduce the above copyright notice,
17
+ this list of conditions and the following disclaimer in the documentation
18
+ and/or other materials provided with the distribution.
19
+
20
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
+
31
+ Project-URL: Homepage, https://github.com/freesolo-co/chalk
32
+ Description-Content-Type: text/markdown
33
+ License-File: LICENSE
34
+ License-File: NOTICE
35
+ Requires-Dist: torch>=2.1.2
36
+ Requires-Dist: triton>=2.3.1
37
+ Provides-Extra: dev
38
+ Requires-Dist: transformers>=4.52.0; extra == "dev"
39
+ Requires-Dist: matplotlib>=3.7.2; extra == "dev"
40
+ Requires-Dist: ruff>=0.12.0; extra == "dev"
41
+ Requires-Dist: pytest>=7.1.2; extra == "dev"
42
+ Requires-Dist: pytest-xdist; extra == "dev"
43
+ Requires-Dist: pytest-cov; extra == "dev"
44
+ Requires-Dist: datasets>=2.19.2; extra == "dev"
45
+ Dynamic: license-file
46
+ Dynamic: provides-extra
47
+ Dynamic: requires-dist
48
+
49
+ # Chalk
50
+
51
+ **Custom Triton/CUDA kernels that complement [Liger Kernel](https://github.com/linkedin/Liger-Kernel).**
52
+
53
+ `pip install freesolo-chalk`
54
+
55
+ Liger fuses the cross-entropy, activation, and RMSNorm paths. Chalk fills the gaps that
56
+ matter for Freesolo's [Flash](https://github.com/freesolo-co/flash) post-training stack —
57
+ fused GEMMs, the LoRA-delta matmuls, the QKV norm+RoPE epilogue, embedding gather, and
58
+ FP8 frozen-base GEMMs — each behind a documented, benchmarked, opt-in entry point.
59
+
60
+ Chalk's repository layout and conventions intentionally mirror Liger Kernel.
61
+
62
+ ## Layout
63
+
64
+ ```
65
+ src/chalk/
66
+ ops/ # raw Triton/CUDA kernels + autograd.Function wrappers
67
+ transformers/ # model-level installers that monkeypatch kernels into HF modules
68
+ utils.py # device detection helpers
69
+ test/ # correctness + gating tests (mirrors test/ops, test/transformers)
70
+ benchmark/ # speed + fp32-correctness A/B harness
71
+ ```
72
+
73
+ ## Design principles
74
+
75
+ - **Worker-side kernel library.** Like Liger, chalk depends on `torch` + `triton` — it is
76
+ meant to be installed where kernels actually run (the GPU worker), so consumers should
77
+ depend on it from a `gpu` extra rather than their base install. Importing the top-level
78
+ `chalk` package is still cheap (kernels lazy-load), so probing `chalk.utils.infer_device()`
79
+ never forces a heavy import.
80
+ - **Complements, not replaces, Liger.** Liger fuses CE / activation / RMSNorm; chalk fuses
81
+ the GEMMs, LoRA delta, QKV epilogue, embedding, and FP8 base.
82
+ - **Safe fallback.** Every installer is arch-gated, runs a numeric self-test on install,
83
+ patches only frozen `nn.Linear` layers (never trainable / PEFT-wrapped layers), and
84
+ silently falls back to the eager / Liger path on any import / compile / self-test failure.
85
+ - **Opt-in & evidence-based.** Kernels are off unless explicitly enabled, and every kept
86
+ kernel has end-to-end loss-curve evidence — not just a microbenchmark.
87
+
88
+ ## Development
89
+
90
+ ```bash
91
+ pip install -e '.[dev]'
92
+ make checkstyle # ruff check + format
93
+ make test # pytest with coverage
94
+ make run-benchmarks
95
+ ```
96
+
97
+ ## Status
98
+
99
+ Intentionally minimal to start — kernels are landed one at a time under `chalk/ops` +
100
+ `chalk/transformers`, each with correctness tests and benchmark evidence.
101
+
102
+ ## License
103
+
104
+ BSD-2-Clause. See [LICENSE](LICENSE) and [NOTICE](NOTICE).
@@ -0,0 +1,56 @@
1
+ # Chalk
2
+
3
+ **Custom Triton/CUDA kernels that complement [Liger Kernel](https://github.com/linkedin/Liger-Kernel).**
4
+
5
+ `pip install freesolo-chalk`
6
+
7
+ Liger fuses the cross-entropy, activation, and RMSNorm paths. Chalk fills the gaps that
8
+ matter for Freesolo's [Flash](https://github.com/freesolo-co/flash) post-training stack —
9
+ fused GEMMs, the LoRA-delta matmuls, the QKV norm+RoPE epilogue, embedding gather, and
10
+ FP8 frozen-base GEMMs — each behind a documented, benchmarked, opt-in entry point.
11
+
12
+ Chalk's repository layout and conventions intentionally mirror Liger Kernel.
13
+
14
+ ## Layout
15
+
16
+ ```
17
+ src/chalk/
18
+ ops/ # raw Triton/CUDA kernels + autograd.Function wrappers
19
+ transformers/ # model-level installers that monkeypatch kernels into HF modules
20
+ utils.py # device detection helpers
21
+ test/ # correctness + gating tests (mirrors test/ops, test/transformers)
22
+ benchmark/ # speed + fp32-correctness A/B harness
23
+ ```
24
+
25
+ ## Design principles
26
+
27
+ - **Worker-side kernel library.** Like Liger, chalk depends on `torch` + `triton` — it is
28
+ meant to be installed where kernels actually run (the GPU worker), so consumers should
29
+ depend on it from a `gpu` extra rather than their base install. Importing the top-level
30
+ `chalk` package is still cheap (kernels lazy-load), so probing `chalk.utils.infer_device()`
31
+ never forces a heavy import.
32
+ - **Complements, not replaces, Liger.** Liger fuses CE / activation / RMSNorm; chalk fuses
33
+ the GEMMs, LoRA delta, QKV epilogue, embedding, and FP8 base.
34
+ - **Safe fallback.** Every installer is arch-gated, runs a numeric self-test on install,
35
+ patches only frozen `nn.Linear` layers (never trainable / PEFT-wrapped layers), and
36
+ silently falls back to the eager / Liger path on any import / compile / self-test failure.
37
+ - **Opt-in & evidence-based.** Kernels are off unless explicitly enabled, and every kept
38
+ kernel has end-to-end loss-curve evidence — not just a microbenchmark.
39
+
40
+ ## Development
41
+
42
+ ```bash
43
+ pip install -e '.[dev]'
44
+ make checkstyle # ruff check + format
45
+ make test # pytest with coverage
46
+ make run-benchmarks
47
+ ```
48
+
49
+ ## Status
50
+
51
+ Intentionally minimal to start — kernels are landed one at a time under `chalk/ops` +
52
+ `chalk/transformers`, each with correctness tests and benchmark evidence.
53
+
54
+ ## License
55
+
56
+ BSD-2-Clause. See [LICENSE](LICENSE) and [NOTICE](NOTICE).
@@ -0,0 +1,20 @@
1
+ ## Benchmarking Chalk Kernels
2
+
3
+ Chalk's benchmarking harness mirrors [Liger Kernel's](https://github.com/linkedin/Liger-Kernel/tree/main/benchmark):
4
+ each kernel ships a `benchmark/scripts/benchmark_<kernel>.py` that A/Bs the chalk kernel
5
+ against the eager / Liger / `torch` baseline across model configs and sequence lengths,
6
+ reporting both speed and an fp32-correctness check.
7
+
8
+ ### Running
9
+
10
+ ```bash
11
+ make run-benchmarks # run every benchmark/scripts/benchmark_*.py
12
+ make run-benchmarks OVERWRITE=1 # overwrite existing recorded data
13
+ ```
14
+
15
+ ### Conventions
16
+
17
+ * Every kept kernel must have **end-to-end** evidence (a real LoRA-SFT / GRPO loss-curve
18
+ A/B), not just a microbenchmark — a microbenchmark win that disappears E2E is not a win.
19
+ * Record the GPU (A40 / A6000 / H100 / H200) the numbers were measured on; cuBLAS strength
20
+ differs enough across SKUs that a win on one can vanish on another.
File without changes
File without changes
@@ -0,0 +1 @@
1
+ ruff>=0.1.6
@@ -0,0 +1,84 @@
1
+ [build-system]
2
+ requires = ["setuptools>=42", "wheel", "setuptools-scm"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "freesolo-chalk"
7
+ version = "0.1.0"
8
+ description = "Custom Triton/CUDA kernels that complement Liger Kernel for LLM post-training"
9
+ urls = { "Homepage" = "https://github.com/freesolo-co/chalk" }
10
+ readme = { file = "README.md", content-type = "text/markdown" }
11
+ license = { file = "LICENSE" }
12
+ dynamic = ["dependencies", "optional-dependencies"]
13
+
14
+ [tool.setuptools]
15
+ package-dir = {"" = "src"}
16
+
17
+ [tool.setuptools.packages.find]
18
+ where = ["src"]
19
+ include = ["chalk*"]
20
+ namespaces = false
21
+
22
+ [tool.pytest.ini_options]
23
+ pythonpath = ["src", "."]
24
+ addopts = [
25
+ "--cov=src/chalk",
26
+ "--cov-report=term-missing",
27
+ "--cov-report=html",
28
+ "--cov-config=pyproject.toml",
29
+ "--durations=0"
30
+ ]
31
+ python_files = "test_*.py"
32
+ testpaths = ["test/"]
33
+ markers = [
34
+ "gpu: requires a CUDA GPU + torch/triton (skipped on CPU CI)",
35
+ ]
36
+
37
+ [tool.coverage.run]
38
+ branch = true
39
+ parallel = true
40
+ source = ["src/chalk"]
41
+ concurrency = ["multiprocessing"]
42
+
43
+ [tool.coverage.paths]
44
+ chalk = [
45
+ "src/chalk",
46
+ "*/site-packages/chalk"
47
+ ]
48
+
49
+ [tool.coverage.report]
50
+ omit = ["test/*"]
51
+ show_missing = true
52
+ skip_covered = false
53
+
54
+
55
+ [tool.ruff]
56
+ line-length = 120
57
+ target-version = "py310"
58
+ respect-gitignore = true
59
+ src = ["src"]
60
+
61
+ [tool.ruff.lint]
62
+ select = [
63
+ "E", # pycodestyle errors
64
+ "F", # pyflakes
65
+ "I", # isort
66
+ ]
67
+ ignore = ["E501", "B006", "E731", "A002", "E203"]
68
+
69
+ exclude = [
70
+ ".git",
71
+ "__pycache__",
72
+ ".venv",
73
+ ]
74
+
75
+ [tool.ruff.format]
76
+ quote-style = "double"
77
+ indent-style = "space"
78
+ skip-magic-trailing-comma = false
79
+ line-ending = "auto"
80
+
81
+ [tool.ruff.lint.isort]
82
+ known-first-party = ["chalk"]
83
+ force-single-line = true
84
+ lines-between-types = 1
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,102 @@
1
+ # setup.py
2
+
3
+ import subprocess
4
+
5
+ from typing import Literal
6
+
7
+ from setuptools import find_packages
8
+ from setuptools import setup
9
+
10
+
11
def get_default_dependencies():
    """Return the install requirements for the platform reported by ``get_platform()``.

    CUDA and CPU hosts get both torch and triton, ROCm hosts get triton only,
    and XPU hosts get torch only. Any other platform value yields ``None``.
    """
    detected = get_platform()

    if detected == "rocm":
        return ["triton>=3.0.0"]
    if detected == "xpu":
        return ["torch>=2.6.0"]
    if detected in ("cuda", "cpu"):
        return ["torch>=2.1.2", "triton>=2.3.1"]
    # No branch matched: same result as falling off the end of the function.
    return None
28
+
29
+
30
def get_optional_dependencies():
    """Return the extras mapping; only the ``dev`` group is defined."""
    return {
        "dev": [
            "transformers>=4.52.0",
            "matplotlib>=3.7.2",
            "ruff>=0.12.0",
            "pytest>=7.1.2",
            "pytest-xdist",
            "pytest-cov",
            "datasets>=2.19.2",
        ],
    }
44
+
45
+
46
def is_xpu_available():
    """Report whether an Intel XPU appears to be present on this machine.

    Two probes are tried in order: ``xpu-smi`` exiting successfully, then
    ``sycl-ls`` exiting successfully with ``level_zero:gpu`` in its output.

    Returns:
        bool: True if either probe succeeds, otherwise False.
    """
    # OSError covers a missing tool (FileNotFoundError) as well as one that is
    # present but cannot be executed (e.g. PermissionError); both simply mean
    # "no usable probe" and must not abort the build.
    probe_errors = (subprocess.SubprocessError, OSError)

    try:
        subprocess.run(["xpu-smi"], check=True)
        return True
    except probe_errors:
        pass

    try:
        # Argument list, no shell: nothing here needs shell parsing, and a
        # missing binary then surfaces as FileNotFoundError directly.
        result = subprocess.run(["sycl-ls"], check=True, capture_output=True)
    except probe_errors:
        return False

    # errors="replace" keeps unexpected non-UTF-8 bytes in the tool output from
    # raising UnicodeDecodeError; the marker searched for is plain ASCII.
    return "level_zero:gpu" in result.stdout.decode(errors="replace")
62
+
63
+
64
def get_platform() -> Literal["cuda", "rocm", "cpu", "xpu"]:
    """Detect the accelerator platform of this machine without importing torch.

    Vendor tools are probed in order: ``nvidia-smi`` (CUDA), ``rocm-smi``
    (ROCm), then the Intel checks in ``is_xpu_available()``. The first probe
    that succeeds decides the result; if none does, the platform is ``"cpu"``.
    A one-line message naming the outcome is printed.

    Returns:
        One of ``"cuda"``, ``"rocm"``, ``"xpu"`` or ``"cpu"``.
    """
    smi_probes = (
        ("nvidia-smi", "NVIDIA GPU detected", "cuda"),
        ("rocm-smi", "ROCm GPU detected", "rocm"),
    )
    for tool, banner, detected in smi_probes:
        try:
            subprocess.run([tool], check=True)
        except (subprocess.SubprocessError, OSError):
            # OSError covers a missing tool (FileNotFoundError) and one that is
            # present but cannot be executed (e.g. PermissionError); either way
            # fall through to the next probe instead of aborting the build.
            continue
        print(banner)
        return detected

    if is_xpu_available():
        print("Intel GPU detected")
        return "xpu"

    print("No GPU detected")
    return "cpu"
84
+
85
+
86
# Build entry point: the package lives under src/, and the requirement lists
# are computed by the helper functions defined in this file.
setup(
    name="freesolo-chalk",
    # src/ layout: importable packages are discovered under ./src.
    package_dir={"": "src"},
    packages=find_packages(where="src"),
    # Base requirements depend on the platform probed while this script runs.
    install_requires=get_default_dependencies(),
    # Optional groups, installable as e.g. `pip install '.[dev]'`.
    extras_require=get_optional_dependencies(),
    classifiers=[
        "Development Status :: 3 - Alpha",
        "Intended Audience :: Developers",
        "Intended Audience :: Science/Research",
        "Programming Language :: Python :: 3",
        "Topic :: Scientific/Engineering :: Artificial Intelligence",
        "Topic :: Software Development :: Libraries :: Python Modules",
        "License :: OSI Approved :: BSD License",
        "Operating System :: OS Independent",
    ],
)
File without changes
@@ -0,0 +1,12 @@
1
"""
Low-level operator namespace for Chalk.

``chalk.ops`` is the home of the raw Triton/CUDA kernels and their
``torch.autograd.Function`` wrappers (``@triton.jit`` functions, autograd
Functions, FP8 GEMM helpers). The model-level installers that monkeypatch those
kernels into HuggingFace modules live in ``chalk.transformers``; the split
mirrors Liger Kernel's layout.

Nothing is re-exported from this namespace yet, by design: kernels are landed
one at a time, each with its own benchmark evidence.
"""

# Public API of the namespace; intentionally empty for now.
__all__: list[str] = []