lmxlab 0.2.0__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (177) hide show
  1. lmxlab-0.2.0/.github/dependabot.yml +11 -0
  2. lmxlab-0.2.0/.github/pull_request_template.md +13 -0
  3. lmxlab-0.2.0/.github/workflows/ci.yml +42 -0
  4. lmxlab-0.2.0/.github/workflows/dependabot-auto-merge.yml +27 -0
  5. lmxlab-0.2.0/.github/workflows/docs.yml +30 -0
  6. lmxlab-0.2.0/.github/workflows/publish.yml +25 -0
  7. lmxlab-0.2.0/.gitignore +42 -0
  8. lmxlab-0.2.0/.pre-commit-config.yaml +29 -0
  9. lmxlab-0.2.0/CHANGELOG.md +92 -0
  10. lmxlab-0.2.0/CONTRIBUTING.md +153 -0
  11. lmxlab-0.2.0/LICENSE +21 -0
  12. lmxlab-0.2.0/PKG-INFO +212 -0
  13. lmxlab-0.2.0/README.md +169 -0
  14. lmxlab-0.2.0/docs/api/core.md +109 -0
  15. lmxlab-0.2.0/docs/api/data.md +61 -0
  16. lmxlab-0.2.0/docs/api/eval.md +56 -0
  17. lmxlab-0.2.0/docs/api/experiments.md +43 -0
  18. lmxlab-0.2.0/docs/api/index.md +15 -0
  19. lmxlab-0.2.0/docs/api/inference.md +50 -0
  20. lmxlab-0.2.0/docs/api/models.md +82 -0
  21. lmxlab-0.2.0/docs/api/training.md +63 -0
  22. lmxlab-0.2.0/docs/architecture/compiled-training.md +186 -0
  23. lmxlab-0.2.0/docs/architecture/configurable-block.md +211 -0
  24. lmxlab-0.2.0/docs/architecture/mlx-idioms.md +215 -0
  25. lmxlab-0.2.0/docs/architecture/overview.md +167 -0
  26. lmxlab-0.2.0/docs/architecture/production-optimizations.md +431 -0
  27. lmxlab-0.2.0/docs/architecture/unified-memory.md +202 -0
  28. lmxlab-0.2.0/docs/data/index.md +253 -0
  29. lmxlab-0.2.0/docs/devlog/index.md +562 -0
  30. lmxlab-0.2.0/docs/experiments/methodology.md +245 -0
  31. lmxlab-0.2.0/docs/getting-started/first-training-run.md +367 -0
  32. lmxlab-0.2.0/docs/getting-started/installation.md +79 -0
  33. lmxlab-0.2.0/docs/getting-started/quickstart.md +307 -0
  34. lmxlab-0.2.0/docs/index.md +99 -0
  35. lmxlab-0.2.0/docs/inference/index.md +193 -0
  36. lmxlab-0.2.0/docs/models/index.md +490 -0
  37. lmxlab-0.2.0/docs/recipes/index.md +414 -0
  38. lmxlab-0.2.0/docs/training/index.md +249 -0
  39. lmxlab-0.2.0/mkdocs.yml +71 -0
  40. lmxlab-0.2.0/pyproject.toml +90 -0
  41. lmxlab-0.2.0/recipes/ablation_gpt_to_llama.py +594 -0
  42. lmxlab-0.2.0/recipes/ablation_hyp001b.py +529 -0
  43. lmxlab-0.2.0/recipes/ablation_hyp001c.py +815 -0
  44. lmxlab-0.2.0/recipes/ablation_hyp001d.py +828 -0
  45. lmxlab-0.2.0/recipes/advanced_sampling.py +171 -0
  46. lmxlab-0.2.0/recipes/analyze_experiments.py +204 -0
  47. lmxlab-0.2.0/recipes/benchmark_compile.py +174 -0
  48. lmxlab-0.2.0/recipes/checkpoint_resume.py +154 -0
  49. lmxlab-0.2.0/recipes/compare_architectures.py +123 -0
  50. lmxlab-0.2.0/recipes/compare_kv_cache.py +294 -0
  51. lmxlab-0.2.0/recipes/compare_optimizers.py +369 -0
  52. lmxlab-0.2.0/recipes/compare_schedules.py +185 -0
  53. lmxlab-0.2.0/recipes/compare_training.py +141 -0
  54. lmxlab-0.2.0/recipes/distill_model.py +278 -0
  55. lmxlab-0.2.0/recipes/evaluate_model.py +163 -0
  56. lmxlab-0.2.0/recipes/finetune_lora.py +191 -0
  57. lmxlab-0.2.0/recipes/finetune_qlora.py +166 -0
  58. lmxlab-0.2.0/recipes/interactive_generate.py +129 -0
  59. lmxlab-0.2.0/recipes/load_pretrained.py +108 -0
  60. lmxlab-0.2.0/recipes/mup_coordinate_check.py +379 -0
  61. lmxlab-0.2.0/recipes/profile_models.py +124 -0
  62. lmxlab-0.2.0/recipes/quantize_and_generate.py +171 -0
  63. lmxlab-0.2.0/recipes/run_experiment.py +243 -0
  64. lmxlab-0.2.0/recipes/speculative_decoding.py +204 -0
  65. lmxlab-0.2.0/recipes/sweep_learning_rate.py +241 -0
  66. lmxlab-0.2.0/recipes/train_curriculum.py +254 -0
  67. lmxlab-0.2.0/recipes/train_deltanet.py +164 -0
  68. lmxlab-0.2.0/recipes/train_dpo.py +165 -0
  69. lmxlab-0.2.0/recipes/train_grpo.py +202 -0
  70. lmxlab-0.2.0/recipes/train_llama_shakespeare.py +145 -0
  71. lmxlab-0.2.0/recipes/train_moe.py +152 -0
  72. lmxlab-0.2.0/recipes/train_mtp.py +201 -0
  73. lmxlab-0.2.0/recipes/train_tiny_gpt.py +102 -0
  74. lmxlab-0.2.0/recipes/train_with_callbacks.py +163 -0
  75. lmxlab-0.2.0/recipes/train_with_datasets.py +169 -0
  76. lmxlab-0.2.0/src/lmxlab/__init__.py +20 -0
  77. lmxlab-0.2.0/src/lmxlab/__main__.py +5 -0
  78. lmxlab-0.2.0/src/lmxlab/cli.py +233 -0
  79. lmxlab-0.2.0/src/lmxlab/core/__init__.py +63 -0
  80. lmxlab-0.2.0/src/lmxlab/core/attention.py +541 -0
  81. lmxlab-0.2.0/src/lmxlab/core/block.py +147 -0
  82. lmxlab-0.2.0/src/lmxlab/core/config.py +162 -0
  83. lmxlab-0.2.0/src/lmxlab/core/deltanet.py +295 -0
  84. lmxlab-0.2.0/src/lmxlab/core/ffn.py +131 -0
  85. lmxlab-0.2.0/src/lmxlab/core/lora.py +311 -0
  86. lmxlab-0.2.0/src/lmxlab/core/mamba2.py +630 -0
  87. lmxlab-0.2.0/src/lmxlab/core/mamba3.py +459 -0
  88. lmxlab-0.2.0/src/lmxlab/core/mla.py +181 -0
  89. lmxlab-0.2.0/src/lmxlab/core/moe.py +403 -0
  90. lmxlab-0.2.0/src/lmxlab/core/norm.py +36 -0
  91. lmxlab-0.2.0/src/lmxlab/core/position.py +143 -0
  92. lmxlab-0.2.0/src/lmxlab/core/qlora.py +211 -0
  93. lmxlab-0.2.0/src/lmxlab/core/quantize.py +99 -0
  94. lmxlab-0.2.0/src/lmxlab/core/registry.py +106 -0
  95. lmxlab-0.2.0/src/lmxlab/core/sparse_attention.py +304 -0
  96. lmxlab-0.2.0/src/lmxlab/data/__init__.py +21 -0
  97. lmxlab-0.2.0/src/lmxlab/data/batching.py +66 -0
  98. lmxlab-0.2.0/src/lmxlab/data/dataset.py +166 -0
  99. lmxlab-0.2.0/src/lmxlab/data/tokenizer.py +248 -0
  100. lmxlab-0.2.0/src/lmxlab/eval/__init__.py +15 -0
  101. lmxlab-0.2.0/src/lmxlab/eval/metrics.py +168 -0
  102. lmxlab-0.2.0/src/lmxlab/experiments/__init__.py +54 -0
  103. lmxlab-0.2.0/src/lmxlab/experiments/analysis.py +174 -0
  104. lmxlab-0.2.0/src/lmxlab/experiments/flops.py +103 -0
  105. lmxlab-0.2.0/src/lmxlab/experiments/mlflow.py +206 -0
  106. lmxlab-0.2.0/src/lmxlab/experiments/profiling.py +231 -0
  107. lmxlab-0.2.0/src/lmxlab/experiments/runner.py +123 -0
  108. lmxlab-0.2.0/src/lmxlab/experiments/sweep.py +83 -0
  109. lmxlab-0.2.0/src/lmxlab/experiments/tracking.py +129 -0
  110. lmxlab-0.2.0/src/lmxlab/inference/__init__.py +17 -0
  111. lmxlab-0.2.0/src/lmxlab/inference/beam_search.py +110 -0
  112. lmxlab-0.2.0/src/lmxlab/inference/reward_model.py +45 -0
  113. lmxlab-0.2.0/src/lmxlab/inference/sampling.py +142 -0
  114. lmxlab-0.2.0/src/lmxlab/inference/speculative.py +101 -0
  115. lmxlab-0.2.0/src/lmxlab/models/__init__.py +108 -0
  116. lmxlab-0.2.0/src/lmxlab/models/bamba.py +135 -0
  117. lmxlab-0.2.0/src/lmxlab/models/base.py +188 -0
  118. lmxlab-0.2.0/src/lmxlab/models/convert.py +616 -0
  119. lmxlab-0.2.0/src/lmxlab/models/deepseek.py +206 -0
  120. lmxlab-0.2.0/src/lmxlab/models/falcon.py +137 -0
  121. lmxlab-0.2.0/src/lmxlab/models/gemma.py +68 -0
  122. lmxlab-0.2.0/src/lmxlab/models/gemma3.py +107 -0
  123. lmxlab-0.2.0/src/lmxlab/models/generate.py +266 -0
  124. lmxlab-0.2.0/src/lmxlab/models/glm.py +82 -0
  125. lmxlab-0.2.0/src/lmxlab/models/gpt.py +87 -0
  126. lmxlab-0.2.0/src/lmxlab/models/gpt_oss.py +77 -0
  127. lmxlab-0.2.0/src/lmxlab/models/grok.py +87 -0
  128. lmxlab-0.2.0/src/lmxlab/models/jamba.py +164 -0
  129. lmxlab-0.2.0/src/lmxlab/models/kimi.py +117 -0
  130. lmxlab-0.2.0/src/lmxlab/models/llama.py +92 -0
  131. lmxlab-0.2.0/src/lmxlab/models/llama4.py +203 -0
  132. lmxlab-0.2.0/src/lmxlab/models/mistral.py +81 -0
  133. lmxlab-0.2.0/src/lmxlab/models/mixtral.py +82 -0
  134. lmxlab-0.2.0/src/lmxlab/models/nemotron.py +324 -0
  135. lmxlab-0.2.0/src/lmxlab/models/olmo.py +78 -0
  136. lmxlab-0.2.0/src/lmxlab/models/qwen.py +146 -0
  137. lmxlab-0.2.0/src/lmxlab/models/qwen35.py +111 -0
  138. lmxlab-0.2.0/src/lmxlab/models/qwen_next.py +78 -0
  139. lmxlab-0.2.0/src/lmxlab/models/smollm.py +106 -0
  140. lmxlab-0.2.0/src/lmxlab/py.typed +0 -0
  141. lmxlab-0.2.0/src/lmxlab/training/__init__.py +56 -0
  142. lmxlab-0.2.0/src/lmxlab/training/callbacks.py +226 -0
  143. lmxlab-0.2.0/src/lmxlab/training/checkpoints.py +85 -0
  144. lmxlab-0.2.0/src/lmxlab/training/config.py +41 -0
  145. lmxlab-0.2.0/src/lmxlab/training/curriculum.py +113 -0
  146. lmxlab-0.2.0/src/lmxlab/training/distillation.py +144 -0
  147. lmxlab-0.2.0/src/lmxlab/training/dpo.py +76 -0
  148. lmxlab-0.2.0/src/lmxlab/training/grpo.py +64 -0
  149. lmxlab-0.2.0/src/lmxlab/training/grpo_trainer.py +244 -0
  150. lmxlab-0.2.0/src/lmxlab/training/mtp.py +233 -0
  151. lmxlab-0.2.0/src/lmxlab/training/optimizers.py +169 -0
  152. lmxlab-0.2.0/src/lmxlab/training/trainer.py +375 -0
  153. lmxlab-0.2.0/tests/__init__.py +0 -0
  154. lmxlab-0.2.0/tests/conftest.py +27 -0
  155. lmxlab-0.2.0/tests/test_advanced_training.py +265 -0
  156. lmxlab-0.2.0/tests/test_architectures.py +610 -0
  157. lmxlab-0.2.0/tests/test_behavioral.py +394 -0
  158. lmxlab-0.2.0/tests/test_cli.py +173 -0
  159. lmxlab-0.2.0/tests/test_convert.py +269 -0
  160. lmxlab-0.2.0/tests/test_core.py +295 -0
  161. lmxlab-0.2.0/tests/test_cross_reference.py +3120 -0
  162. lmxlab-0.2.0/tests/test_data.py +305 -0
  163. lmxlab-0.2.0/tests/test_eval.py +146 -0
  164. lmxlab-0.2.0/tests/test_experiments.py +368 -0
  165. lmxlab-0.2.0/tests/test_flops.py +178 -0
  166. lmxlab-0.2.0/tests/test_grpo_trainer.py +172 -0
  167. lmxlab-0.2.0/tests/test_inference.py +557 -0
  168. lmxlab-0.2.0/tests/test_lora.py +207 -0
  169. lmxlab-0.2.0/tests/test_lora_save_load.py +176 -0
  170. lmxlab-0.2.0/tests/test_models.py +1671 -0
  171. lmxlab-0.2.0/tests/test_mup.py +582 -0
  172. lmxlab-0.2.0/tests/test_profiling.py +194 -0
  173. lmxlab-0.2.0/tests/test_qlora.py +179 -0
  174. lmxlab-0.2.0/tests/test_quantize.py +316 -0
  175. lmxlab-0.2.0/tests/test_sampling.py +208 -0
  176. lmxlab-0.2.0/tests/test_training.py +332 -0
  177. lmxlab-0.2.0/uv.lock +3414 -0
@@ -0,0 +1,11 @@
1
+ version: 2
2
+ updates:
3
+ - package-ecosystem: "pip"
4
+ directory: "/"
5
+ schedule:
6
+ interval: "weekly"
7
+
8
+ - package-ecosystem: "github-actions"
9
+ directory: "/"
10
+ schedule:
11
+ interval: "weekly"
@@ -0,0 +1,13 @@
1
+ ## Summary
2
+
3
+ <!-- 1-3 sentences describing what this PR does and why -->
4
+
5
+ ## Changes
6
+
7
+ -
8
+
9
+ ## Checklist
10
+
11
+ - [ ] Tests added/updated
12
+ - [ ] Lint passes (`uv run ruff check src/ tests/ recipes/`)
13
+ - [ ] Format passes (`uv run ruff format --check src/ tests/ recipes/`)
@@ -0,0 +1,42 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ lint:
11
+ runs-on: ubuntu-latest
12
+ steps:
13
+ - uses: actions/checkout@v6
14
+ - uses: astral-sh/setup-uv@v7
15
+ with:
16
+ enable-cache: true
17
+ cache-dependency-glob: "uv.lock"
18
+ - run: uv sync --locked --extra dev
19
+ - run: uv run ruff check src/ tests/ recipes/
20
+ - run: uv run ruff format --check src/ tests/ recipes/
21
+
22
+ docs:
23
+ runs-on: ubuntu-latest
24
+ steps:
25
+ - uses: actions/checkout@v6
26
+ - uses: astral-sh/setup-uv@v7
27
+ with:
28
+ enable-cache: true
29
+ cache-dependency-glob: "uv.lock"
30
+ - run: uv sync --locked --extra dev
31
+ - run: uv run mkdocs build --strict
32
+
33
+ test:
34
+ runs-on: macos-14
35
+ steps:
36
+ - uses: actions/checkout@v6
37
+ - uses: astral-sh/setup-uv@v7
38
+ with:
39
+ enable-cache: true
40
+ cache-dependency-glob: "uv.lock"
41
+ - run: uv sync --locked --extra dev
42
+ - run: uv run pytest tests/ -v -m "not slow"
@@ -0,0 +1,27 @@
1
+ name: Dependabot Auto-Merge
2
+
3
+ on: pull_request
4
+
5
+ permissions:
6
+ contents: write
7
+ pull-requests: write
8
+
9
+ jobs:
10
+ auto-merge:
11
+ runs-on: ubuntu-latest
12
+ if: github.actor == 'dependabot[bot]'
13
+ steps:
14
+ - name: Fetch Dependabot metadata
15
+ id: metadata
16
+ uses: dependabot/fetch-metadata@v2
17
+ with:
18
+ github-token: "${{ secrets.GITHUB_TOKEN }}"
19
+
20
+ - name: Auto-merge minor and patch updates
21
+ if: >-
22
+ steps.metadata.outputs.update-type == 'version-update:semver-minor' ||
23
+ steps.metadata.outputs.update-type == 'version-update:semver-patch'
24
+ run: gh pr merge --auto --squash "$PR_URL"
25
+ env:
26
+ PR_URL: ${{ github.event.pull_request.html_url }}
27
+ GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
@@ -0,0 +1,30 @@
1
+ name: Docs
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+
7
+ permissions:
8
+ contents: read
9
+ pages: write
10
+ id-token: write
11
+
12
+ jobs:
13
+ deploy:
14
+ runs-on: ubuntu-latest
15
+ environment:
16
+ name: github-pages
17
+ url: ${{ steps.deployment.outputs.page_url }}
18
+ steps:
19
+ - uses: actions/checkout@v6
20
+ - uses: astral-sh/setup-uv@v7
21
+ with:
22
+ enable-cache: true
23
+ cache-dependency-glob: "uv.lock"
24
+ - run: uv sync --locked --extra dev
25
+ - run: uv run mkdocs build
26
+ - uses: actions/upload-pages-artifact@v4
27
+ with:
28
+ path: site
29
+ - id: deployment
30
+ uses: actions/deploy-pages@v4
@@ -0,0 +1,25 @@
1
+ name: Publish to PyPI
2
+
3
+ on:
4
+ release:
5
+ types: [published]
6
+
7
+ permissions:
8
+ id-token: write
9
+
10
+ jobs:
11
+ publish:
12
+ runs-on: ubuntu-latest
13
+ environment: pypi
14
+ steps:
15
+ - uses: actions/checkout@v6
16
+
17
+ - uses: astral-sh/setup-uv@v7
18
+ with:
19
+ enable-cache: true
20
+ cache-dependency-glob: "uv.lock"
21
+
22
+ - run: uv build
23
+
24
+ - name: Publish to PyPI
25
+ uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,42 @@
1
+ # Python
2
+ __pycache__/
3
+ *.pyc
4
+ *.pyo
5
+ dist/
6
+ *.egg-info/
7
+
8
+ # Tool caches
9
+ .ruff_cache/
10
+ .mypy_cache/
11
+ .pytest_cache/
12
+
13
+ # Virtual environment
14
+ .venv/
15
+
16
+ # Claude Code (local-only)
17
+ .claude/
18
+ CLAUDE.md
19
+
20
+ # Secrets
21
+ .env
22
+ .env.*
23
+ credentials.json
24
+
25
+ # Data (downloaded by recipes)
26
+ /data/
27
+
28
+ # Experiment results
29
+ results.jsonl
30
+
31
+ # Agent team output (temporary)
32
+ .team-output/
33
+
34
+ # Checkpoints
35
+ checkpoints/
36
+ *.safetensors
37
+
38
+ # Build artifacts
39
+ site/
40
+
41
+ # OS
42
+ .DS_Store
@@ -0,0 +1,29 @@
1
+ repos:
2
+ - repo: https://github.com/pre-commit/pre-commit-hooks
3
+ rev: v5.0.0
4
+ hooks:
5
+ - id: trailing-whitespace
6
+ - id: end-of-file-fixer
7
+ - id: check-yaml
8
+ - id: check-toml
9
+ - id: check-added-large-files
10
+ args: ["--maxkb=500"]
11
+
12
+ - repo: https://github.com/astral-sh/ruff-pre-commit
13
+ rev: v0.11.3
14
+ hooks:
15
+ - id: ruff
16
+ args: [--fix]
17
+ - id: ruff-format
18
+
19
+ - repo: https://github.com/astral-sh/uv-pre-commit
20
+ rev: 0.7.12
21
+ hooks:
22
+ - id: uv-lock
23
+
24
+ - repo: https://github.com/compilerla/conventional-pre-commit
25
+ rev: v4.0.0
26
+ hooks:
27
+ - id: conventional-pre-commit
28
+ stages: [commit-msg]
29
+ args: [feat, fix, refactor, test, docs, chore, ci, build, perf]
@@ -0,0 +1,92 @@
1
+ # Changelog
2
+
3
+ All notable changes to lmxlab will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
+
8
+ ## [Unreleased]
9
+
10
+ ## [0.2.0] - 2026-03-14
11
+
12
+ ### Added
13
+
14
+ - **16 new architecture config factories**: DeepSeek V3 (MLA + MoE),
15
+ Nemotron (hybrid Mamba-Transformer MoE), Llama 4 Scout/Maverick
16
+ (iRoPE + chunked attention + MoE), Mistral Small (sliding window),
17
+ OLMo 2 (QK-norm), GPT-OSS (QK-norm), Grok (SharedExpertMoE),
18
+ Kimi K2.5 (DeltaNet + MoE), Qwen-Next (gated attention),
19
+ SmolLM3 (iRoPE), Qwen 3 MoE, Falcon H1 (hybrid Mamba-2),
20
+ Jamba (Mamba-2 + MoE), Bamba (hybrid Mamba-2), GLM-4.5 (MLA NoPE)
21
+ - **Mamba-2 SSD**: structured state-space sequence mixer with chunked
22
+ parallel scan and recurrent inference paths
23
+ - **Mamba-3**: trapezoidal discretization, BCNorm, complex A
24
+ (data-dependent RoPE on B/C)
25
+ - **QK-norm**: per-head RMSNorm on Q and K projections (OLMo 2 style)
26
+ - **GatedGQA**: sigmoid output gating on attention
27
+ (arXiv:2505.06708)
28
+ - **ChunkedGQA**: fixed-size local attention with per-chunk RoPE
29
+ (Llama 4 iRoPE pattern)
30
+ - **LatentMoE**: down-project before routing for many-expert MoE
31
+ (arXiv:2601.18089)
32
+ - **SharedExpertMoE**: shared expert alongside routed experts
33
+ (DeepSeek V3 style)
34
+ - **ReluSquaredFFN**: squared ReLU activation (Primer / Nemotron)
35
+ - **muP parameterization**: width-independent hyperparameter transfer
36
+ - **Dropout support**: configurable dropout in attention and FFN
37
+ - **SparseGQA (DSA)**: DeepSeek Sparse Attention with compressed tokens,
38
+ selected tokens, and sliding window (arXiv:2512.02556)
39
+ - **GRPOTrainer**: full GRPO training loop with group sampling, reward
40
+ scoring, and clipped surrogate objective (arXiv:2501.12948)
41
+ - **Beam search**: standard beam search with optional custom scoring
42
+ - **RewardModel**: language model + scalar head for reward scoring
43
+
44
+ ## [0.1.0] - 2026-03-11
45
+
46
+ Initial release.
47
+
48
+ ### Added
49
+
50
+ - **8 architecture config factories**: GPT, LLaMA, Gemma, Qwen, Mixtral (MoE),
51
+ DeepSeek V2 (MLA), Gemma 3 (sliding window), Qwen 3.5 (hybrid DeltaNet)
52
+ - **Core components**: MHA, GQA, MLA, GatedDeltaNet, SlidingWindowGQA,
53
+ StandardFFN, GatedFFN, MoEFFN, SharedExpertMoEFFN, RMSNorm, LayerNorm,
54
+ RoPE, ALiBi, sinusoidal positional encoding
55
+ - **ConfigurableBlock** with typed Registry pattern for component resolution
56
+ - **LanguageModel** base class with tied/untied embeddings and KV cache
57
+ - **Compiled training** with `mx.compile`, `nn.value_and_grad`, gradient
58
+ clipping, and cosine/linear/warmup learning rate schedules
59
+ - **Optimizers**: AdamW, Lion, Adafactor, SGD with momentum
60
+ - **Advanced training**: DPO, GRPO, multi-token prediction, curriculum
61
+ learning, knowledge distillation
62
+ - **LoRA and QLoRA**: parameter-efficient fine-tuning with optional 4-bit
63
+ quantization
64
+ - **Post-training quantization**: 4-bit and 8-bit via MLX native quantization,
65
+ with dequantization support
66
+ - **Inference**: autoregressive generation with KV cache, streaming generation,
67
+ top-k/top-p/temperature sampling, repetition penalty, stop tokens
68
+ - **Advanced inference**: best-of-N sampling, majority vote, speculative
69
+ decoding
70
+ - **HuggingFace integration**: load pretrained weights (`load_from_hf`),
71
+ tokenizer wrapper (`HFTokenizer`), streaming dataset (`HFDataset`)
72
+ - **Data pipeline**: CharTokenizer, TiktokenTokenizer, TextDataset,
73
+ TokenDataset, batch iterator
74
+ - **Evaluation**: perplexity, bits-per-byte, pass@k for code generation
75
+ - **Experiment framework**: ExperimentRunner with time budgets, ExperimentLog,
76
+ grid/random hyperparameter sweeps, statistical analysis (confidence
77
+ intervals, Cohen's d, experiment comparison)
78
+ - **MLX profiling**: benchmark_fn, memory_estimate, profile_forward,
79
+ profile_generation, count_parameters_by_module
80
+ - **CLI**: `lmxlab list`, `lmxlab info`, `lmxlab count`
81
+ - **Callbacks**: MetricsLogger, EarlyStopping, ThroughputMonitor
82
+ - **Checkpointing**: save/load via safetensors with JSON metadata
83
+ - **31 recipe scripts** covering training, fine-tuning, DPO, GRPO, MTP,
84
+ distillation, curriculum learning, architecture comparison, ablation
85
+ studies, quantization, speculative decoding, and more
86
+ - **Documentation site** with MkDocs Material: architecture guides, MLX
87
+ idioms, model comparison, API reference, recipes index, devlog
88
+ - **PyPI publish workflow** via trusted publishing (OIDC)
89
+
90
+ [Unreleased]: https://github.com/michaelellis003/lmxlab/compare/v0.2.0...HEAD
91
+ [0.2.0]: https://github.com/michaelellis003/lmxlab/compare/v0.1.0...v0.2.0
92
+ [0.1.0]: https://github.com/michaelellis003/lmxlab/releases/tag/v0.1.0
@@ -0,0 +1,153 @@
1
+ # Contributing to lmxlab
2
+
3
+ Thanks for your interest in contributing! lmxlab is a research platform,
4
+ so clarity and rapid iteration are valued over production optimization.
5
+
6
+ ## Setup
7
+
8
+ ```bash
9
+ git clone https://github.com/michaelellis003/lmxlab.git
10
+ cd lmxlab
11
+ uv sync --extra dev
12
+ uv run pre-commit install
13
+ uv run pre-commit install --hook-type commit-msg
14
+ ```
15
+
16
+ The pre-commit hooks will automatically:
17
+ - Run ruff lint (with auto-fix) and ruff format on staged files
18
+ - Verify `uv.lock` stays in sync with `pyproject.toml`
19
+ - Enforce conventional commit message format
20
+
21
+ ## Development workflow
22
+
23
+ 1. Create a feature branch from `main`:
24
+ ```bash
25
+ git checkout -b feat/my-feature
26
+ ```
27
+
28
+ 2. Write tests first (TDD). Tests go in `tests/`:
29
+ ```bash
30
+ uv run pytest tests/test_my_module.py -v
31
+ ```
32
+
33
+ 3. Implement the feature in `src/lmxlab/`.
34
+
35
+ 4. Verify everything passes locally before pushing:
36
+ ```bash
37
+ uv run pytest # All tests
38
+ uv run ruff check src/ tests/ recipes/ # Lint
39
+ uv run ruff format --check src/ tests/ recipes/ # Formatting
40
+ uv run mkdocs build --strict # Docs build
41
+ ```
42
+
43
+ 5. Open a PR against `main`. CI must pass before merging.
44
+
45
+ ## CI pipeline
46
+
47
+ Every PR runs three jobs:
48
+
49
+ - **lint** (ubuntu): `ruff check` + `ruff format --check` on `src/`, `tests/`, `recipes/`
50
+ - **docs** (ubuntu): `mkdocs build --strict` catches broken links/refs
51
+ - **test** (macos-14): pytest on Apple Silicon (MLX requires M-series)
52
+
53
+ All three must pass before merging. Do not bypass failing checks with `gh pr merge --admin`.
54
+
55
+ ## Keeping `uv.lock` in sync
56
+
57
+ If you change `pyproject.toml` (add/remove/update dependencies), you must
58
+ regenerate the lockfile:
59
+
60
+ ```bash
61
+ uv lock
62
+ ```
63
+
64
+ The `uv-lock` pre-commit hook catches this automatically. If CI fails with
65
+ "lockfile needs to be updated", run `uv lock` and commit the result.
66
+
67
+ ## Branch naming
68
+
69
+ - `feat/` — new features
70
+ - `fix/` — bug fixes
71
+ - `docs/` — documentation changes
72
+ - `refactor/` — code restructuring
73
+ - `test/` — test additions
74
+
75
+ ## Commit messages
76
+
77
+ Follow the `type: description` format (enforced by pre-commit hook):
78
+
79
+ ```
80
+ feat: add sliding window attention
81
+ fix: correct RoPE dimension calculation
82
+ docs: expand installation guide
83
+ test: add CLI command tests
84
+ refactor: simplify MoE routing logic
85
+ ```
86
+
87
+ ## Code style
88
+
89
+ - **Line length:** 79 characters
90
+ - **Quotes:** double quotes (enforced by ruff)
91
+ - **Docstrings:** Google style
92
+ - **Type annotations:** required on all public functions
93
+ - **Imports:** sorted by ruff (stdlib, third-party, local)
94
+
95
+ Ruff handles formatting and linting:
96
+ ```bash
97
+ uv run ruff check --fix src/ tests/ recipes/ # Auto-fix lint issues
98
+ uv run ruff format src/ tests/ recipes/ # Auto-format
99
+ ```
100
+
101
+ ## Testing
102
+
103
+ - Use **behavioral tests** for ML code:
104
+ - Shape tests: output dimensions are correct
105
+ - Invariance tests: same input + seed = same output
106
+ - Directional tests: loss decreases after training
107
+ - Minimum functionality: no NaN, no Inf
108
+ - Keep tests fast: use tiny model configs (`gpt_tiny()`, `llama_tiny()`)
109
+ - Mark slow tests with `@pytest.mark.slow`
110
+
111
+ ## Architecture guidelines
112
+
113
+ - **Config factories, not subclasses.** New architectures should be
114
+ config factory functions, not new model classes.
115
+ - **Registry pattern.** New attention/FFN/norm types should register
116
+ themselves in the appropriate registry.
117
+ - **Simplicity bias.** When two approaches achieve similar results,
118
+ prefer the simpler one.
119
+ - **Clarity.** Comments should explain *why*, not just *what*.
120
+
121
+ ## Citations and attribution
122
+
123
+ Every new building block (attention, FFN, position encoding, SSM,
124
+ etc.) must cite its source paper with an arXiv ID in the module
125
+ docstring:
126
+
127
+ ```python
128
+ """My new attention variant.
129
+
130
+ Reference: Author (Year, arXiv:XXXX.XXXXX)
131
+ """
132
+ ```
133
+
134
+ Code adapted from reference implementations must note the source:
135
+
136
+ ```python
137
+ # Cross-references:
138
+ # - org/repo filename.py (canonical implementation)
139
+ # - HuggingFace transformers modeling_xxx.py
140
+ ```
141
+
142
+ Use `Reference:` for the originating paper and `Cross-references:`
143
+ for implementation sources consulted during development.
144
+
145
+ ## Adding a new architecture
146
+
147
+ 1. Create `src/lmxlab/models/myarch.py` with a config factory function
148
+ 2. Register any new components in the appropriate registry
149
+ 3. Add a `myarch_tiny()` config for tests
150
+ 4. Add tests in `tests/test_architectures.py`
151
+ 5. Update `src/lmxlab/models/__init__.py` exports
152
+ 6. Add to CLI in `src/lmxlab/cli.py`
153
+ 7. Document in `docs/models/index.md`
lmxlab-0.2.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Michael Ellis
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.