mlxsmith 0.1.0__tar.gz → 0.1.2__tar.gz

This diff shows the changes between publicly released versions of the package as they appear in the public registry and is provided for informational purposes only.
Files changed (87)
  1. mlxsmith-0.1.2/PKG-INFO +283 -0
  2. mlxsmith-0.1.2/README.md +228 -0
  3. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/pyproject.toml +2 -3
  4. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/accel/__init__.py +0 -3
  5. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/api/handlers.py +2 -5
  6. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/cli.py +5 -6
  7. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/config.py +1 -2
  8. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/config_models.py +1 -1
  9. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/llm/interface.py +3 -5
  10. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/llm/mock_backend.py +1 -1
  11. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/orchestrator/daemon.py +4 -9
  12. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/orchestrator/inference_worker.py +1 -1
  13. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/orchestrator/queue.py +2 -2
  14. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/orchestrator/trainer_worker.py +0 -2
  15. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/rlm/loop.py +6 -8
  16. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/rlm/weights.py +1 -1
  17. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/sdk/future.py +2 -3
  18. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/sdk/sampling_client.py +1 -2
  19. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/sdk/training_client.py +3 -4
  20. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/server.py +1 -6
  21. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/train/lora.py +4 -0
  22. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/util.py +0 -6
  23. mlxsmith-0.1.2/src/mlxsmith.egg-info/PKG-INFO +283 -0
  24. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith.egg-info/SOURCES.txt +0 -1
  25. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith.egg-info/requires.txt +0 -3
  26. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/tests/test_api.py +0 -4
  27. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/tests/test_sdk.py +2 -8
  28. mlxsmith-0.1.0/PKG-INFO +0 -163
  29. mlxsmith-0.1.0/README.md +0 -106
  30. mlxsmith-0.1.0/src/mlxsmith/accel/zmlx_backend.py +0 -42
  31. mlxsmith-0.1.0/src/mlxsmith.egg-info/PKG-INFO +0 -163
  32. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/LICENSE +0 -0
  33. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/setup.cfg +0 -0
  34. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/__init__.py +0 -0
  35. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/accel/base.py +0 -0
  36. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/accel/none.py +0 -0
  37. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/adapters.py +0 -0
  38. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/api/__init__.py +0 -0
  39. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/api/schemas.py +0 -0
  40. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/auth.py +0 -0
  41. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/bench.py +0 -0
  42. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/data.py +0 -0
  43. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/envs/__init__.py +0 -0
  44. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/envs/system.py +0 -0
  45. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/envs/token_env.py +0 -0
  46. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/eval.py +0 -0
  47. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/infer.py +0 -0
  48. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/llm/__init__.py +0 -0
  49. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/llm/backend.py +0 -0
  50. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/llm/mlx_lm_backend.py +0 -0
  51. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/llm/registry.py +0 -0
  52. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/models.py +0 -0
  53. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/orchestrator/__init__.py +0 -0
  54. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/rlm/__init__.py +0 -0
  55. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/rlm/corpus.py +0 -0
  56. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/rlm/gating.py +0 -0
  57. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/rlm/generate.py +0 -0
  58. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/rlm/history.py +0 -0
  59. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/rlm/inference.py +0 -0
  60. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/rlm/mutate.py +0 -0
  61. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/rlm/trainer.py +0 -0
  62. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/runs.py +0 -0
  63. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/sdk/__init__.py +0 -0
  64. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/sdk/losses.py +0 -0
  65. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/train/__init__.py +0 -0
  66. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/train/distill.py +0 -0
  67. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/train/pref.py +0 -0
  68. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/train/rft.py +0 -0
  69. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/train/sft.py +0 -0
  70. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/verifiers/__init__.py +0 -0
  71. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/verifiers/compose.py +0 -0
  72. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/verifiers/docker_verifier.py +0 -0
  73. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/verifiers/jsonschema.py +0 -0
  74. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/verifiers/pytest_verifier.py +0 -0
  75. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/verifiers/regex.py +0 -0
  76. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/verifiers/types.py +0 -0
  77. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith.egg-info/dependency_links.txt +0 -0
  78. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith.egg-info/entry_points.txt +0 -0
  79. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith.egg-info/top_level.txt +0 -0
  80. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/tests/test_auth.py +0 -0
  81. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/tests/test_config.py +0 -0
  82. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/tests/test_data.py +0 -0
  83. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/tests/test_rlm.py +0 -0
  84. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/tests/test_rlm_mutation.py +0 -0
  85. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/tests/test_runs.py +0 -0
  86. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/tests/test_training_smoke.py +0 -0
  87. {mlxsmith-0.1.0 → mlxsmith-0.1.2}/tests/test_verifiers.py +0 -0

mlxsmith-0.1.2/PKG-INFO
@@ -0,0 +1,283 @@
+ Metadata-Version: 2.4
+ Name: mlxsmith
+ Version: 0.1.2
+ Summary: Apple Silicon MLX fine-tuning toolkit — SFT, DPO/ORPO, GRPO, distillation, and OpenAI-compatible serving.
+ Author-email: Shannon Labs <hmbown@gmail.com>
+ License: MIT
+ Project-URL: Homepage, https://github.com/Hmbown/MLXSmith
+ Project-URL: Repository, https://github.com/Hmbown/MLXSmith
+ Project-URL: Issues, https://github.com/Hmbown/MLXSmith/issues
+ Keywords: mlx,apple-silicon,llm,fine-tuning,lora,openai-compatible
+ Classifier: Development Status :: 3 - Alpha
+ Classifier: Intended Audience :: Developers
+ Classifier: Intended Audience :: Science/Research
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3 :: Only
+ Classifier: Programming Language :: Python :: 3.10
+ Classifier: Programming Language :: Python :: 3.11
+ Classifier: Programming Language :: Python :: 3.12
+ Classifier: Operating System :: MacOS :: MacOS X
+ Requires-Python: >=3.10
+ Description-Content-Type: text/markdown
+ License-File: LICENSE
+ Requires-Dist: typer>=0.9.0
+ Requires-Dist: rich>=13.7.0
+ Requires-Dist: pyyaml>=6.0.1
+ Requires-Dist: pydantic>=2.5.0
+ Requires-Dist: pydantic-settings>=2.2.1
+ Requires-Dist: tomli>=2.0.1; python_version < "3.11"
+ Requires-Dist: huggingface_hub>=1.3.4
+ Requires-Dist: jsonschema>=4.21.0
+ Provides-Extra: mlx
+ Requires-Dist: mlx>=0.30.4; extra == "mlx"
+ Provides-Extra: llm
+ Requires-Dist: mlx-lm>=0.30.5; extra == "llm"
+ Requires-Dist: transformers>=5.0.0; extra == "llm"
+ Requires-Dist: datasets>=3.0.0; extra == "llm"
+ Provides-Extra: serve
+ Requires-Dist: fastapi>=0.128.0; extra == "serve"
+ Requires-Dist: uvicorn>=0.40.0; extra == "serve"
+ Requires-Dist: httpx>=0.28.0; extra == "serve"
+ Provides-Extra: dev
+ Requires-Dist: pytest>=9.0.0; extra == "dev"
+ Requires-Dist: ruff>=0.14.0; extra == "dev"
+ Provides-Extra: all
+ Requires-Dist: mlx>=0.30.4; extra == "all"
+ Requires-Dist: mlx-lm>=0.30.5; extra == "all"
+ Requires-Dist: transformers>=5.0.0; extra == "all"
+ Requires-Dist: datasets>=3.0.0; extra == "all"
+ Requires-Dist: fastapi>=0.128.0; extra == "all"
+ Requires-Dist: uvicorn>=0.40.0; extra == "all"
+ Requires-Dist: httpx>=0.28.0; extra == "all"
+ Dynamic: license-file
+
+ # mlxsmith
+
+ Apple Silicon MLX fine-tuning toolkit — SFT, DPO/ORPO, GRPO, distillation, and OpenAI-compatible serving.
+
+ **Status:** alpha (v0.1.2). Full training pipeline validated on Qwen3-4B.
+
+ ## Install
+
+ MLX training and serving require macOS on Apple Silicon.
+ Other platforms can use data tools and mock backends.
+
+ ```bash
+ python -m venv .venv && source .venv/bin/activate
+ pip install -U pip
+
+ # Core CLI (data tools, config, project scaffolding)
+ pip install mlxsmith
+
+ # Apple Silicon training + serving
+ pip install "mlxsmith[mlx,llm,serve]"
+
+ # Everything
+ pip install "mlxsmith[all]"
+ ```
+
+ ## Quickstart
+
+ ```bash
+ mlxsmith init myproj
+ cd myproj
+ mlxsmith doctor # check Python, MLX, Metal
+ ```
+
+ ## Training
+
+ ### SFT (LoRA/QLoRA)
+
+ ```bash
+ mlxsmith sft --model cache/mlx/Qwen__Qwen3-4B-Instruct-2507 --data data/sft
+ ```
+
+ Produces run artifacts under `runs/sft_NNNN/` (adapter weights, `metrics.jsonl`, config snapshot).
+
+ ### Preference tuning (DPO/ORPO)
+
+ ```bash
+ mlxsmith pref --model cache/mlx/Qwen__Qwen3-4B-Instruct-2507 \
+ --data data/prefs --algo dpo
+ ```
+
+ Supports DPO and ORPO algorithms with configurable beta and KL coefficients. Expects `{prompt, chosen, rejected}` data format.
+
+ ### Reinforced fine-tuning (GRPO)
+
+ ```bash
+ mlxsmith rft --model cache/mlx/Qwen__Qwen3-4B-Instruct-2507 \
+ --env envs/coding.yaml --verifier verifiers/pytest.py
+ ```
+
+ GRPO-style RL training with token-level environment integration and verifier-based rewards. Rollout acceptance/rejection gating with reward tracking.
+
+ ### Knowledge distillation
+
+ ```bash
+ # Offline distillation (teacher generates, student learns)
+ mlxsmith distill --teacher large-model --student small-model --mode offline
+
+ # Online preference distillation (OPD)
+ mlxsmith distill --teacher large-model --student small-model --mode opd
+ ```
+
+ ### Full pipeline
+
+ ```bash
+ # Run SFT → Pref → RFT in sequence
+ mlxsmith pipeline
+ ```
+
+ ## Serving
+
+ OpenAI-compatible `/v1/chat/completions` endpoint.
+
+ ```bash
+ mlxsmith serve --model runs/sft_0001/adapter --port 8080
+ ```
+
+ ```bash
+ curl http://localhost:8080/v1/chat/completions \
+ -H 'Content-Type: application/json' \
+ -d '{"messages":[{"role":"user","content":"Hello"}],"max_tokens":64}'
+ ```
+
+ Supports streaming (`"stream": true`), logprobs, stop sequences, and an optional UI dashboard (`serve.ui: true` in config).
+
+ ## Data tools
+
+ ```bash
+ mlxsmith data presets # list built-in datasets
+ mlxsmith data pull alpaca # pull a preset
+ mlxsmith data import raw.json --out data/sft/train.jsonl # import ShareGPT → JSONL
+ mlxsmith data split data/sft/train.jsonl --fractions 0.9 0.05 0.05
+ mlxsmith data stats data/sft/train.jsonl # token counts, field analysis
+ mlxsmith data validate data/sft/train.jsonl # structure check
+ ```
+
+ Built-in presets: `alpaca`, `hh-rlhf`, `ultrachat-200k`, `ultrafeedback-binarized-prefs`, `ultrafeedback-binarized-sft`.
+
+ ## Model management
+
+ ```bash
+ # Pull + convert HF model to MLX
+ mlxsmith pull Qwen/Qwen3-4B-Instruct-2507
+
+ # With quantization
+ mlxsmith pull Qwen/Qwen3-4B-Instruct-2507 --quantize --q-bits 4
+
+ # Merge adapters
+ mlxsmith adapters merge runs/sft_0001/adapter runs/pref_0001/adapter --weights 0.7 0.3
+ ```
+
+ ## HF auth
+
+ ```bash
+ mlxsmith auth login --token "$HF_TOKEN"
+ mlxsmith auth status
+ mlxsmith auth logout
+ ```
+
+ ## Eval and bench
+
+ ```bash
+ # Evaluation suite (pass@k with verifier checks)
+ mlxsmith eval --suite eval/suites/coding.yaml
+
+ # Benchmark inference or training throughput
+ mlxsmith bench --mode inference
+ mlxsmith bench --mode trainer
+ mlxsmith bench --mode end_to_end
+ ```
+
+ ## Verifiers
+
+ Built-in verifiers for eval, RFT, and preference tuning:
+
+ - **regex** — pattern matching on completions
+ - **jsonschema** — JSON structure validation
+ - **pytest** — sandboxed test execution
+ - **docker** — containerized verification
+ - **compose** — multi-verifier composition (AND/OR/weighted)
+
+ See `docs/VERIFIERS.md` for the verifier API.
+
+ ## Environment plugin system
+
+ ```bash
+ mlxsmith env list # list available environments
+ mlxsmith env info envs/coding.yaml # show manifest (tasks, verifier, version)
+ mlxsmith env init my_env # scaffold a new environment
+ mlxsmith env install ./my_env # install from directory
+ mlxsmith env package ./my_env # create distributable tarball
+ mlxsmith env run envs/coding.yaml # execute RFT with this environment
+ ```
+
+ Environments define tasks, verifiers, and reward functions for RFT training. See `docs/ENVIRONMENTS.md`.
+
+ ## Config system
+
+ ```bash
+ mlxsmith config show # display merged config (YAML/JSON/TOML)
+ mlxsmith config show --sources # show where each value comes from
+ mlxsmith config init # create default mlxsmith.yaml
+ mlxsmith config validate # check config structure
+ mlxsmith config env # show environment variable mapping
+ ```
+
+ Config sources (in priority order): CLI flags > environment variables (`MLXSMITH__SECTION__KEY`) > config file > defaults.
+
+ ## SDK (programmatic API)
+
+ For building custom training loops:
+
+ ```python
+ from mlxsmith.sdk import load_model, SamplingClient, TrainingClient, TrainingBatch
+
+ loaded = load_model("path/to/model", config)
+
+ # Sampling with logprobs
+ sampler = SamplingClient(loaded.backend)
+ result = sampler.sample("prompt", logprobs_k=5)
+
+ # Training operations
+ trainer = TrainingClient(loaded.backend)
+ trainer.create_optimizer(lr=1e-4, weight_decay=0.01)
+ fb = trainer.forward_backward(batch)
+ trainer.optim_step(fb.result().grads)
+ ```
+
+ Loss functions: DPO, ORPO, GRPO, CISPO, DRO, PPO, importance sampling, cross-entropy.
+
+ ## Research
+
+ ### RLM self-play loop
+
+ RLM (Recursive Language Model) is a research feature — the infrastructure runs but has not produced measured gains yet.
+
+ ```bash
+ mlxsmith rlm # single-process RLM
+ mlxsmith pipeline --orchestrated # multi-process orchestrated RLM
+ mlxsmith rlm status # check iteration state
+ mlxsmith rlm history # view history
+ ```
+
+ Includes task generation, mutation for data diversity, corpus management, EMA-based gating, and weight pointer IPC for multi-process coordination. See `docs/orchestrator.md`.
+
+ ## Docs
+
+ - `docs/PROJECT_FORMAT.md` — project layout and artifacts
+ - `docs/VERIFIERS.md` — verifier API and sandbox behavior
+ - `docs/COMPATIBILITY.md` — tested versions and model families
+ - `docs/ENVIRONMENTS.md` — environment plugin system
+ - `docs/orchestrator.md` — multi-process RLM orchestrator
+ - `docs/rlm-ctl.md` — RLM training guide
+ - `docs/ROADMAP.md` — product direction and milestones
+ - `docs/README.md` — full docs index
+
+ ## License
+
+ MIT
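
The preference-tuning section in the README above names only the `{prompt, chosen, rejected}` record shape for `data/prefs`. A minimal sketch of writing a compatible `train.jsonl`, assuming one JSON object per line; the example strings are illustrative and not taken from the package:

```python
import json
from pathlib import Path

# Illustrative records in the {prompt, chosen, rejected} shape the README describes.
records = [
    {
        "prompt": "Summarize: MLX is an array framework for Apple Silicon.",
        "chosen": "MLX is an array framework built for Apple Silicon.",
        "rejected": "MLX is a GPU driver.",
    },
]

out = Path("data/prefs/train.jsonl")
out.parent.mkdir(parents=True, exist_ok=True)
with out.open("w") as f:
    for rec in records:
        f.write(json.dumps(rec) + "\n")  # one record per line
```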

mlxsmith-0.1.2/README.md
@@ -0,0 +1,228 @@
+ # mlxsmith
+
+ Apple Silicon MLX fine-tuning toolkit — SFT, DPO/ORPO, GRPO, distillation, and OpenAI-compatible serving.
+
+ **Status:** alpha (v0.1.2). Full training pipeline validated on Qwen3-4B.
+
+ ## Install
+
+ MLX training and serving require macOS on Apple Silicon.
+ Other platforms can use data tools and mock backends.
+
+ ```bash
+ python -m venv .venv && source .venv/bin/activate
+ pip install -U pip
+
+ # Core CLI (data tools, config, project scaffolding)
+ pip install mlxsmith
+
+ # Apple Silicon training + serving
+ pip install "mlxsmith[mlx,llm,serve]"
+
+ # Everything
+ pip install "mlxsmith[all]"
+ ```
+
+ ## Quickstart
+
+ ```bash
+ mlxsmith init myproj
+ cd myproj
+ mlxsmith doctor # check Python, MLX, Metal
+ ```
+
+ ## Training
+
+ ### SFT (LoRA/QLoRA)
+
+ ```bash
+ mlxsmith sft --model cache/mlx/Qwen__Qwen3-4B-Instruct-2507 --data data/sft
+ ```
+
+ Produces run artifacts under `runs/sft_NNNN/` (adapter weights, `metrics.jsonl`, config snapshot).
+
+ ### Preference tuning (DPO/ORPO)
+
+ ```bash
+ mlxsmith pref --model cache/mlx/Qwen__Qwen3-4B-Instruct-2507 \
+ --data data/prefs --algo dpo
+ ```
+
+ Supports DPO and ORPO algorithms with configurable beta and KL coefficients. Expects `{prompt, chosen, rejected}` data format.
+
+ ### Reinforced fine-tuning (GRPO)
+
+ ```bash
+ mlxsmith rft --model cache/mlx/Qwen__Qwen3-4B-Instruct-2507 \
+ --env envs/coding.yaml --verifier verifiers/pytest.py
+ ```
+
+ GRPO-style RL training with token-level environment integration and verifier-based rewards. Rollout acceptance/rejection gating with reward tracking.
+
+ ### Knowledge distillation
+
+ ```bash
+ # Offline distillation (teacher generates, student learns)
+ mlxsmith distill --teacher large-model --student small-model --mode offline
+
+ # Online preference distillation (OPD)
+ mlxsmith distill --teacher large-model --student small-model --mode opd
+ ```
+
+ ### Full pipeline
+
+ ```bash
+ # Run SFT → Pref → RFT in sequence
+ mlxsmith pipeline
+ ```
+
+ ## Serving
+
+ OpenAI-compatible `/v1/chat/completions` endpoint.
+
+ ```bash
+ mlxsmith serve --model runs/sft_0001/adapter --port 8080
+ ```
+
+ ```bash
+ curl http://localhost:8080/v1/chat/completions \
+ -H 'Content-Type: application/json' \
+ -d '{"messages":[{"role":"user","content":"Hello"}],"max_tokens":64}'
+ ```
+
+ Supports streaming (`"stream": true`), logprobs, stop sequences, and an optional UI dashboard (`serve.ui: true` in config).
+
+ ## Data tools
+
+ ```bash
+ mlxsmith data presets # list built-in datasets
+ mlxsmith data pull alpaca # pull a preset
+ mlxsmith data import raw.json --out data/sft/train.jsonl # import ShareGPT → JSONL
+ mlxsmith data split data/sft/train.jsonl --fractions 0.9 0.05 0.05
+ mlxsmith data stats data/sft/train.jsonl # token counts, field analysis
+ mlxsmith data validate data/sft/train.jsonl # structure check
+ ```
+
+ Built-in presets: `alpaca`, `hh-rlhf`, `ultrachat-200k`, `ultrafeedback-binarized-prefs`, `ultrafeedback-binarized-sft`.
+
+ ## Model management
+
+ ```bash
+ # Pull + convert HF model to MLX
+ mlxsmith pull Qwen/Qwen3-4B-Instruct-2507
+
+ # With quantization
+ mlxsmith pull Qwen/Qwen3-4B-Instruct-2507 --quantize --q-bits 4
+
+ # Merge adapters
+ mlxsmith adapters merge runs/sft_0001/adapter runs/pref_0001/adapter --weights 0.7 0.3
+ ```
+
+ ## HF auth
+
+ ```bash
+ mlxsmith auth login --token "$HF_TOKEN"
+ mlxsmith auth status
+ mlxsmith auth logout
+ ```
+
+ ## Eval and bench
+
+ ```bash
+ # Evaluation suite (pass@k with verifier checks)
+ mlxsmith eval --suite eval/suites/coding.yaml
+
+ # Benchmark inference or training throughput
+ mlxsmith bench --mode inference
+ mlxsmith bench --mode trainer
+ mlxsmith bench --mode end_to_end
+ ```
+
+ ## Verifiers
+
+ Built-in verifiers for eval, RFT, and preference tuning:
+
+ - **regex** — pattern matching on completions
+ - **jsonschema** — JSON structure validation
+ - **pytest** — sandboxed test execution
+ - **docker** — containerized verification
+ - **compose** — multi-verifier composition (AND/OR/weighted)
+
+ See `docs/VERIFIERS.md` for the verifier API.
+
+ ## Environment plugin system
+
+ ```bash
+ mlxsmith env list # list available environments
+ mlxsmith env info envs/coding.yaml # show manifest (tasks, verifier, version)
+ mlxsmith env init my_env # scaffold a new environment
+ mlxsmith env install ./my_env # install from directory
+ mlxsmith env package ./my_env # create distributable tarball
+ mlxsmith env run envs/coding.yaml # execute RFT with this environment
+ ```
+
+ Environments define tasks, verifiers, and reward functions for RFT training. See `docs/ENVIRONMENTS.md`.
+
+ ## Config system
+
+ ```bash
+ mlxsmith config show # display merged config (YAML/JSON/TOML)
+ mlxsmith config show --sources # show where each value comes from
+ mlxsmith config init # create default mlxsmith.yaml
+ mlxsmith config validate # check config structure
+ mlxsmith config env # show environment variable mapping
+ ```
+
+ Config sources (in priority order): CLI flags > environment variables (`MLXSMITH__SECTION__KEY`) > config file > defaults.
+
+ ## SDK (programmatic API)
+
+ For building custom training loops:
+
+ ```python
+ from mlxsmith.sdk import load_model, SamplingClient, TrainingClient, TrainingBatch
+
+ loaded = load_model("path/to/model", config)
+
+ # Sampling with logprobs
+ sampler = SamplingClient(loaded.backend)
+ result = sampler.sample("prompt", logprobs_k=5)
+
+ # Training operations
+ trainer = TrainingClient(loaded.backend)
+ trainer.create_optimizer(lr=1e-4, weight_decay=0.01)
+ fb = trainer.forward_backward(batch)
+ trainer.optim_step(fb.result().grads)
+ ```
+
+ Loss functions: DPO, ORPO, GRPO, CISPO, DRO, PPO, importance sampling, cross-entropy.
+
+ ## Research
+
+ ### RLM self-play loop
+
+ RLM (Recursive Language Model) is a research feature — the infrastructure runs but has not produced measured gains yet.
+
+ ```bash
+ mlxsmith rlm # single-process RLM
+ mlxsmith pipeline --orchestrated # multi-process orchestrated RLM
+ mlxsmith rlm status # check iteration state
+ mlxsmith rlm history # view history
+ ```
+
+ Includes task generation, mutation for data diversity, corpus management, EMA-based gating, and weight pointer IPC for multi-process coordination. See `docs/orchestrator.md`.
+
+ ## Docs
+
+ - `docs/PROJECT_FORMAT.md` — project layout and artifacts
+ - `docs/VERIFIERS.md` — verifier API and sandbox behavior
+ - `docs/COMPATIBILITY.md` — tested versions and model families
+ - `docs/ENVIRONMENTS.md` — environment plugin system
+ - `docs/orchestrator.md` — multi-process RLM orchestrator
+ - `docs/rlm-ctl.md` — RLM training guide
+ - `docs/ROADMAP.md` — product direction and milestones
+ - `docs/README.md` — full docs index
+
+ ## License
+
+ MIT
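
The serving section above shows only a raw `curl` call. Because the endpoint is described as OpenAI-compatible, a standard OpenAI client pointed at the local server should also work; this is a hedged sketch: the `openai` package, the placeholder model name, and the dummy API key are assumptions, while the route and port come from the README.

```python
from openai import OpenAI

# Server started with `mlxsmith serve ... --port 8080`; the API key is unused locally.
client = OpenAI(base_url="http://localhost:8080/v1", api_key="not-needed")

resp = client.chat.completions.create(
    model="local",  # placeholder; the server may ignore or validate this field
    messages=[{"role": "user", "content": "Hello"}],
    max_tokens=64,
)
print(resp.choices[0].message.content)
```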

{mlxsmith-0.1.0 → mlxsmith-0.1.2}/pyproject.toml
@@ -4,8 +4,8 @@ build-backend = "setuptools.build_meta"

  [project]
  name = "mlxsmith"
- version = "0.1.0"
- description = "Apple Silicon MLX fine-tuning and OpenAI-compatible serving (SFT stable; preference/RL experimental)."
+ version = "0.1.2"
+ description = "Apple Silicon MLX fine-tuning toolkit — SFT, DPO/ORPO, GRPO, distillation, and OpenAI-compatible serving."
  readme = {file = "README.md", content-type = "text/markdown"}
  requires-python = ">=3.10"
  license = {text = "MIT"}
@@ -52,7 +52,6 @@ serve = [
      "uvicorn>=0.40.0",
      "httpx>=0.28.0",
  ]
- zmlx = ["zmlx"]
  dev = ["pytest>=9.0.0", "ruff>=0.14.0"]
  all = [
      "mlx>=0.30.4",

{mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/accel/__init__.py
@@ -1,10 +1,7 @@
  from __future__ import annotations
  from .none import NoneBackend
- from .zmlx_backend import ZMLXBackend

  def get_backend(name: str):
      if name == "none":
          return NoneBackend()
-     if name == "zmlx":
-         return ZMLXBackend()
      raise ValueError(f"Unknown accel backend: {name}")
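
The hunk above is the entire backend factory after the change, so the runtime effect is easy to check; a small usage sketch (the import path follows the file listing, and the error message is the one shown in the diff):

```python
from mlxsmith.accel import get_backend

backend = get_backend("none")  # still returns NoneBackend()

try:
    get_backend("zmlx")  # the zmlx branch was removed in 0.1.2
except ValueError as exc:
    print(exc)  # Unknown accel backend: zmlx
```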

{mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/api/handlers.py
@@ -20,11 +20,10 @@ import uuid
  from pathlib import Path
  from typing import Any, AsyncGenerator, Callable, Dict, List, Optional

- from fastapi import APIRouter, Depends, FastAPI, HTTPException, Request, Security, status
+ from fastapi import APIRouter, FastAPI, HTTPException, Request, Security, status
  from starlette.middleware.base import BaseHTTPMiddleware
  from fastapi.responses import StreamingResponse
  from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
- from pydantic import BaseModel

  from .schemas import (
      AdapterReloadRequest,
@@ -1102,12 +1101,10 @@ def create_router(
      models, use the list endpoint to check completion status.
      """
      cache_dir = _get_cache_dir()
-     local_path = cache_dir / "mlx" / request.model_id.replace("/", "__")
-
+
      try:
          # Import here to avoid circular dependencies
          from ..models import hf_pull
-         from ..config import ProjectConfig

          # Get HF token if available
          hf_token = None

{mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/cli.py
@@ -43,7 +43,7 @@ from .envs import (

  app = typer.Typer(
      add_completion=False,
-     help="mlxsmith — MLX fine-tuning + OpenAI-compatible serving (SFT stable; preference/RL experimental)",
+     help="mlxsmith — Apple Silicon MLX fine-tuning toolkit: SFT, DPO/ORPO, GRPO, distillation, and OpenAI-compatible serving.",
  )
  console = Console()

@@ -83,7 +83,6 @@ def doctor():
      table.add_row("cpu_count", str(info.cpu_count))
      table.add_row("metal", str(info.has_metal))
      table.add_row("mlx", f"{info.has_mlx} {info.mlx_version or ''}".strip())
-     table.add_row("zmlx", str(info.has_zmlx))
      console.print(table)


@@ -564,7 +563,7 @@ def config_validate(

      try:
          cfg = load_config(cfg_path, require=True)
-         console.print(f"[green]✓ Configuration is valid[/green]")
+         console.print("[green]✓ Configuration is valid[/green]")

          # Show summary
          table = Table(title="Configuration Summary")
@@ -593,9 +592,9 @@ def config_env(
      """Show available environment variables."""
      cfg = ProjectConfig()

-     console.print(f"\n[bold]Environment Variable Configuration[/bold]")
+     console.print("\n[bold]Environment Variable Configuration[/bold]")
      console.print(f"Prefix: [cyan]{prefix}[/cyan]")
-     console.print(f"Nested delimiter: [cyan]__[/cyan] (double underscore)\n")
+     console.print("Nested delimiter: [cyan]__[/cyan] (double underscore)\n")

      table = Table(title=f"Available {prefix}* Environment Variables")
      table.add_column("Environment Variable")
@@ -729,7 +728,7 @@ def rlm_history(limit: int = typer.Option(10, "--limit")):

  @accel_app.command("status")
  def accel_status():
-     backends = ["none", "zmlx"]
+     backends = ["none"]
      table = Table(title="mlxsmith accel status")
      table.add_column("backend")
      table.add_column("available")
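
Three of the `cli.py` edits above simply drop the `f` prefix from strings that contain no placeholders. A standalone sketch of the lint rule this satisfies, assuming it is ruff's F541 check (the project lists `ruff` as a dev dependency):

```python
# Flagged by ruff as F541 ("f-string without any placeholders"):
message = f"[green]✓ Configuration is valid[/green]"  # noqa: F541

# The 0.1.2 form: a plain string with identical output.
message = "[green]✓ Configuration is valid[/green]"
print(message)
```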

{mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/config.py
@@ -18,7 +18,6 @@ Config files support @path syntax:
  from __future__ import annotations

  import json
- import os
  from pathlib import Path
  from typing import Any, Dict, List, Optional, Tuple, Union

@@ -94,7 +93,7 @@ class ProjectSettings(BaseSettings):


  # Import CLI aliases from models
- from .config_models import CLI_ALIASES as _CLI_ALIASES
+ from .config_models import CLI_ALIASES as _CLI_ALIASES  # noqa: E402


  def resolve_config_path(config: Union[str, Path], root: Optional[Path] = None) -> Path:
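
The only non-deletion change to `config.py` is the `# noqa: E402` added to the late import. A standalone sketch of why the suppression is needed: E402 is the "module level import not at top of file" check, and the `CLI_ALIASES` import sits below other top-level code.

```python
"""A file shaped like the tail of config.py: top-level code runs before a late import."""

SETTINGS_READY = True  # any top-level statement before an import triggers E402

# Without the noqa comment, ruff/flake8 would report E402 on the next line.
from json import dumps  # noqa: E402

print(dumps({"settings_ready": SETTINGS_READY}))
```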

{mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/config_models.py
@@ -6,7 +6,7 @@ from typing import Dict, List, Literal, Optional, Any

  from pydantic import BaseModel, Field, field_validator

- AccelBackendName = Literal["none", "zmlx"]
+ AccelBackendName = Literal["none"]


  class ModelConfig(BaseModel):
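
Narrowing `AccelBackendName` to `Literal["none"]` means a config that still requests the removed backend now fails validation up front instead of reaching `get_backend`. A minimal sketch with an illustrative model (the real field lives on the package's config models, which are not shown in this diff):

```python
from typing import Literal

from pydantic import BaseModel, ValidationError

AccelBackendName = Literal["none"]  # the 0.1.2 definition from the hunk above


class AccelSettings(BaseModel):
    backend: AccelBackendName = "none"


AccelSettings(backend="none")  # accepted

try:
    AccelSettings(backend="zmlx")  # accepted in 0.1.0, rejected in 0.1.2
except ValidationError as exc:
    print(exc)
```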

{mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/llm/interface.py
@@ -182,11 +182,9 @@ def compute_logprobs(
      else:
          prompt_len = len(prompt_ids)

-     # Get generation with logprobs
-     full_text = backend.decode(ids)
-
-     # Use backend's sequence_logprob if available
-     seq_logprob = backend.sequence_logprob(ids, prompt_len=prompt_len)
+     # Decode and compute sequence-level logprob (used by callers via backend state)
+     backend.decode(ids)
+     backend.sequence_logprob(ids, prompt_len=prompt_len)

      # For per-token logprobs, we'd need to do a forward pass
      # This is a simplified version

{mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/llm/mock_backend.py
@@ -2,7 +2,7 @@ from __future__ import annotations

  import random
  from dataclasses import dataclass
- from typing import Sequence, Any, List, Dict, Optional
+ from typing import Sequence, Any, List, Dict

  from .backend import Generation