mlxsmith 0.1.0__tar.gz → 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. mlxsmith-0.1.1/PKG-INFO +293 -0
  2. mlxsmith-0.1.1/README.md +236 -0
  3. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/pyproject.toml +2 -2
  4. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/src/mlxsmith/api/handlers.py +2 -5
  5. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/src/mlxsmith/cli.py +4 -4
  6. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/src/mlxsmith/config.py +1 -2
  7. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/src/mlxsmith/llm/interface.py +3 -5
  8. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/src/mlxsmith/llm/mock_backend.py +1 -1
  9. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/src/mlxsmith/orchestrator/daemon.py +4 -9
  10. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/src/mlxsmith/orchestrator/inference_worker.py +1 -1
  11. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/src/mlxsmith/orchestrator/queue.py +2 -2
  12. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/src/mlxsmith/orchestrator/trainer_worker.py +0 -2
  13. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/src/mlxsmith/rlm/loop.py +6 -8
  14. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/src/mlxsmith/rlm/weights.py +1 -1
  15. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/src/mlxsmith/sdk/future.py +2 -3
  16. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/src/mlxsmith/sdk/sampling_client.py +1 -2
  17. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/src/mlxsmith/sdk/training_client.py +3 -4
  18. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/src/mlxsmith/server.py +1 -6
  19. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/src/mlxsmith/train/lora.py +4 -0
  20. mlxsmith-0.1.1/src/mlxsmith.egg-info/PKG-INFO +293 -0
  21. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/tests/test_api.py +0 -4
  22. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/tests/test_sdk.py +2 -8
  23. mlxsmith-0.1.0/PKG-INFO +0 -163
  24. mlxsmith-0.1.0/README.md +0 -106
  25. mlxsmith-0.1.0/src/mlxsmith.egg-info/PKG-INFO +0 -163
  26. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/LICENSE +0 -0
  27. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/setup.cfg +0 -0
  28. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/src/mlxsmith/__init__.py +0 -0
  29. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/src/mlxsmith/accel/__init__.py +0 -0
  30. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/src/mlxsmith/accel/base.py +0 -0
  31. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/src/mlxsmith/accel/none.py +0 -0
  32. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/src/mlxsmith/accel/zmlx_backend.py +0 -0
  33. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/src/mlxsmith/adapters.py +0 -0
  34. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/src/mlxsmith/api/__init__.py +0 -0
  35. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/src/mlxsmith/api/schemas.py +0 -0
  36. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/src/mlxsmith/auth.py +0 -0
  37. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/src/mlxsmith/bench.py +0 -0
  38. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/src/mlxsmith/config_models.py +0 -0
  39. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/src/mlxsmith/data.py +0 -0
  40. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/src/mlxsmith/envs/__init__.py +0 -0
  41. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/src/mlxsmith/envs/system.py +0 -0
  42. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/src/mlxsmith/envs/token_env.py +0 -0
  43. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/src/mlxsmith/eval.py +0 -0
  44. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/src/mlxsmith/infer.py +0 -0
  45. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/src/mlxsmith/llm/__init__.py +0 -0
  46. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/src/mlxsmith/llm/backend.py +0 -0
  47. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/src/mlxsmith/llm/mlx_lm_backend.py +0 -0
  48. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/src/mlxsmith/llm/registry.py +0 -0
  49. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/src/mlxsmith/models.py +0 -0
  50. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/src/mlxsmith/orchestrator/__init__.py +0 -0
  51. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/src/mlxsmith/rlm/__init__.py +0 -0
  52. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/src/mlxsmith/rlm/corpus.py +0 -0
  53. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/src/mlxsmith/rlm/gating.py +0 -0
  54. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/src/mlxsmith/rlm/generate.py +0 -0
  55. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/src/mlxsmith/rlm/history.py +0 -0
  56. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/src/mlxsmith/rlm/inference.py +0 -0
  57. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/src/mlxsmith/rlm/mutate.py +0 -0
  58. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/src/mlxsmith/rlm/trainer.py +0 -0
  59. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/src/mlxsmith/runs.py +0 -0
  60. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/src/mlxsmith/sdk/__init__.py +0 -0
  61. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/src/mlxsmith/sdk/losses.py +0 -0
  62. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/src/mlxsmith/train/__init__.py +0 -0
  63. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/src/mlxsmith/train/distill.py +0 -0
  64. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/src/mlxsmith/train/pref.py +0 -0
  65. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/src/mlxsmith/train/rft.py +0 -0
  66. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/src/mlxsmith/train/sft.py +0 -0
  67. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/src/mlxsmith/util.py +0 -0
  68. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/src/mlxsmith/verifiers/__init__.py +0 -0
  69. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/src/mlxsmith/verifiers/compose.py +0 -0
  70. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/src/mlxsmith/verifiers/docker_verifier.py +0 -0
  71. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/src/mlxsmith/verifiers/jsonschema.py +0 -0
  72. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/src/mlxsmith/verifiers/pytest_verifier.py +0 -0
  73. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/src/mlxsmith/verifiers/regex.py +0 -0
  74. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/src/mlxsmith/verifiers/types.py +0 -0
  75. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/src/mlxsmith.egg-info/SOURCES.txt +0 -0
  76. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/src/mlxsmith.egg-info/dependency_links.txt +0 -0
  77. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/src/mlxsmith.egg-info/entry_points.txt +0 -0
  78. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/src/mlxsmith.egg-info/requires.txt +0 -0
  79. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/src/mlxsmith.egg-info/top_level.txt +0 -0
  80. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/tests/test_auth.py +0 -0
  81. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/tests/test_config.py +0 -0
  82. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/tests/test_data.py +0 -0
  83. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/tests/test_rlm.py +0 -0
  84. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/tests/test_rlm_mutation.py +0 -0
  85. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/tests/test_runs.py +0 -0
  86. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/tests/test_training_smoke.py +0 -0
  87. {mlxsmith-0.1.0 → mlxsmith-0.1.1}/tests/test_verifiers.py +0 -0
@@ -0,0 +1,293 @@
1
+ Metadata-Version: 2.4
2
+ Name: mlxsmith
3
+ Version: 0.1.1
4
+ Summary: Apple Silicon MLX fine-tuning toolkit — SFT, DPO/ORPO, GRPO, distillation, and OpenAI-compatible serving.
5
+ Author-email: Shannon Labs <hmbown@gmail.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/Hmbown/MLXSmith
8
+ Project-URL: Repository, https://github.com/Hmbown/MLXSmith
9
+ Project-URL: Issues, https://github.com/Hmbown/MLXSmith/issues
10
+ Keywords: mlx,apple-silicon,llm,fine-tuning,lora,openai-compatible
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: Intended Audience :: Science/Research
14
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
15
+ Classifier: License :: OSI Approved :: MIT License
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3 :: Only
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Operating System :: MacOS :: MacOS X
22
+ Requires-Python: >=3.10
23
+ Description-Content-Type: text/markdown
24
+ License-File: LICENSE
25
+ Requires-Dist: typer>=0.9.0
26
+ Requires-Dist: rich>=13.7.0
27
+ Requires-Dist: pyyaml>=6.0.1
28
+ Requires-Dist: pydantic>=2.5.0
29
+ Requires-Dist: pydantic-settings>=2.2.1
30
+ Requires-Dist: tomli>=2.0.1; python_version < "3.11"
31
+ Requires-Dist: huggingface_hub>=1.3.4
32
+ Requires-Dist: jsonschema>=4.21.0
33
+ Provides-Extra: mlx
34
+ Requires-Dist: mlx>=0.30.4; extra == "mlx"
35
+ Provides-Extra: llm
36
+ Requires-Dist: mlx-lm>=0.30.5; extra == "llm"
37
+ Requires-Dist: transformers>=5.0.0; extra == "llm"
38
+ Requires-Dist: datasets>=3.0.0; extra == "llm"
39
+ Provides-Extra: serve
40
+ Requires-Dist: fastapi>=0.128.0; extra == "serve"
41
+ Requires-Dist: uvicorn>=0.40.0; extra == "serve"
42
+ Requires-Dist: httpx>=0.28.0; extra == "serve"
43
+ Provides-Extra: zmlx
44
+ Requires-Dist: zmlx; extra == "zmlx"
45
+ Provides-Extra: dev
46
+ Requires-Dist: pytest>=9.0.0; extra == "dev"
47
+ Requires-Dist: ruff>=0.14.0; extra == "dev"
48
+ Provides-Extra: all
49
+ Requires-Dist: mlx>=0.30.4; extra == "all"
50
+ Requires-Dist: mlx-lm>=0.30.5; extra == "all"
51
+ Requires-Dist: transformers>=5.0.0; extra == "all"
52
+ Requires-Dist: datasets>=3.0.0; extra == "all"
53
+ Requires-Dist: fastapi>=0.128.0; extra == "all"
54
+ Requires-Dist: uvicorn>=0.40.0; extra == "all"
55
+ Requires-Dist: httpx>=0.28.0; extra == "all"
56
+ Dynamic: license-file
57
+
58
+ # mlxsmith
59
+
60
+ Apple Silicon MLX fine-tuning toolkit — SFT, DPO/ORPO, GRPO, distillation, and OpenAI-compatible serving.
61
+
62
+ **Status:** alpha (v0.1.1). Full training pipeline validated on Qwen3-4B.
63
+
64
+ ## Install
65
+
66
+ MLX training and serving require macOS on Apple Silicon.
67
+ Other platforms can use data tools and mock backends.
68
+
69
+ ```bash
70
+ python -m venv .venv && source .venv/bin/activate
71
+ pip install -U pip
72
+
73
+ # Core CLI (data tools, config, project scaffolding)
74
+ pip install mlxsmith
75
+
76
+ # Apple Silicon training + serving
77
+ pip install "mlxsmith[mlx,llm,serve]"
78
+
79
+ # Everything
80
+ pip install "mlxsmith[all]"
81
+ ```
82
+
83
+ ## Quickstart
84
+
85
+ ```bash
86
+ mlxsmith init myproj
87
+ cd myproj
88
+ mlxsmith doctor # check Python, MLX, Metal, ZMLX
89
+ ```
90
+
91
+ ## Training
92
+
93
+ ### SFT (LoRA/QLoRA)
94
+
95
+ ```bash
96
+ mlxsmith sft --model cache/mlx/Qwen__Qwen3-4B-Instruct-2507 --data data/sft
97
+ ```
98
+
99
+ Produces run artifacts under `runs/sft_NNNN/` (adapter weights, `metrics.jsonl`, config snapshot).
100
+
101
+ ### Preference tuning (DPO/ORPO)
102
+
103
+ ```bash
104
+ mlxsmith pref --model cache/mlx/Qwen__Qwen3-4B-Instruct-2507 \
105
+ --data data/prefs --algo dpo
106
+ ```
107
+
108
+ Supports DPO and ORPO algorithms with configurable beta and KL coefficients. Expects `{prompt, chosen, rejected}` data format.
109
+
110
+ ### Reinforced fine-tuning (GRPO)
111
+
112
+ ```bash
113
+ mlxsmith rft --model cache/mlx/Qwen__Qwen3-4B-Instruct-2507 \
114
+ --env envs/coding.yaml --verifier verifiers/pytest.py
115
+ ```
116
+
117
+ GRPO-style RL training with token-level environment integration and verifier-based rewards. Rollout acceptance/rejection gating with reward tracking.
118
+
119
+ ### Knowledge distillation
120
+
121
+ ```bash
122
+ # Offline distillation (teacher generates, student learns)
123
+ mlxsmith distill --teacher large-model --student small-model --mode offline
124
+
125
+ # Online preference distillation (OPD)
126
+ mlxsmith distill --teacher large-model --student small-model --mode opd
127
+ ```
128
+
129
+ ### Full pipeline
130
+
131
+ ```bash
132
+ # Run SFT → Pref → RFT in sequence
133
+ mlxsmith pipeline
134
+ ```
135
+
136
+ ## Serving
137
+
138
+ OpenAI-compatible `/v1/chat/completions` endpoint.
139
+
140
+ ```bash
141
+ mlxsmith serve --model runs/sft_0001/adapter --port 8080
142
+ ```
143
+
144
+ ```bash
145
+ curl http://localhost:8080/v1/chat/completions \
146
+ -H 'Content-Type: application/json' \
147
+ -d '{"messages":[{"role":"user","content":"Hello"}],"max_tokens":64}'
148
+ ```
149
+
150
+ Supports streaming (`"stream": true`), logprobs, stop sequences, and an optional UI dashboard (`serve.ui: true` in config).
151
+
152
+ ## Data tools
153
+
154
+ ```bash
155
+ mlxsmith data presets # list built-in datasets
156
+ mlxsmith data pull alpaca # pull a preset
157
+ mlxsmith data import raw.json --out data/sft/train.jsonl # import ShareGPT → JSONL
158
+ mlxsmith data split data/sft/train.jsonl --fractions 0.9 0.05 0.05
159
+ mlxsmith data stats data/sft/train.jsonl # token counts, field analysis
160
+ mlxsmith data validate data/sft/train.jsonl # structure check
161
+ ```
162
+
163
+ Built-in presets: `alpaca`, `hh-rlhf`, `ultrachat-200k`, `ultrafeedback-binarized-prefs`, `ultrafeedback-binarized-sft`.
164
+
165
+ ## Model management
166
+
167
+ ```bash
168
+ # Pull + convert HF model to MLX
169
+ mlxsmith pull Qwen/Qwen3-4B-Instruct-2507
170
+
171
+ # With quantization
172
+ mlxsmith pull Qwen/Qwen3-4B-Instruct-2507 --quantize --q-bits 4
173
+
174
+ # Merge adapters
175
+ mlxsmith adapters merge runs/sft_0001/adapter runs/pref_0001/adapter --weights 0.7 0.3
176
+ ```
177
+
178
+ ## HF auth
179
+
180
+ ```bash
181
+ mlxsmith auth login --token "$HF_TOKEN"
182
+ mlxsmith auth status
183
+ mlxsmith auth logout
184
+ ```
185
+
186
+ ## Eval and bench
187
+
188
+ ```bash
189
+ # Evaluation suite (pass@k with verifier checks)
190
+ mlxsmith eval --suite eval/suites/coding.yaml
191
+
192
+ # Benchmark inference or training throughput
193
+ mlxsmith bench --mode inference
194
+ mlxsmith bench --mode trainer
195
+ mlxsmith bench --mode end_to_end
196
+ ```
197
+
198
+ ## Verifiers
199
+
200
+ Built-in verifiers for eval, RFT, and preference tuning:
201
+
202
+ - **regex** — pattern matching on completions
203
+ - **jsonschema** — JSON structure validation
204
+ - **pytest** — sandboxed test execution
205
+ - **docker** — containerized verification
206
+ - **compose** — multi-verifier composition (AND/OR/weighted)
207
+
208
+ See `docs/VERIFIERS.md` for the verifier API.
209
+
210
+ ## Environment plugin system
211
+
212
+ ```bash
213
+ mlxsmith env list # list available environments
214
+ mlxsmith env info envs/coding.yaml # show manifest (tasks, verifier, version)
215
+ mlxsmith env init my_env # scaffold a new environment
216
+ mlxsmith env install ./my_env # install from directory
217
+ mlxsmith env package ./my_env # create distributable tarball
218
+ mlxsmith env run envs/coding.yaml # execute RFT with this environment
219
+ ```
220
+
221
+ Environments define tasks, verifiers, and reward functions for RFT training. See `docs/ENVIRONMENTS.md`.
222
+
223
+ ## Config system
224
+
225
+ ```bash
226
+ mlxsmith config show # display merged config (YAML/JSON/TOML)
227
+ mlxsmith config show --sources # show where each value comes from
228
+ mlxsmith config init # create default mlxsmith.yaml
229
+ mlxsmith config validate # check config structure
230
+ mlxsmith config env # show environment variable mapping
231
+ ```
232
+
233
+ Config sources (in priority order): CLI flags > environment variables (`MLXSMITH__SECTION__KEY`) > config file > defaults.
234
+
235
+ ## SDK (programmatic API)
236
+
237
+ For building custom training loops:
238
+
239
+ ```python
240
+ from mlxsmith.sdk import load_model, SamplingClient, TrainingClient, TrainingBatch
241
+
242
+ loaded = load_model("path/to/model", config)
243
+
244
+ # Sampling with logprobs
245
+ sampler = SamplingClient(loaded.backend)
246
+ result = sampler.sample("prompt", logprobs_k=5)
247
+
248
+ # Training operations
249
+ trainer = TrainingClient(loaded.backend)
250
+ trainer.create_optimizer(lr=1e-4, weight_decay=0.01)
251
+ fb = trainer.forward_backward(batch)
252
+ trainer.optim_step(fb.result().grads)
253
+ ```
254
+
255
+ Loss functions: DPO, ORPO, GRPO, CISPO, DRO, PPO, importance sampling, cross-entropy.
256
+
257
+ ## Research
258
+
259
+ ### RLM self-play loop
260
+
261
+ RLM (Recursive Language Model) is a research feature — the infrastructure runs but has not produced measured gains yet.
262
+
263
+ ```bash
264
+ mlxsmith rlm # single-process RLM
265
+ mlxsmith pipeline --orchestrated # multi-process orchestrated RLM
266
+ mlxsmith rlm status # check iteration state
267
+ mlxsmith rlm history # view history
268
+ ```
269
+
270
+ Includes task generation, mutation for data diversity, corpus management, EMA-based gating, and weight pointer IPC for multi-process coordination. See `docs/orchestrator.md`.
271
+
272
+ ### ZMLX acceleration
273
+
274
+ Optional zero-copy MLX acceleration backend.
275
+
276
+ ```bash
277
+ mlxsmith accel status
278
+ ```
279
+
280
+ ## Docs
281
+
282
+ - `docs/PROJECT_FORMAT.md` — project layout and artifacts
283
+ - `docs/VERIFIERS.md` — verifier API and sandbox behavior
284
+ - `docs/COMPATIBILITY.md` — tested versions and model families
285
+ - `docs/ENVIRONMENTS.md` — environment plugin system
286
+ - `docs/orchestrator.md` — multi-process RLM orchestrator
287
+ - `docs/rlm-ctl.md` — RLM training guide
288
+ - `docs/ROADMAP.md` — product direction and milestones
289
+ - `docs/README.md` — full docs index
290
+
291
+ ## License
292
+
293
+ MIT
@@ -0,0 +1,236 @@
1
+ # mlxsmith
2
+
3
+ Apple Silicon MLX fine-tuning toolkit — SFT, DPO/ORPO, GRPO, distillation, and OpenAI-compatible serving.
4
+
5
+ **Status:** alpha (v0.1.1). Full training pipeline validated on Qwen3-4B.
6
+
7
+ ## Install
8
+
9
+ MLX training and serving require macOS on Apple Silicon.
10
+ Other platforms can use data tools and mock backends.
11
+
12
+ ```bash
13
+ python -m venv .venv && source .venv/bin/activate
14
+ pip install -U pip
15
+
16
+ # Core CLI (data tools, config, project scaffolding)
17
+ pip install mlxsmith
18
+
19
+ # Apple Silicon training + serving
20
+ pip install "mlxsmith[mlx,llm,serve]"
21
+
22
+ # Everything
23
+ pip install "mlxsmith[all]"
24
+ ```
25
+
26
+ ## Quickstart
27
+
28
+ ```bash
29
+ mlxsmith init myproj
30
+ cd myproj
31
+ mlxsmith doctor # check Python, MLX, Metal, ZMLX
32
+ ```
33
+
34
+ ## Training
35
+
36
+ ### SFT (LoRA/QLoRA)
37
+
38
+ ```bash
39
+ mlxsmith sft --model cache/mlx/Qwen__Qwen3-4B-Instruct-2507 --data data/sft
40
+ ```
41
+
42
+ Produces run artifacts under `runs/sft_NNNN/` (adapter weights, `metrics.jsonl`, config snapshot).
43
+
44
+ ### Preference tuning (DPO/ORPO)
45
+
46
+ ```bash
47
+ mlxsmith pref --model cache/mlx/Qwen__Qwen3-4B-Instruct-2507 \
48
+ --data data/prefs --algo dpo
49
+ ```
50
+
51
+ Supports DPO and ORPO algorithms with configurable beta and KL coefficients. Expects `{prompt, chosen, rejected}` data format.
52
+
53
+ ### Reinforced fine-tuning (GRPO)
54
+
55
+ ```bash
56
+ mlxsmith rft --model cache/mlx/Qwen__Qwen3-4B-Instruct-2507 \
57
+ --env envs/coding.yaml --verifier verifiers/pytest.py
58
+ ```
59
+
60
+ GRPO-style RL training with token-level environment integration and verifier-based rewards. Rollout acceptance/rejection gating with reward tracking.
61
+
62
+ ### Knowledge distillation
63
+
64
+ ```bash
65
+ # Offline distillation (teacher generates, student learns)
66
+ mlxsmith distill --teacher large-model --student small-model --mode offline
67
+
68
+ # Online preference distillation (OPD)
69
+ mlxsmith distill --teacher large-model --student small-model --mode opd
70
+ ```
71
+
72
+ ### Full pipeline
73
+
74
+ ```bash
75
+ # Run SFT → Pref → RFT in sequence
76
+ mlxsmith pipeline
77
+ ```
78
+
79
+ ## Serving
80
+
81
+ OpenAI-compatible `/v1/chat/completions` endpoint.
82
+
83
+ ```bash
84
+ mlxsmith serve --model runs/sft_0001/adapter --port 8080
85
+ ```
86
+
87
+ ```bash
88
+ curl http://localhost:8080/v1/chat/completions \
89
+ -H 'Content-Type: application/json' \
90
+ -d '{"messages":[{"role":"user","content":"Hello"}],"max_tokens":64}'
91
+ ```
92
+
93
+ Supports streaming (`"stream": true`), logprobs, stop sequences, and an optional UI dashboard (`serve.ui: true` in config).
94
+
95
+ ## Data tools
96
+
97
+ ```bash
98
+ mlxsmith data presets # list built-in datasets
99
+ mlxsmith data pull alpaca # pull a preset
100
+ mlxsmith data import raw.json --out data/sft/train.jsonl # import ShareGPT → JSONL
101
+ mlxsmith data split data/sft/train.jsonl --fractions 0.9 0.05 0.05
102
+ mlxsmith data stats data/sft/train.jsonl # token counts, field analysis
103
+ mlxsmith data validate data/sft/train.jsonl # structure check
104
+ ```
105
+
106
+ Built-in presets: `alpaca`, `hh-rlhf`, `ultrachat-200k`, `ultrafeedback-binarized-prefs`, `ultrafeedback-binarized-sft`.
107
+
108
+ ## Model management
109
+
110
+ ```bash
111
+ # Pull + convert HF model to MLX
112
+ mlxsmith pull Qwen/Qwen3-4B-Instruct-2507
113
+
114
+ # With quantization
115
+ mlxsmith pull Qwen/Qwen3-4B-Instruct-2507 --quantize --q-bits 4
116
+
117
+ # Merge adapters
118
+ mlxsmith adapters merge runs/sft_0001/adapter runs/pref_0001/adapter --weights 0.7 0.3
119
+ ```
120
+
121
+ ## HF auth
122
+
123
+ ```bash
124
+ mlxsmith auth login --token "$HF_TOKEN"
125
+ mlxsmith auth status
126
+ mlxsmith auth logout
127
+ ```
128
+
129
+ ## Eval and bench
130
+
131
+ ```bash
132
+ # Evaluation suite (pass@k with verifier checks)
133
+ mlxsmith eval --suite eval/suites/coding.yaml
134
+
135
+ # Benchmark inference or training throughput
136
+ mlxsmith bench --mode inference
137
+ mlxsmith bench --mode trainer
138
+ mlxsmith bench --mode end_to_end
139
+ ```
140
+
141
+ ## Verifiers
142
+
143
+ Built-in verifiers for eval, RFT, and preference tuning:
144
+
145
+ - **regex** — pattern matching on completions
146
+ - **jsonschema** — JSON structure validation
147
+ - **pytest** — sandboxed test execution
148
+ - **docker** — containerized verification
149
+ - **compose** — multi-verifier composition (AND/OR/weighted)
150
+
151
+ See `docs/VERIFIERS.md` for the verifier API.
152
+
153
+ ## Environment plugin system
154
+
155
+ ```bash
156
+ mlxsmith env list # list available environments
157
+ mlxsmith env info envs/coding.yaml # show manifest (tasks, verifier, version)
158
+ mlxsmith env init my_env # scaffold a new environment
159
+ mlxsmith env install ./my_env # install from directory
160
+ mlxsmith env package ./my_env # create distributable tarball
161
+ mlxsmith env run envs/coding.yaml # execute RFT with this environment
162
+ ```
163
+
164
+ Environments define tasks, verifiers, and reward functions for RFT training. See `docs/ENVIRONMENTS.md`.
165
+
166
+ ## Config system
167
+
168
+ ```bash
169
+ mlxsmith config show # display merged config (YAML/JSON/TOML)
170
+ mlxsmith config show --sources # show where each value comes from
171
+ mlxsmith config init # create default mlxsmith.yaml
172
+ mlxsmith config validate # check config structure
173
+ mlxsmith config env # show environment variable mapping
174
+ ```
175
+
176
+ Config sources (in priority order): CLI flags > environment variables (`MLXSMITH__SECTION__KEY`) > config file > defaults.
177
+
178
+ ## SDK (programmatic API)
179
+
180
+ For building custom training loops:
181
+
182
+ ```python
183
+ from mlxsmith.sdk import load_model, SamplingClient, TrainingClient, TrainingBatch
184
+
185
+ loaded = load_model("path/to/model", config)
186
+
187
+ # Sampling with logprobs
188
+ sampler = SamplingClient(loaded.backend)
189
+ result = sampler.sample("prompt", logprobs_k=5)
190
+
191
+ # Training operations
192
+ trainer = TrainingClient(loaded.backend)
193
+ trainer.create_optimizer(lr=1e-4, weight_decay=0.01)
194
+ fb = trainer.forward_backward(batch)
195
+ trainer.optim_step(fb.result().grads)
196
+ ```
197
+
198
+ Loss functions: DPO, ORPO, GRPO, CISPO, DRO, PPO, importance sampling, cross-entropy.
199
+
200
+ ## Research
201
+
202
+ ### RLM self-play loop
203
+
204
+ RLM (Recursive Language Model) is a research feature — the infrastructure runs but has not produced measured gains yet.
205
+
206
+ ```bash
207
+ mlxsmith rlm # single-process RLM
208
+ mlxsmith pipeline --orchestrated # multi-process orchestrated RLM
209
+ mlxsmith rlm status # check iteration state
210
+ mlxsmith rlm history # view history
211
+ ```
212
+
213
+ Includes task generation, mutation for data diversity, corpus management, EMA-based gating, and weight pointer IPC for multi-process coordination. See `docs/orchestrator.md`.
214
+
215
+ ### ZMLX acceleration
216
+
217
+ Optional zero-copy MLX acceleration backend.
218
+
219
+ ```bash
220
+ mlxsmith accel status
221
+ ```
222
+
223
+ ## Docs
224
+
225
+ - `docs/PROJECT_FORMAT.md` — project layout and artifacts
226
+ - `docs/VERIFIERS.md` — verifier API and sandbox behavior
227
+ - `docs/COMPATIBILITY.md` — tested versions and model families
228
+ - `docs/ENVIRONMENTS.md` — environment plugin system
229
+ - `docs/orchestrator.md` — multi-process RLM orchestrator
230
+ - `docs/rlm-ctl.md` — RLM training guide
231
+ - `docs/ROADMAP.md` — product direction and milestones
232
+ - `docs/README.md` — full docs index
233
+
234
+ ## License
235
+
236
+ MIT
@@ -4,8 +4,8 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "mlxsmith"
7
- version = "0.1.0"
8
- description = "Apple Silicon MLX fine-tuning and OpenAI-compatible serving (SFT stable; preference/RL experimental)."
7
+ version = "0.1.1"
8
+ description = "Apple Silicon MLX fine-tuning toolkit — SFT, DPO/ORPO, GRPO, distillation, and OpenAI-compatible serving."
9
9
  readme = {file = "README.md", content-type = "text/markdown"}
10
10
  requires-python = ">=3.10"
11
11
  license = {text = "MIT"}
@@ -20,11 +20,10 @@ import uuid
20
20
  from pathlib import Path
21
21
  from typing import Any, AsyncGenerator, Callable, Dict, List, Optional
22
22
 
23
- from fastapi import APIRouter, Depends, FastAPI, HTTPException, Request, Security, status
23
+ from fastapi import APIRouter, FastAPI, HTTPException, Request, Security, status
24
24
  from starlette.middleware.base import BaseHTTPMiddleware
25
25
  from fastapi.responses import StreamingResponse
26
26
  from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
27
- from pydantic import BaseModel
28
27
 
29
28
  from .schemas import (
30
29
  AdapterReloadRequest,
@@ -1102,12 +1101,10 @@ def create_router(
1102
1101
  models, use the list endpoint to check completion status.
1103
1102
  """
1104
1103
  cache_dir = _get_cache_dir()
1105
- local_path = cache_dir / "mlx" / request.model_id.replace("/", "__")
1106
-
1104
+
1107
1105
  try:
1108
1106
  # Import here to avoid circular dependencies
1109
1107
  from ..models import hf_pull
1110
- from ..config import ProjectConfig
1111
1108
 
1112
1109
  # Get HF token if available
1113
1110
  hf_token = None
@@ -43,7 +43,7 @@ from .envs import (
43
43
 
44
44
  app = typer.Typer(
45
45
  add_completion=False,
46
- help="mlxsmith — MLX fine-tuning + OpenAI-compatible serving (SFT stable; preference/RL experimental)",
46
+ help="mlxsmith — Apple Silicon MLX fine-tuning toolkit: SFT, DPO/ORPO, GRPO, distillation, and OpenAI-compatible serving.",
47
47
  )
48
48
  console = Console()
49
49
 
@@ -564,7 +564,7 @@ def config_validate(
564
564
 
565
565
  try:
566
566
  cfg = load_config(cfg_path, require=True)
567
- console.print(f"[green]✓ Configuration is valid[/green]")
567
+ console.print("[green]✓ Configuration is valid[/green]")
568
568
 
569
569
  # Show summary
570
570
  table = Table(title="Configuration Summary")
@@ -593,9 +593,9 @@ def config_env(
593
593
  """Show available environment variables."""
594
594
  cfg = ProjectConfig()
595
595
 
596
- console.print(f"\n[bold]Environment Variable Configuration[/bold]")
596
+ console.print("\n[bold]Environment Variable Configuration[/bold]")
597
597
  console.print(f"Prefix: [cyan]{prefix}[/cyan]")
598
- console.print(f"Nested delimiter: [cyan]__[/cyan] (double underscore)\n")
598
+ console.print("Nested delimiter: [cyan]__[/cyan] (double underscore)\n")
599
599
 
600
600
  table = Table(title=f"Available {prefix}* Environment Variables")
601
601
  table.add_column("Environment Variable")
@@ -18,7 +18,6 @@ Config files support @path syntax:
18
18
  from __future__ import annotations
19
19
 
20
20
  import json
21
- import os
22
21
  from pathlib import Path
23
22
  from typing import Any, Dict, List, Optional, Tuple, Union
24
23
 
@@ -94,7 +93,7 @@ class ProjectSettings(BaseSettings):
94
93
 
95
94
 
96
95
  # Import CLI aliases from models
97
- from .config_models import CLI_ALIASES as _CLI_ALIASES
96
+ from .config_models import CLI_ALIASES as _CLI_ALIASES # noqa: E402
98
97
 
99
98
 
100
99
  def resolve_config_path(config: Union[str, Path], root: Optional[Path] = None) -> Path:
@@ -182,11 +182,9 @@ def compute_logprobs(
182
182
  else:
183
183
  prompt_len = len(prompt_ids)
184
184
 
185
- # Get generation with logprobs
186
- full_text = backend.decode(ids)
187
-
188
- # Use backend's sequence_logprob if available
189
- seq_logprob = backend.sequence_logprob(ids, prompt_len=prompt_len)
185
+ # Decode and compute sequence-level logprob (used by callers via backend state)
186
+ backend.decode(ids)
187
+ backend.sequence_logprob(ids, prompt_len=prompt_len)
190
188
 
191
189
  # For per-token logprobs, we'd need to do a forward pass
192
190
  # This is a simplified version
@@ -2,7 +2,7 @@ from __future__ import annotations
2
2
 
3
3
  import random
4
4
  from dataclasses import dataclass
5
- from typing import Sequence, Any, List, Dict, Optional
5
+ from typing import Sequence, Any, List, Dict
6
6
 
7
7
  from .backend import Generation
8
8
 
@@ -9,27 +9,22 @@ Manages rollout requests, training batches, and weight updates.
9
9
 
10
10
  from __future__ import annotations
11
11
 
12
- import json
13
12
  import multiprocessing as mp
14
13
  import signal
15
- import sys
16
14
  import time
17
15
  import traceback
18
16
  from dataclasses import dataclass, field
19
17
  from pathlib import Path
20
- from typing import Any, Dict, List, Optional, Callable
18
+ from typing import Any, Dict, List, Optional
21
19
 
22
20
  from rich.console import Console
23
21
 
24
22
  from ..config import ProjectConfig
25
- from ..rlm.corpus import append_corpus, load_corpus, sample_corpus
26
- from ..rlm.gating import load_state, save_state, should_accept, update_state
27
- from ..rlm.history import append_history
28
- from ..rlm.inference import Rollout, build_tasks
23
+ from ..rlm.gating import load_state
29
24
  from ..rlm.weights import WeightPointerStore, WeightPointerIPC
30
25
  from ..runs import new_run, snapshot_config
31
- from ..util import ensure_dir, now_ts, write_jsonl
32
- from .queue import MessageQueue, MessageType, Message
26
+ from ..util import ensure_dir, now_ts
27
+ from .queue import MessageQueue, MessageType
33
28
  from .inference_worker import InferenceConfig, run_inference_worker
34
29
  from .trainer_worker import TrainerConfig, run_trainer_worker
35
30
 
@@ -26,7 +26,7 @@ from fastapi.responses import StreamingResponse
26
26
  from ..config import ProjectConfig
27
27
  from ..llm.registry import get_llm_backend
28
28
  from ..models import resolve_model_spec
29
- from ..rlm.weights import WeightPointerStore, WeightPointerIPC
29
+ from ..rlm.weights import WeightPointerStore
30
30
  from .queue import MessageQueue, MessageType, Message
31
31
 
32
32
 
@@ -11,11 +11,11 @@ import json
11
11
  import multiprocessing as mp
12
12
  import time
13
13
  import uuid
14
- from dataclasses import asdict, dataclass, field
14
+ from dataclasses import dataclass, field
15
15
  from enum import Enum, auto
16
16
  from pathlib import Path
17
17
  from queue import Empty
18
- from typing import Any, Dict, List, Optional, Union
18
+ from typing import Any, Dict, List, Optional
19
19
 
20
20
 
21
21
  class MessageType(Enum):