mlxsmith 0.1.0__tar.gz → 0.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mlxsmith-0.1.2/PKG-INFO +283 -0
- mlxsmith-0.1.2/README.md +228 -0
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/pyproject.toml +2 -3
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/accel/__init__.py +0 -3
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/api/handlers.py +2 -5
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/cli.py +5 -6
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/config.py +1 -2
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/config_models.py +1 -1
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/llm/interface.py +3 -5
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/llm/mock_backend.py +1 -1
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/orchestrator/daemon.py +4 -9
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/orchestrator/inference_worker.py +1 -1
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/orchestrator/queue.py +2 -2
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/orchestrator/trainer_worker.py +0 -2
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/rlm/loop.py +6 -8
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/rlm/weights.py +1 -1
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/sdk/future.py +2 -3
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/sdk/sampling_client.py +1 -2
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/sdk/training_client.py +3 -4
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/server.py +1 -6
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/train/lora.py +4 -0
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/util.py +0 -6
- mlxsmith-0.1.2/src/mlxsmith.egg-info/PKG-INFO +283 -0
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith.egg-info/SOURCES.txt +0 -1
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith.egg-info/requires.txt +0 -3
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/tests/test_api.py +0 -4
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/tests/test_sdk.py +2 -8
- mlxsmith-0.1.0/PKG-INFO +0 -163
- mlxsmith-0.1.0/README.md +0 -106
- mlxsmith-0.1.0/src/mlxsmith/accel/zmlx_backend.py +0 -42
- mlxsmith-0.1.0/src/mlxsmith.egg-info/PKG-INFO +0 -163
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/LICENSE +0 -0
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/setup.cfg +0 -0
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/__init__.py +0 -0
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/accel/base.py +0 -0
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/accel/none.py +0 -0
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/adapters.py +0 -0
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/api/__init__.py +0 -0
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/api/schemas.py +0 -0
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/auth.py +0 -0
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/bench.py +0 -0
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/data.py +0 -0
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/envs/__init__.py +0 -0
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/envs/system.py +0 -0
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/envs/token_env.py +0 -0
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/eval.py +0 -0
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/infer.py +0 -0
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/llm/__init__.py +0 -0
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/llm/backend.py +0 -0
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/llm/mlx_lm_backend.py +0 -0
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/llm/registry.py +0 -0
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/models.py +0 -0
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/orchestrator/__init__.py +0 -0
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/rlm/__init__.py +0 -0
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/rlm/corpus.py +0 -0
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/rlm/gating.py +0 -0
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/rlm/generate.py +0 -0
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/rlm/history.py +0 -0
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/rlm/inference.py +0 -0
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/rlm/mutate.py +0 -0
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/rlm/trainer.py +0 -0
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/runs.py +0 -0
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/sdk/__init__.py +0 -0
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/sdk/losses.py +0 -0
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/train/__init__.py +0 -0
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/train/distill.py +0 -0
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/train/pref.py +0 -0
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/train/rft.py +0 -0
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/train/sft.py +0 -0
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/verifiers/__init__.py +0 -0
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/verifiers/compose.py +0 -0
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/verifiers/docker_verifier.py +0 -0
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/verifiers/jsonschema.py +0 -0
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/verifiers/pytest_verifier.py +0 -0
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/verifiers/regex.py +0 -0
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/verifiers/types.py +0 -0
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith.egg-info/dependency_links.txt +0 -0
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith.egg-info/entry_points.txt +0 -0
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith.egg-info/top_level.txt +0 -0
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/tests/test_auth.py +0 -0
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/tests/test_config.py +0 -0
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/tests/test_data.py +0 -0
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/tests/test_rlm.py +0 -0
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/tests/test_rlm_mutation.py +0 -0
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/tests/test_runs.py +0 -0
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/tests/test_training_smoke.py +0 -0
- {mlxsmith-0.1.0 → mlxsmith-0.1.2}/tests/test_verifiers.py +0 -0

mlxsmith-0.1.2/PKG-INFO ADDED
@@ -0,0 +1,283 @@
Metadata-Version: 2.4
Name: mlxsmith
Version: 0.1.2
Summary: Apple Silicon MLX fine-tuning toolkit — SFT, DPO/ORPO, GRPO, distillation, and OpenAI-compatible serving.
Author-email: Shannon Labs <hmbown@gmail.com>
License: MIT
Project-URL: Homepage, https://github.com/Hmbown/MLXSmith
Project-URL: Repository, https://github.com/Hmbown/MLXSmith
Project-URL: Issues, https://github.com/Hmbown/MLXSmith/issues
Keywords: mlx,apple-silicon,llm,fine-tuning,lora,openai-compatible
Classifier: Development Status :: 3 - Alpha
Classifier: Intended Audience :: Developers
Classifier: Intended Audience :: Science/Research
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
Classifier: License :: OSI Approved :: MIT License
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3 :: Only
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Operating System :: MacOS :: MacOS X
Requires-Python: >=3.10
Description-Content-Type: text/markdown
License-File: LICENSE
Requires-Dist: typer>=0.9.0
Requires-Dist: rich>=13.7.0
Requires-Dist: pyyaml>=6.0.1
Requires-Dist: pydantic>=2.5.0
Requires-Dist: pydantic-settings>=2.2.1
Requires-Dist: tomli>=2.0.1; python_version < "3.11"
Requires-Dist: huggingface_hub>=1.3.4
Requires-Dist: jsonschema>=4.21.0
Provides-Extra: mlx
Requires-Dist: mlx>=0.30.4; extra == "mlx"
Provides-Extra: llm
Requires-Dist: mlx-lm>=0.30.5; extra == "llm"
Requires-Dist: transformers>=5.0.0; extra == "llm"
Requires-Dist: datasets>=3.0.0; extra == "llm"
Provides-Extra: serve
Requires-Dist: fastapi>=0.128.0; extra == "serve"
Requires-Dist: uvicorn>=0.40.0; extra == "serve"
Requires-Dist: httpx>=0.28.0; extra == "serve"
Provides-Extra: dev
Requires-Dist: pytest>=9.0.0; extra == "dev"
Requires-Dist: ruff>=0.14.0; extra == "dev"
Provides-Extra: all
Requires-Dist: mlx>=0.30.4; extra == "all"
Requires-Dist: mlx-lm>=0.30.5; extra == "all"
Requires-Dist: transformers>=5.0.0; extra == "all"
Requires-Dist: datasets>=3.0.0; extra == "all"
Requires-Dist: fastapi>=0.128.0; extra == "all"
Requires-Dist: uvicorn>=0.40.0; extra == "all"
Requires-Dist: httpx>=0.28.0; extra == "all"
Dynamic: license-file

# mlxsmith

Apple Silicon MLX fine-tuning toolkit — SFT, DPO/ORPO, GRPO, distillation, and OpenAI-compatible serving.

**Status:** alpha (v0.1.2). Full training pipeline validated on Qwen3-4B.

## Install

MLX training and serving require macOS on Apple Silicon.
Other platforms can use data tools and mock backends.

```bash
python -m venv .venv && source .venv/bin/activate
pip install -U pip

# Core CLI (data tools, config, project scaffolding)
pip install mlxsmith

# Apple Silicon training + serving
pip install "mlxsmith[mlx,llm,serve]"

# Everything
pip install "mlxsmith[all]"
```

## Quickstart

```bash
mlxsmith init myproj
cd myproj
mlxsmith doctor   # check Python, MLX, Metal
```

## Training

### SFT (LoRA/QLoRA)

```bash
mlxsmith sft --model cache/mlx/Qwen__Qwen3-4B-Instruct-2507 --data data/sft
```

Produces run artifacts under `runs/sft_NNNN/` (adapter weights, `metrics.jsonl`, config snapshot).
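
Since `metrics.jsonl` is plain JSON Lines, standard tools can inspect it. A minimal sketch, assuming a finished run at the hypothetical path `runs/sft_0001/`:

```bash
# Pretty-print the last logged training step (field names may vary by version).
tail -n 1 runs/sft_0001/metrics.jsonl | python -m json.tool
```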

### Preference tuning (DPO/ORPO)

```bash
mlxsmith pref --model cache/mlx/Qwen__Qwen3-4B-Instruct-2507 \
  --data data/prefs --algo dpo
```

Supports DPO and ORPO algorithms with configurable beta and KL coefficients. Expects `{prompt, chosen, rejected}` data format.
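
For illustration, one record in that format could look like the following (the file path and strings here are hypothetical):

```bash
# Append a single {prompt, chosen, rejected} example to a JSONL training file.
printf '%s\n' '{"prompt": "Summarize MLX in one sentence.", "chosen": "MLX is an array framework optimized for Apple Silicon.", "rejected": "No."}' >> data/prefs/train.jsonl
```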

### Reinforced fine-tuning (GRPO)

```bash
mlxsmith rft --model cache/mlx/Qwen__Qwen3-4B-Instruct-2507 \
  --env envs/coding.yaml --verifier verifiers/pytest.py
```

GRPO-style RL training with token-level environment integration and verifier-based rewards. Rollout acceptance/rejection gating with reward tracking.

### Knowledge distillation

```bash
# Offline distillation (teacher generates, student learns)
mlxsmith distill --teacher large-model --student small-model --mode offline

# Online preference distillation (OPD)
mlxsmith distill --teacher large-model --student small-model --mode opd
```

### Full pipeline

```bash
# Run SFT → Pref → RFT in sequence
mlxsmith pipeline
```

## Serving

OpenAI-compatible `/v1/chat/completions` endpoint.

```bash
mlxsmith serve --model runs/sft_0001/adapter --port 8080
```

```bash
curl http://localhost:8080/v1/chat/completions \
  -H 'Content-Type: application/json' \
  -d '{"messages":[{"role":"user","content":"Hello"}],"max_tokens":64}'
```

Supports streaming (`"stream": true`), logprobs, stop sequences, and an optional UI dashboard (`serve.ui: true` in config).
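
As a sketch of those request options, a streaming call with a stop sequence might look like this; the `stop` field follows the usual OpenAI-style convention and is assumed here rather than taken from the package docs:

```bash
# -N disables curl buffering so streamed chunks appear as they arrive.
curl -N http://localhost:8080/v1/chat/completions \
  -H 'Content-Type: application/json' \
  -d '{"messages":[{"role":"user","content":"Hello"}],"max_tokens":64,"stream":true,"stop":["\n\n"]}'
```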

## Data tools

```bash
mlxsmith data presets                                      # list built-in datasets
mlxsmith data pull alpaca                                  # pull a preset
mlxsmith data import raw.json --out data/sft/train.jsonl   # import ShareGPT → JSONL
mlxsmith data split data/sft/train.jsonl --fractions 0.9 0.05 0.05
mlxsmith data stats data/sft/train.jsonl                   # token counts, field analysis
mlxsmith data validate data/sft/train.jsonl                # structure check
```

Built-in presets: `alpaca`, `hh-rlhf`, `ultrachat-200k`, `ultrafeedback-binarized-prefs`, `ultrafeedback-binarized-sft`.

## Model management

```bash
# Pull + convert HF model to MLX
mlxsmith pull Qwen/Qwen3-4B-Instruct-2507

# With quantization
mlxsmith pull Qwen/Qwen3-4B-Instruct-2507 --quantize --q-bits 4

# Merge adapters
mlxsmith adapters merge runs/sft_0001/adapter runs/pref_0001/adapter --weights 0.7 0.3
```

## HF auth

```bash
mlxsmith auth login --token "$HF_TOKEN"
mlxsmith auth status
mlxsmith auth logout
```

## Eval and bench

```bash
# Evaluation suite (pass@k with verifier checks)
mlxsmith eval --suite eval/suites/coding.yaml

# Benchmark inference or training throughput
mlxsmith bench --mode inference
mlxsmith bench --mode trainer
mlxsmith bench --mode end_to_end
```

## Verifiers

Built-in verifiers for eval, RFT, and preference tuning:

- **regex** — pattern matching on completions
- **jsonschema** — JSON structure validation
- **pytest** — sandboxed test execution
- **docker** — containerized verification
- **compose** — multi-verifier composition (AND/OR/weighted)

See `docs/VERIFIERS.md` for the verifier API.

## Environment plugin system

```bash
mlxsmith env list                   # list available environments
mlxsmith env info envs/coding.yaml  # show manifest (tasks, verifier, version)
mlxsmith env init my_env            # scaffold a new environment
mlxsmith env install ./my_env       # install from directory
mlxsmith env package ./my_env       # create distributable tarball
mlxsmith env run envs/coding.yaml   # execute RFT with this environment
```

Environments define tasks, verifiers, and reward functions for RFT training. See `docs/ENVIRONMENTS.md`.

## Config system

```bash
mlxsmith config show             # display merged config (YAML/JSON/TOML)
mlxsmith config show --sources   # show where each value comes from
mlxsmith config init             # create default mlxsmith.yaml
mlxsmith config validate         # check config structure
mlxsmith config env              # show environment variable mapping
```

Config sources (in priority order): CLI flags > environment variables (`MLXSMITH__SECTION__KEY`) > config file > defaults.
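
As a sketch of the environment-variable layer (the section and key below are illustrative, not taken from the package docs; `mlxsmith config env` prints the real mapping):

```bash
# Override one nested config value for a single invocation.
MLXSMITH__SERVE__PORT=8080 mlxsmith config show --sources
```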

## SDK (programmatic API)

For building custom training loops:

```python
from mlxsmith.sdk import load_model, SamplingClient, TrainingClient, TrainingBatch

loaded = load_model("path/to/model", config)

# Sampling with logprobs
sampler = SamplingClient(loaded.backend)
result = sampler.sample("prompt", logprobs_k=5)

# Training operations
trainer = TrainingClient(loaded.backend)
trainer.create_optimizer(lr=1e-4, weight_decay=0.01)
fb = trainer.forward_backward(batch)
trainer.optim_step(fb.result().grads)
```

Loss functions: DPO, ORPO, GRPO, CISPO, DRO, PPO, importance sampling, cross-entropy.

## Research

### RLM self-play loop

RLM (Recursive Language Model) is a research feature — the infrastructure runs but has not produced measured gains yet.

```bash
mlxsmith rlm                      # single-process RLM
mlxsmith pipeline --orchestrated  # multi-process orchestrated RLM
mlxsmith rlm status               # check iteration state
mlxsmith rlm history              # view history
```

Includes task generation, mutation for data diversity, corpus management, EMA-based gating, and weight pointer IPC for multi-process coordination. See `docs/orchestrator.md`.

## Docs

- `docs/PROJECT_FORMAT.md` — project layout and artifacts
- `docs/VERIFIERS.md` — verifier API and sandbox behavior
- `docs/COMPATIBILITY.md` — tested versions and model families
- `docs/ENVIRONMENTS.md` — environment plugin system
- `docs/orchestrator.md` — multi-process RLM orchestrator
- `docs/rlm-ctl.md` — RLM training guide
- `docs/ROADMAP.md` — product direction and milestones
- `docs/README.md` — full docs index

## License

MIT

mlxsmith-0.1.2/README.md ADDED
@@ -0,0 +1,228 @@
(The 228 added lines are identical to the long description embedded in mlxsmith-0.1.2/PKG-INFO above, from "# mlxsmith" through "MIT".)

{mlxsmith-0.1.0 → mlxsmith-0.1.2}/pyproject.toml

@@ -4,8 +4,8 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "mlxsmith"
-version = "0.1.0"
-description = "Apple Silicon MLX fine-tuning and OpenAI-compatible serving
+version = "0.1.2"
+description = "Apple Silicon MLX fine-tuning toolkit — SFT, DPO/ORPO, GRPO, distillation, and OpenAI-compatible serving."
 readme = {file = "README.md", content-type = "text/markdown"}
 requires-python = ">=3.10"
 license = {text = "MIT"}
@@ -52,7 +52,6 @@ serve = [
     "uvicorn>=0.40.0",
     "httpx>=0.28.0",
 ]
-zmlx = ["zmlx"]
 dev = ["pytest>=9.0.0", "ruff>=0.14.0"]
 all = [
     "mlx>=0.30.4",

{mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/accel/__init__.py

@@ -1,10 +1,7 @@
 from __future__ import annotations
 from .none import NoneBackend
-from .zmlx_backend import ZMLXBackend
 
 def get_backend(name: str):
     if name == "none":
         return NoneBackend()
-    if name == "zmlx":
-        return ZMLXBackend()
     raise ValueError(f"Unknown accel backend: {name}")

{mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/api/handlers.py

@@ -20,11 +20,10 @@ import uuid
 from pathlib import Path
 from typing import Any, AsyncGenerator, Callable, Dict, List, Optional
 
-from fastapi import APIRouter,
+from fastapi import APIRouter, FastAPI, HTTPException, Request, Security, status
 from starlette.middleware.base import BaseHTTPMiddleware
 from fastapi.responses import StreamingResponse
 from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
-from pydantic import BaseModel
 
 from .schemas import (
     AdapterReloadRequest,
@@ -1102,12 +1101,10 @@ def create_router(
         models, use the list endpoint to check completion status.
         """
         cache_dir = _get_cache_dir()
-
-
+
         try:
             # Import here to avoid circular dependencies
             from ..models import hf_pull
-            from ..config import ProjectConfig
 
             # Get HF token if available
             hf_token = None

{mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/cli.py

@@ -43,7 +43,7 @@ from .envs import (
 
 app = typer.Typer(
     add_completion=False,
-    help="mlxsmith — MLX fine-tuning
+    help="mlxsmith — Apple Silicon MLX fine-tuning toolkit: SFT, DPO/ORPO, GRPO, distillation, and OpenAI-compatible serving.",
 )
 console = Console()
 
@@ -83,7 +83,6 @@ def doctor():
     table.add_row("cpu_count", str(info.cpu_count))
     table.add_row("metal", str(info.has_metal))
     table.add_row("mlx", f"{info.has_mlx} {info.mlx_version or ''}".strip())
-    table.add_row("zmlx", str(info.has_zmlx))
     console.print(table)
 
 
@@ -564,7 +563,7 @@ def config_validate(
 
     try:
         cfg = load_config(cfg_path, require=True)
-        console.print(
+        console.print("[green]✓ Configuration is valid[/green]")
 
         # Show summary
         table = Table(title="Configuration Summary")
@@ -593,9 +592,9 @@ def config_env(
     """Show available environment variables."""
     cfg = ProjectConfig()
 
-    console.print(
+    console.print("\n[bold]Environment Variable Configuration[/bold]")
     console.print(f"Prefix: [cyan]{prefix}[/cyan]")
-    console.print(
+    console.print("Nested delimiter: [cyan]__[/cyan] (double underscore)\n")
 
     table = Table(title=f"Available {prefix}* Environment Variables")
     table.add_column("Environment Variable")
@@ -729,7 +728,7 @@ def rlm_history(limit: int = typer.Option(10, "--limit")):
 
 @accel_app.command("status")
 def accel_status():
-    backends = ["none"
+    backends = ["none"]
     table = Table(title="mlxsmith accel status")
     table.add_column("backend")
     table.add_column("available")

{mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/config.py

@@ -18,7 +18,6 @@ Config files support @path syntax:
 from __future__ import annotations
 
 import json
-import os
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Tuple, Union
 
@@ -94,7 +93,7 @@ class ProjectSettings(BaseSettings):
 
 
 # Import CLI aliases from models
-from .config_models import CLI_ALIASES as _CLI_ALIASES
+from .config_models import CLI_ALIASES as _CLI_ALIASES  # noqa: E402
 
 
 def resolve_config_path(config: Union[str, Path], root: Optional[Path] = None) -> Path:

{mlxsmith-0.1.0 → mlxsmith-0.1.2}/src/mlxsmith/llm/interface.py

@@ -182,11 +182,9 @@ def compute_logprobs(
     else:
         prompt_len = len(prompt_ids)
 
-    #
-
-
-    # Use backend's sequence_logprob if available
-    seq_logprob = backend.sequence_logprob(ids, prompt_len=prompt_len)
+    # Decode and compute sequence-level logprob (used by callers via backend state)
+    backend.decode(ids)
+    backend.sequence_logprob(ids, prompt_len=prompt_len)
 
     # For per-token logprobs, we'd need to do a forward pass
     # This is a simplified version
|