mlxsmith 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mlxsmith/api/handlers.py CHANGED
@@ -20,11 +20,10 @@ import uuid
20
20
  from pathlib import Path
21
21
  from typing import Any, AsyncGenerator, Callable, Dict, List, Optional
22
22
 
23
- from fastapi import APIRouter, Depends, FastAPI, HTTPException, Request, Security, status
23
+ from fastapi import APIRouter, FastAPI, HTTPException, Request, Security, status
24
24
  from starlette.middleware.base import BaseHTTPMiddleware
25
25
  from fastapi.responses import StreamingResponse
26
26
  from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
27
- from pydantic import BaseModel
28
27
 
29
28
  from .schemas import (
30
29
  AdapterReloadRequest,
@@ -1102,12 +1101,10 @@ def create_router(
1102
1101
  models, use the list endpoint to check completion status.
1103
1102
  """
1104
1103
  cache_dir = _get_cache_dir()
1105
- local_path = cache_dir / "mlx" / request.model_id.replace("/", "__")
1106
-
1104
+
1107
1105
  try:
1108
1106
  # Import here to avoid circular dependencies
1109
1107
  from ..models import hf_pull
1110
- from ..config import ProjectConfig
1111
1108
 
1112
1109
  # Get HF token if available
1113
1110
  hf_token = None
mlxsmith/cli.py CHANGED
@@ -43,7 +43,7 @@ from .envs import (
43
43
 
44
44
  app = typer.Typer(
45
45
  add_completion=False,
46
- help="mlxsmith — MLX fine-tuning + OpenAI-compatible serving (SFT stable; preference/RL experimental)",
46
+ help="mlxsmith — Apple Silicon MLX fine-tuning toolkit: SFT, DPO/ORPO, GRPO, distillation, and OpenAI-compatible serving.",
47
47
  )
48
48
  console = Console()
49
49
 
@@ -564,7 +564,7 @@ def config_validate(
564
564
 
565
565
  try:
566
566
  cfg = load_config(cfg_path, require=True)
567
- console.print(f"[green]✓ Configuration is valid[/green]")
567
+ console.print("[green]✓ Configuration is valid[/green]")
568
568
 
569
569
  # Show summary
570
570
  table = Table(title="Configuration Summary")
@@ -593,9 +593,9 @@ def config_env(
593
593
  """Show available environment variables."""
594
594
  cfg = ProjectConfig()
595
595
 
596
- console.print(f"\n[bold]Environment Variable Configuration[/bold]")
596
+ console.print("\n[bold]Environment Variable Configuration[/bold]")
597
597
  console.print(f"Prefix: [cyan]{prefix}[/cyan]")
598
- console.print(f"Nested delimiter: [cyan]__[/cyan] (double underscore)\n")
598
+ console.print("Nested delimiter: [cyan]__[/cyan] (double underscore)\n")
599
599
 
600
600
  table = Table(title=f"Available {prefix}* Environment Variables")
601
601
  table.add_column("Environment Variable")
mlxsmith/config.py CHANGED
@@ -18,7 +18,6 @@ Config files support @path syntax:
18
18
  from __future__ import annotations
19
19
 
20
20
  import json
21
- import os
22
21
  from pathlib import Path
23
22
  from typing import Any, Dict, List, Optional, Tuple, Union
24
23
 
@@ -94,7 +93,7 @@ class ProjectSettings(BaseSettings):
94
93
 
95
94
 
96
95
  # Import CLI aliases from models
97
- from .config_models import CLI_ALIASES as _CLI_ALIASES
96
+ from .config_models import CLI_ALIASES as _CLI_ALIASES # noqa: E402
98
97
 
99
98
 
100
99
  def resolve_config_path(config: Union[str, Path], root: Optional[Path] = None) -> Path:
mlxsmith/llm/interface.py CHANGED
@@ -182,11 +182,9 @@ def compute_logprobs(
182
182
  else:
183
183
  prompt_len = len(prompt_ids)
184
184
 
185
- # Get generation with logprobs
186
- full_text = backend.decode(ids)
187
-
188
- # Use backend's sequence_logprob if available
189
- seq_logprob = backend.sequence_logprob(ids, prompt_len=prompt_len)
185
+ # Decode and compute sequence-level logprob (used by callers via backend state)
186
+ backend.decode(ids)
187
+ backend.sequence_logprob(ids, prompt_len=prompt_len)
190
188
 
191
189
  # For per-token logprobs, we'd need to do a forward pass
192
190
  # This is a simplified version
@@ -2,7 +2,7 @@ from __future__ import annotations
2
2
 
3
3
  import random
4
4
  from dataclasses import dataclass
5
- from typing import Sequence, Any, List, Dict, Optional
5
+ from typing import Sequence, Any, List, Dict
6
6
 
7
7
  from .backend import Generation
8
8
 
@@ -9,27 +9,22 @@ Manages rollout requests, training batches, and weight updates.
9
9
 
10
10
  from __future__ import annotations
11
11
 
12
- import json
13
12
  import multiprocessing as mp
14
13
  import signal
15
- import sys
16
14
  import time
17
15
  import traceback
18
16
  from dataclasses import dataclass, field
19
17
  from pathlib import Path
20
- from typing import Any, Dict, List, Optional, Callable
18
+ from typing import Any, Dict, List, Optional
21
19
 
22
20
  from rich.console import Console
23
21
 
24
22
  from ..config import ProjectConfig
25
- from ..rlm.corpus import append_corpus, load_corpus, sample_corpus
26
- from ..rlm.gating import load_state, save_state, should_accept, update_state
27
- from ..rlm.history import append_history
28
- from ..rlm.inference import Rollout, build_tasks
23
+ from ..rlm.gating import load_state
29
24
  from ..rlm.weights import WeightPointerStore, WeightPointerIPC
30
25
  from ..runs import new_run, snapshot_config
31
- from ..util import ensure_dir, now_ts, write_jsonl
32
- from .queue import MessageQueue, MessageType, Message
26
+ from ..util import ensure_dir, now_ts
27
+ from .queue import MessageQueue, MessageType
33
28
  from .inference_worker import InferenceConfig, run_inference_worker
34
29
  from .trainer_worker import TrainerConfig, run_trainer_worker
35
30
 
@@ -26,7 +26,7 @@ from fastapi.responses import StreamingResponse
26
26
  from ..config import ProjectConfig
27
27
  from ..llm.registry import get_llm_backend
28
28
  from ..models import resolve_model_spec
29
- from ..rlm.weights import WeightPointerStore, WeightPointerIPC
29
+ from ..rlm.weights import WeightPointerStore
30
30
  from .queue import MessageQueue, MessageType, Message
31
31
 
32
32
 
@@ -11,11 +11,11 @@ import json
11
11
  import multiprocessing as mp
12
12
  import time
13
13
  import uuid
14
- from dataclasses import asdict, dataclass, field
14
+ from dataclasses import dataclass, field
15
15
  from enum import Enum, auto
16
16
  from pathlib import Path
17
17
  from queue import Empty
18
- from typing import Any, Dict, List, Optional, Union
18
+ from typing import Any, Dict, List, Optional
19
19
 
20
20
 
21
21
  class MessageType(Enum):
@@ -9,7 +9,6 @@ from __future__ import annotations
9
9
 
10
10
  import json
11
11
  import signal
12
- import sys
13
12
  import time
14
13
  import traceback
15
14
  from collections import defaultdict
@@ -17,7 +16,6 @@ from dataclasses import dataclass, field
17
16
  from pathlib import Path
18
17
  from typing import Any, Dict, List, Optional
19
18
 
20
- from ..config import ProjectConfig
21
19
  from ..llm.registry import get_llm_backend
22
20
  from ..rlm.inference import Rollout
23
21
  from ..rlm.weights import WeightPointerStore, WeightPointerIPC
mlxsmith/rlm/loop.py CHANGED
@@ -15,12 +15,11 @@ from __future__ import annotations
15
15
  import json
16
16
  import multiprocessing as mp
17
17
  import signal
18
- import sys
19
18
  import time
20
19
  import traceback
21
- from dataclasses import dataclass, asdict
20
+ from dataclasses import dataclass
22
21
  from pathlib import Path
23
- from typing import Any, Dict, List, Optional, Callable
22
+ from typing import Dict, List, Optional
24
23
 
25
24
  from rich.console import Console
26
25
 
@@ -35,10 +34,9 @@ from ..verifiers.docker_verifier import verify as docker_verify
35
34
  from ..verifiers.pytest_verifier import verify as pytest_verify
36
35
  from .corpus import append_corpus, load_corpus, sample_corpus
37
36
  from .gating import load_state, save_state, should_accept, update_state
38
- from .generate import GeneratedTask, generate_tasks, filter_tasks
37
+ from .generate import GeneratedTask
39
38
  from .history import append_history
40
39
  from .inference import Rollout, build_tasks
41
- from .mutate import mutate_tasks
42
40
  from .trainer import train_on_rollouts
43
41
  from .weights import (
44
42
  WeightPointer,
@@ -373,9 +371,9 @@ def run_rlm(
373
371
  # Multi-Process Orchestrated RLM
374
372
  # =============================================================================
375
373
 
376
- from ..orchestrator.queue import MessageQueue, MessageType, Message
377
- from ..orchestrator.inference_worker import InferenceConfig, run_inference_worker
378
- from ..orchestrator.trainer_worker import TrainerConfig, run_trainer_worker
374
+ from ..orchestrator.queue import MessageQueue, MessageType, Message # noqa: E402
375
+ from ..orchestrator.inference_worker import InferenceConfig, run_inference_worker # noqa: E402
376
+ from ..orchestrator.trainer_worker import TrainerConfig, run_trainer_worker # noqa: E402
379
377
 
380
378
 
381
379
  @dataclass
mlxsmith/rlm/weights.py CHANGED
@@ -9,7 +9,7 @@ from __future__ import annotations
9
9
  import json
10
10
  import multiprocessing as mp
11
11
  import time
12
- from dataclasses import dataclass, asdict
12
+ from dataclasses import dataclass
13
13
  from pathlib import Path
14
14
  from typing import Optional, Callable
15
15
 
mlxsmith/sdk/future.py CHANGED
@@ -7,9 +7,8 @@ and progress tracking for async operations.
7
7
  from __future__ import annotations
8
8
 
9
9
  import threading
10
- import time
11
10
  from concurrent.futures import Future, ThreadPoolExecutor
12
- from typing import Any, Callable, Generic, Iterable, Optional, TypeVar, Union
11
+ from typing import Any, Callable, Generic, Iterable, Optional, TypeVar
13
12
 
14
13
  from ..llm.backend import DecodingConfig
15
14
 
@@ -344,7 +343,7 @@ class APIFuture(Generic[T]):
344
343
  self._run_finally_callbacks()
345
344
  return True
346
345
 
347
- def cancelled(self) -> bool: # type: ignore
346
+ def cancelled(self) -> bool: # type: ignore # noqa: F811
348
347
  """Check if the future was cancelled."""
349
348
  with self._lock:
350
349
  return self._state == APIFutureState.CANCELLED
@@ -27,8 +27,7 @@ Example:
27
27
  from __future__ import annotations
28
28
 
29
29
  from dataclasses import dataclass, field
30
- from typing import Any, Dict, List, Optional, Sequence, Union, Callable
31
- import concurrent.futures
30
+ from typing import Any, Dict, List, Optional, Sequence
32
31
 
33
32
  from .future import APIFuture, SdkFuturePool
34
33
 
@@ -22,7 +22,7 @@ Example:
22
22
  from __future__ import annotations
23
23
 
24
24
  from dataclasses import dataclass
25
- from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union
25
+ from typing import Any, Callable, Dict, List, Optional
26
26
 
27
27
  from .future import APIFuture, SdkFuturePool
28
28
 
@@ -194,7 +194,7 @@ class TrainingClient:
194
194
  if batch.is_preference:
195
195
  # Preference training (DPO, ORPO, etc.)
196
196
  if batch.rejected_responses is None:
197
- raise ValueError(f"Preference batch requires rejected_responses")
197
+ raise ValueError("Preference batch requires rejected_responses")
198
198
 
199
199
  for prompt, chosen, rejected in zip(
200
200
  batch.prompts,
@@ -345,7 +345,6 @@ class TrainingClient:
345
345
  def _run_save() -> CheckpointResult:
346
346
  try:
347
347
  from pathlib import Path
348
- import json
349
348
 
350
349
  save_path = Path(path)
351
350
  save_path.parent.mkdir(parents=True, exist_ok=True)
@@ -673,4 +672,4 @@ class DistillationTrainingClient(TrainingClient):
673
672
 
674
673
 
675
674
  # Import at end to avoid circular dependency
676
- from .sampling_client import SamplingClient
675
+ from .sampling_client import SamplingClient # noqa: E402
mlxsmith/server.py CHANGED
@@ -10,15 +10,10 @@ This module provides:
10
10
 
11
11
  from __future__ import annotations
12
12
 
13
- import json
14
- import time
15
- import uuid
16
13
  from pathlib import Path
17
- from typing import Any, Dict, List, Optional
18
14
 
19
15
  from fastapi import FastAPI
20
- from fastapi.responses import StreamingResponse, HTMLResponse
21
- from pydantic import BaseModel
16
+ from fastapi.responses import HTMLResponse
22
17
 
23
18
  from .config import ProjectConfig
24
19
  from .models import resolve_model_spec
mlxsmith/train/lora.py CHANGED
@@ -160,6 +160,8 @@ def apply_lora(model: Any, cfg: LoRAConfig) -> dict:
160
160
  keys = sorted(_keys_for_target_modules(model, cfg.target_modules))
161
161
 
162
162
  if tuner_utils is not None and hasattr(tuner_utils, "linear_to_lora_layers"):
163
+ # Freeze all base weights first so only LoRA params are trainable
164
+ model.freeze()
163
165
  # MLX-LM format
164
166
  config = {
165
167
  "rank": int(cfg.r),
@@ -256,6 +258,8 @@ def apply_adapter(model: Any, adapter_dir: str | Path) -> dict | None:
256
258
  adapter_cfg = load_adapter_config(adapter_dir)
257
259
  tuner_utils, _ = _try_mlx_lm_utils()
258
260
  if adapter_cfg is not None and tuner_utils is not None and hasattr(tuner_utils, "load_adapters"):
261
+ # Freeze base weights so only adapter params are trainable
262
+ model.freeze()
259
263
  tuner_utils.load_adapters(model, str(adapter_dir))
260
264
  return adapter_cfg
261
265
 
@@ -0,0 +1,293 @@
1
+ Metadata-Version: 2.4
2
+ Name: mlxsmith
3
+ Version: 0.1.1
4
+ Summary: Apple Silicon MLX fine-tuning toolkit — SFT, DPO/ORPO, GRPO, distillation, and OpenAI-compatible serving.
5
+ Author-email: Shannon Labs <hmbown@gmail.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/Hmbown/MLXSmith
8
+ Project-URL: Repository, https://github.com/Hmbown/MLXSmith
9
+ Project-URL: Issues, https://github.com/Hmbown/MLXSmith/issues
10
+ Keywords: mlx,apple-silicon,llm,fine-tuning,lora,openai-compatible
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: Intended Audience :: Science/Research
14
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
15
+ Classifier: License :: OSI Approved :: MIT License
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3 :: Only
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Operating System :: MacOS :: MacOS X
22
+ Requires-Python: >=3.10
23
+ Description-Content-Type: text/markdown
24
+ License-File: LICENSE
25
+ Requires-Dist: typer>=0.9.0
26
+ Requires-Dist: rich>=13.7.0
27
+ Requires-Dist: pyyaml>=6.0.1
28
+ Requires-Dist: pydantic>=2.5.0
29
+ Requires-Dist: pydantic-settings>=2.2.1
30
+ Requires-Dist: tomli>=2.0.1; python_version < "3.11"
31
+ Requires-Dist: huggingface_hub>=1.3.4
32
+ Requires-Dist: jsonschema>=4.21.0
33
+ Provides-Extra: mlx
34
+ Requires-Dist: mlx>=0.30.4; extra == "mlx"
35
+ Provides-Extra: llm
36
+ Requires-Dist: mlx-lm>=0.30.5; extra == "llm"
37
+ Requires-Dist: transformers>=5.0.0; extra == "llm"
38
+ Requires-Dist: datasets>=3.0.0; extra == "llm"
39
+ Provides-Extra: serve
40
+ Requires-Dist: fastapi>=0.128.0; extra == "serve"
41
+ Requires-Dist: uvicorn>=0.40.0; extra == "serve"
42
+ Requires-Dist: httpx>=0.28.0; extra == "serve"
43
+ Provides-Extra: zmlx
44
+ Requires-Dist: zmlx; extra == "zmlx"
45
+ Provides-Extra: dev
46
+ Requires-Dist: pytest>=9.0.0; extra == "dev"
47
+ Requires-Dist: ruff>=0.14.0; extra == "dev"
48
+ Provides-Extra: all
49
+ Requires-Dist: mlx>=0.30.4; extra == "all"
50
+ Requires-Dist: mlx-lm>=0.30.5; extra == "all"
51
+ Requires-Dist: transformers>=5.0.0; extra == "all"
52
+ Requires-Dist: datasets>=3.0.0; extra == "all"
53
+ Requires-Dist: fastapi>=0.128.0; extra == "all"
54
+ Requires-Dist: uvicorn>=0.40.0; extra == "all"
55
+ Requires-Dist: httpx>=0.28.0; extra == "all"
56
+ Dynamic: license-file
57
+
58
+ # mlxsmith
59
+
60
+ Apple Silicon MLX fine-tuning toolkit — SFT, DPO/ORPO, GRPO, distillation, and OpenAI-compatible serving.
61
+
62
+ **Status:** alpha (v0.1.1). Full training pipeline validated on Qwen3-4B.
63
+
64
+ ## Install
65
+
66
+ MLX training and serving require macOS on Apple Silicon.
67
+ Other platforms can use data tools and mock backends.
68
+
69
+ ```bash
70
+ python -m venv .venv && source .venv/bin/activate
71
+ pip install -U pip
72
+
73
+ # Core CLI (data tools, config, project scaffolding)
74
+ pip install mlxsmith
75
+
76
+ # Apple Silicon training + serving
77
+ pip install "mlxsmith[mlx,llm,serve]"
78
+
79
+ # Everything
80
+ pip install "mlxsmith[all]"
81
+ ```
82
+
83
+ ## Quickstart
84
+
85
+ ```bash
86
+ mlxsmith init myproj
87
+ cd myproj
88
+ mlxsmith doctor # check Python, MLX, Metal, ZMLX
89
+ ```
90
+
91
+ ## Training
92
+
93
+ ### SFT (LoRA/QLoRA)
94
+
95
+ ```bash
96
+ mlxsmith sft --model cache/mlx/Qwen__Qwen3-4B-Instruct-2507 --data data/sft
97
+ ```
98
+
99
+ Produces run artifacts under `runs/sft_NNNN/` (adapter weights, `metrics.jsonl`, config snapshot).
100
+
101
+ ### Preference tuning (DPO/ORPO)
102
+
103
+ ```bash
104
+ mlxsmith pref --model cache/mlx/Qwen__Qwen3-4B-Instruct-2507 \
105
+ --data data/prefs --algo dpo
106
+ ```
107
+
108
+ Supports DPO and ORPO algorithms with configurable beta and KL coefficients. Expects `{prompt, chosen, rejected}` data format.
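+
+ For reference, a minimal preference record in that format (one JSON object per line; the `data/prefs/train.jsonl` path is an assumption based on the command above, not a documented filename):
+
+ ```json
+ {"prompt": "Which is larger, 9.11 or 9.9?", "chosen": "9.9 is larger.", "rejected": "9.11 is larger."}
+ ```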
109
+
110
+ ### Reinforced fine-tuning (GRPO)
111
+
112
+ ```bash
113
+ mlxsmith rft --model cache/mlx/Qwen__Qwen3-4B-Instruct-2507 \
114
+ --env envs/coding.yaml --verifier verifiers/pytest.py
115
+ ```
116
+
117
+ GRPO-style RL training with token-level environment integration and verifier-based rewards. Includes rollout acceptance/rejection gating and reward tracking.
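+
+ As a rough sketch of the group-relative idea behind GRPO (illustrative only, not mlxsmith's exact implementation), verifier rewards for a group of rollouts sampled from the same prompt are normalized within that group to form advantages:
+
+ ```python
+ def group_relative_advantages(rewards: list[float], eps: float = 1e-6) -> list[float]:
+     """Normalize per-rollout rewards within one prompt's rollout group."""
+     mean = sum(rewards) / len(rewards)
+     std = (sum((r - mean) ** 2 for r in rewards) / len(rewards)) ** 0.5
+     # Rollouts that beat the group average get a positive advantage.
+     return [(r - mean) / (std + eps) for r in rewards]
+
+ # e.g. verifier rewards for four rollouts of one prompt
+ print(group_relative_advantages([1.0, 0.0, 0.0, 1.0]))  # roughly [1.0, -1.0, -1.0, 1.0]
+ ```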
118
+
119
+ ### Knowledge distillation
120
+
121
+ ```bash
122
+ # Offline distillation (teacher generates, student learns)
123
+ mlxsmith distill --teacher large-model --student small-model --mode offline
124
+
125
+ # Online preference distillation (OPD)
126
+ mlxsmith distill --teacher large-model --student small-model --mode opd
127
+ ```
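+
+ In both modes the student is trained to match teacher behavior. A common formulation (shown only as background; not necessarily the exact loss mlxsmith uses) is a token-level KL divergence between teacher and student next-token distributions:
+
+ ```python
+ import math
+
+ def token_kl(teacher_probs: list[float], student_probs: list[float]) -> float:
+     """KL(teacher || student) at one token position (background sketch)."""
+     return sum(t * math.log(t / s) for t, s in zip(teacher_probs, student_probs) if t > 0)
+
+ # A sequence-level distillation loss would average this over token positions.
+ ```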
128
+
129
+ ### Full pipeline
130
+
131
+ ```bash
132
+ # Run SFT → Pref → RFT in sequence
133
+ mlxsmith pipeline
134
+ ```
135
+
136
+ ## Serving
137
+
138
+ OpenAI-compatible `/v1/chat/completions` endpoint.
139
+
140
+ ```bash
141
+ mlxsmith serve --model runs/sft_0001/adapter --port 8080
142
+ ```
143
+
144
+ ```bash
145
+ curl http://localhost:8080/v1/chat/completions \
146
+ -H 'Content-Type: application/json' \
147
+ -d '{"messages":[{"role":"user","content":"Hello"}],"max_tokens":64}'
148
+ ```
149
+
150
+ Supports streaming (`"stream": true`), logprobs, stop sequences, and an optional UI dashboard (`serve.ui: true` in config).
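+
+ A minimal streaming request reuses the payload above with `"stream": true` added (`-N` just disables curl's output buffering):
+
+ ```bash
+ curl -N http://localhost:8080/v1/chat/completions \
+   -H 'Content-Type: application/json' \
+   -d '{"messages":[{"role":"user","content":"Hello"}],"max_tokens":64,"stream":true}'
+ ```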
151
+
152
+ ## Data tools
153
+
154
+ ```bash
155
+ mlxsmith data presets # list built-in datasets
156
+ mlxsmith data pull alpaca # pull a preset
157
+ mlxsmith data import raw.json --out data/sft/train.jsonl # import ShareGPT → JSONL
158
+ mlxsmith data split data/sft/train.jsonl --fractions 0.9 0.05 0.05
159
+ mlxsmith data stats data/sft/train.jsonl # token counts, field analysis
160
+ mlxsmith data validate data/sft/train.jsonl # structure check
161
+ ```
162
+
163
+ Built-in presets: `alpaca`, `hh-rlhf`, `ultrachat-200k`, `ultrafeedback-binarized-prefs`, `ultrafeedback-binarized-sft`.
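+
+ For context, `data import` targets ShareGPT-style conversation dumps. A source record conventionally looks like the following (shown for illustration; the exact JSONL schema mlxsmith writes out is not specified in this README):
+
+ ```json
+ {"conversations": [{"from": "human", "value": "Hello"}, {"from": "gpt", "value": "Hi, how can I help?"}]}
+ ```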
164
+
165
+ ## Model management
166
+
167
+ ```bash
168
+ # Pull + convert HF model to MLX
169
+ mlxsmith pull Qwen/Qwen3-4B-Instruct-2507
170
+
171
+ # With quantization
172
+ mlxsmith pull Qwen/Qwen3-4B-Instruct-2507 --quantize --q-bits 4
173
+
174
+ # Merge adapters
175
+ mlxsmith adapters merge runs/sft_0001/adapter runs/pref_0001/adapter --weights 0.7 0.3
176
+ ```
177
+
178
+ ## HF auth
179
+
180
+ ```bash
181
+ mlxsmith auth login --token "$HF_TOKEN"
182
+ mlxsmith auth status
183
+ mlxsmith auth logout
184
+ ```
185
+
186
+ ## Eval and bench
187
+
188
+ ```bash
189
+ # Evaluation suite (pass@k with verifier checks)
190
+ mlxsmith eval --suite eval/suites/coding.yaml
191
+
192
+ # Benchmark inference or training throughput
193
+ mlxsmith bench --mode inference
194
+ mlxsmith bench --mode trainer
195
+ mlxsmith bench --mode end_to_end
196
+ ```
197
+
198
+ ## Verifiers
199
+
200
+ Built-in verifiers for eval, RFT, and preference tuning:
201
+
202
+ - **regex** — pattern matching on completions
203
+ - **jsonschema** — JSON structure validation
204
+ - **pytest** — sandboxed test execution
205
+ - **docker** — containerized verification
206
+ - **compose** — multi-verifier composition (AND/OR/weighted)
207
+
208
+ See `docs/VERIFIERS.md` for the verifier API.
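+
+ As a rough sketch of the shape a verifier takes (the pytest and docker verifiers, for instance, expose a module-level `verify` function; the argument names and result shape below are assumptions, see `docs/VERIFIERS.md` for the real API):
+
+ ```python
+ # Hypothetical custom verifier; not the documented mlxsmith interface.
+ import re
+
+ def verify(completion: str, task: dict) -> dict:
+     """Score a model completion against a task-supplied regex."""
+     pattern = task.get("pattern", r"\bdef\s+\w+\(")  # e.g. "defines a Python function"
+     passed = re.search(pattern, completion) is not None
+     return {"passed": passed, "score": 1.0 if passed else 0.0}
+ ```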
209
+
210
+ ## Environment plugin system
211
+
212
+ ```bash
213
+ mlxsmith env list # list available environments
214
+ mlxsmith env info envs/coding.yaml # show manifest (tasks, verifier, version)
215
+ mlxsmith env init my_env # scaffold a new environment
216
+ mlxsmith env install ./my_env # install from directory
217
+ mlxsmith env package ./my_env # create distributable tarball
218
+ mlxsmith env run envs/coding.yaml # execute RFT with this environment
219
+ ```
220
+
221
+ Environments define tasks, verifiers, and reward functions for RFT training. See `docs/ENVIRONMENTS.md`.
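+
+ A manifest sketch, assuming YAML keys along the lines of what `env info` reports above (the field names and task shape are illustrative guesses, not a documented schema):
+
+ ```yaml
+ # envs/coding.yaml (hypothetical contents)
+ name: coding
+ version: 0.1.0
+ verifier: verifiers/pytest.py
+ tasks:
+   - prompt: "Write a function that reverses a string."
+     tests: "assert solution('ab') == 'ba'"
+ ```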
222
+
223
+ ## Config system
224
+
225
+ ```bash
226
+ mlxsmith config show # display merged config (YAML/JSON/TOML)
227
+ mlxsmith config show --sources # show where each value comes from
228
+ mlxsmith config init # create default mlxsmith.yaml
229
+ mlxsmith config validate # check config structure
230
+ mlxsmith config env # show environment variable mapping
231
+ ```
232
+
233
+ Config sources (in priority order): CLI flags > environment variables (`MLXSMITH__SECTION__KEY`) > config file > defaults.
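+
+ For example, using the `serve.ui` key from the serving section above, this environment variable is equivalent to setting the key in `mlxsmith.yaml`:
+
+ ```bash
+ # mlxsmith.yaml equivalent:
+ #   serve:
+ #     ui: true
+ export MLXSMITH__SERVE__UI=true
+ ```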
234
+
235
+ ## SDK (programmatic API)
236
+
237
+ For building custom training loops:
238
+
239
+ ```python
240
+ from mlxsmith.sdk import load_model, SamplingClient, TrainingClient, TrainingBatch
241
+
242
+ loaded = load_model("path/to/model", config)  # config: a project/training config object (not constructed in this snippet)
243
+
244
+ # Sampling with logprobs
245
+ sampler = SamplingClient(loaded.backend)
246
+ result = sampler.sample("prompt", logprobs_k=5)
247
+
248
+ # Training operations
249
+ trainer = TrainingClient(loaded.backend)
250
+ trainer.create_optimizer(lr=1e-4, weight_decay=0.01)
251
+ fb = trainer.forward_backward(batch)
252
+ trainer.optim_step(fb.result().grads)
253
+ ```
254
+
255
+ Loss functions: DPO, ORPO, GRPO, CISPO, DRO, PPO, importance sampling, cross-entropy.
256
+
257
+ ## Research
258
+
259
+ ### RLM self-play loop
260
+
261
+ RLM (Recursive Language Model) is a research feature — the infrastructure runs but has not produced measured gains yet.
262
+
263
+ ```bash
264
+ mlxsmith rlm # single-process RLM
265
+ mlxsmith pipeline --orchestrated # multi-process orchestrated RLM
266
+ mlxsmith rlm status # check iteration state
267
+ mlxsmith rlm history # view history
268
+ ```
269
+
270
+ Includes task generation, mutation for data diversity, corpus management, EMA-based gating, and weight pointer IPC for multi-process coordination. See `docs/orchestrator.md`.
271
+
272
+ ### ZMLX acceleration
273
+
274
+ Optional zero-copy MLX acceleration backend.
275
+
276
+ ```bash
277
+ mlxsmith accel status
278
+ ```
279
+
280
+ ## Docs
281
+
282
+ - `docs/PROJECT_FORMAT.md` — project layout and artifacts
283
+ - `docs/VERIFIERS.md` — verifier API and sandbox behavior
284
+ - `docs/COMPATIBILITY.md` — tested versions and model families
285
+ - `docs/ENVIRONMENTS.md` — environment plugin system
286
+ - `docs/orchestrator.md` — multi-process RLM orchestrator
287
+ - `docs/rlm-ctl.md` — RLM training guide
288
+ - `docs/ROADMAP.md` — product direction and milestones
289
+ - `docs/README.md` — full docs index
290
+
291
+ ## License
292
+
293
+ MIT
@@ -2,55 +2,55 @@ mlxsmith/__init__.py,sha256=CJZKl9Hp16DYlQR7yqstir-cL4n7GCw90d-meXliCHk,48
2
2
  mlxsmith/adapters.py,sha256=wkQ2q_ugaxCviNARSmxehwBcc2_NKVJ7mOofT-y30TY,1318
3
3
  mlxsmith/auth.py,sha256=_j_gx5ccZfpHs0_Xmpcgh_ELhX3ZBJLg2YYpjA-aPRI,2195
4
4
  mlxsmith/bench.py,sha256=VBgY9uOGm3xhmL3UrNNOnUoa3P0yaVQQ7wxykIEmDEw,3778
5
- mlxsmith/cli.py,sha256=YHjrPti1OCe0m2gpEwygiJ6_x-xeycKHRDYCksdcOuM,35750
6
- mlxsmith/config.py,sha256=iFDhO7KcAtq1KySGD7TfNMYqbQcec0Tc0-VJdr5Gwo4,16385
5
+ mlxsmith/cli.py,sha256=I7vzO2Z3ja1ncMOGHhXSEN_kElfWdmA6Jvs4rFgINZ0,35767
6
+ mlxsmith/config.py,sha256=K1VbN-3WoWf4vzZ6BNeTgEz2DFH8s9YminqwyNBVLj0,16389
7
7
  mlxsmith/config_models.py,sha256=pMsLGyC9J9T9jqs5ipYFUuEkfir7iiDA4EZdcb65k5g,8407
8
8
  mlxsmith/data.py,sha256=3ZlNS8bnD7LlWACEmULbf8RGQzCuf0QulFpI1PWvNuI,16160
9
9
  mlxsmith/eval.py,sha256=nSARSEKKwZM8Ot5rUDDpGikaClGNxvg0ifgGkTA6mM0,3792
10
10
  mlxsmith/infer.py,sha256=ekpHhTird0dnTJzFOc-O98rjwkEKgAr9AFicKlaB3MA,4610
11
11
  mlxsmith/models.py,sha256=BRaPTxzqy-5KEKdccveMgjpbRP0ZmbRnA_su8rz2P4k,8033
12
12
  mlxsmith/runs.py,sha256=2voYBryGGMlAKskHJ7TDiIPQL2_fFxSQ8RgtfGZ7ccg,1409
13
- mlxsmith/server.py,sha256=CBxmV9WwpyivsHytVALgT4DOGh1vqjwm4W_iHBH79rg,10821
13
+ mlxsmith/server.py,sha256=Fk-i9xK_Teq0Z0m-W1GRJVtcG0nYvd4bQ85lnAUuT1w,10690
14
14
  mlxsmith/util.py,sha256=8fagFtMP2YS1qlWkCt8bPWAz2jAgfvAf1ipNGJZAgIk,4544
15
15
  mlxsmith/accel/__init__.py,sha256=Mv2mj-2bLqUILhMUCjMLu7JORcu0-cGBOri32j7O5Xo,291
16
16
  mlxsmith/accel/base.py,sha256=o0kmxV68KbxOmucC3eDjKbFA8dfTT2ETqN0XD_l3mM0,435
17
17
  mlxsmith/accel/none.py,sha256=WhxECIBv-pE63Vh1Iv86ObgT9JHOi4hA4BUyJc__sKU,362
18
18
  mlxsmith/accel/zmlx_backend.py,sha256=JfzQ44v9hrCJgcqU018ZD7qLNlubIe09CwYRpKyfwR8,1529
19
19
  mlxsmith/api/__init__.py,sha256=IrpIXDkUJm4BZqahYOK_0BkxvomlscEvCoLCm4GDxo8,998
20
- mlxsmith/api/handlers.py,sha256=5YKQfHLG2mWjCugOkG1L3veiPMz_Qg_s4h12xx6eibs,47068
20
+ mlxsmith/api/handlers.py,sha256=94Spq4glFp7mRwmKrFqt7erd1Af_PxVP_vpxCo2UFdQ,46896
21
21
  mlxsmith/api/schemas.py,sha256=Q18kF9FKtvT1vdnXy6feSNTtCV2FiRWDzfdsPzc0te8,19316
22
22
  mlxsmith/envs/__init__.py,sha256=t7QiEHtfyP1dUCj-4TJUN0hd9lRqBKYd5Ek7dgEwus4,671
23
23
  mlxsmith/envs/system.py,sha256=2bChkOxm2S7d0WCrweHGhoI6-xOYDxlC0YbHH6Ibjq4,12782
24
24
  mlxsmith/envs/token_env.py,sha256=rhv2o3eI1GyTtfAXG72z-31amNGaLv0KW56mEsWkXlY,6709
25
25
  mlxsmith/llm/__init__.py,sha256=jWEkXGdvwZ8tUYHVqWW3SYHXG-LSWaGbdwOR0mF_4Zw,396
26
26
  mlxsmith/llm/backend.py,sha256=_xGfdJ30_6Nq8zIqMjTemCZWV-vUi9qV0djfwxny5SM,4335
27
- mlxsmith/llm/interface.py,sha256=zmqJuyiEQqjEpDjZwooS1hp1GfTPCTiWYJ5nPw8xSqg,6633
27
+ mlxsmith/llm/interface.py,sha256=udQl_R7ecmM4Nh20P50Nmnv2h853ByrgevjQIRDxX4g,6601
28
28
  mlxsmith/llm/mlx_lm_backend.py,sha256=OitqY_6LqnfqbN7GZz39Ma3cItjjNqHXF3SV3AZsHxk,18284
29
- mlxsmith/llm/mock_backend.py,sha256=DXXnMlXZGCtf0datgjFc5z_X8JMEKaYDRPXKU1erGdQ,7449
29
+ mlxsmith/llm/mock_backend.py,sha256=ZTkPRnRmCXSkhOJ1TLbSgTnAcLTKa9sq6-zzbTEX6Qc,7439
30
30
  mlxsmith/llm/registry.py,sha256=ZmYE-WclyMo6z0HwUufqt3tKT4E84xZ6I-PFu1Z5nws,309
31
31
  mlxsmith/orchestrator/__init__.py,sha256=oc4qIkISZMGMvYeOqU8lDDmFL3uxDYJHsv_rra9DH-E,792
32
- mlxsmith/orchestrator/daemon.py,sha256=MRQf84sCjeShBFcOcF7yfP6HhHl7IHrnmw0nV50mJrI,16360
33
- mlxsmith/orchestrator/inference_worker.py,sha256=4AI_j7qnrnwXObBxSa7YHlZHnGfqou-W1fpqHYt8jpQ,17981
34
- mlxsmith/orchestrator/queue.py,sha256=8c1n-fiW3ITcLbwOeFlH_AEZdJYvZumy8EczJ0lh4NA,11301
35
- mlxsmith/orchestrator/trainer_worker.py,sha256=uvJQesXjfdsiNRsO2FVHhUk0WmMB_PQNSPff5U9Swp4,16061
32
+ mlxsmith/orchestrator/daemon.py,sha256=VJFF8s_85h4C-YM14wRUlzDHrhhVPfQOztmNLKyRk30,16107
33
+ mlxsmith/orchestrator/inference_worker.py,sha256=PfmsanrBnx9HZNqG00jTQQTKqDa2bl-wUtYAWtxfzvs,17963
34
+ mlxsmith/orchestrator/queue.py,sha256=E8VymvJi2zEpuTwsG7JB-vROJGGS5evPPhIpkmdwtq4,11286
35
+ mlxsmith/orchestrator/trainer_worker.py,sha256=IM7vOhkYlCKoQSC1tNobkeIVnkmsz4DtAWfu24bjJXY,16015
36
36
  mlxsmith/rlm/__init__.py,sha256=Q09oRONXWTFXuWwMJOpGWg0I-UDkuib0OA1O_cNFp2U,236
37
37
  mlxsmith/rlm/corpus.py,sha256=-p12H650_ybe2kXC219M4wXYpD08QHUpY92ErVjSfX8,2112
38
38
  mlxsmith/rlm/gating.py,sha256=L18niYKEezphASdsgzW6pz3PN7ylA-L5Wu4_GLLVfHw,2455
39
39
  mlxsmith/rlm/generate.py,sha256=q1v_TP8sqVj05omhoF60Ns1iX6yClgc7lP6njz4lK18,7601
40
40
  mlxsmith/rlm/history.py,sha256=Vm4JtWqsZnqB-fuo3zWfweeogmmLTL3VHaYZ45vrkz8,299
41
41
  mlxsmith/rlm/inference.py,sha256=ntCEKxD1KrkIXgZNQbD4jhS5rJPtwcVYc8qLc5E5cnc,5297
42
- mlxsmith/rlm/loop.py,sha256=WLRP1PI5PX7OjSEZsurrCwSTVVPNVIx4-7td1ihSMY0,49955
42
+ mlxsmith/rlm/loop.py,sha256=nkAR5KYErcq893kQYigFkl3NG469ZUu1CkN_sS1ObAM,49900
43
43
  mlxsmith/rlm/mutate.py,sha256=_NUNMpVCRaEDgtzI8J2NOTcj5NnycZnP_UoUpFacjTs,2553
44
44
  mlxsmith/rlm/trainer.py,sha256=RRXPlJy4SySpLZGge0ORMYs7HeiWgfGQNeMBOBfG4Ys,3014
45
- mlxsmith/rlm/weights.py,sha256=NO7wjl2T0eXTVFoYrzPT_IUmaLvD2z-zSSyKpcX93kY,8463
45
+ mlxsmith/rlm/weights.py,sha256=tgl4Uc80QF9YpCCr3ewBmL7uru9As2fDA1Z1SgZn-e4,8455
46
46
  mlxsmith/sdk/__init__.py,sha256=42WpTgC309sYKp8SArULBWz0trVN51THcjvPdVh-thc,10777
47
- mlxsmith/sdk/future.py,sha256=mleqPgJ997hSuZuQegvS2GoOxqo_gd4pfh37gv70APc,16873
47
+ mlxsmith/sdk/future.py,sha256=WmYB-fDstaEuv-FUNX_S7IJSENbVEsYYEEVzH02ImLk,16868
48
48
  mlxsmith/sdk/losses.py,sha256=lJi3R7Red_QO3IatbhKi_GBI7kM0yu-kS14xN2kX_04,7532
49
- mlxsmith/sdk/sampling_client.py,sha256=AkjJHEZ8OLU8SZoALk7ds3NHMquiqk9GGZHqni5vB2g,24942
50
- mlxsmith/sdk/training_client.py,sha256=4GXcswTJypkymPO3E5DREUesGzlumbmUrTnt0T80wEI,24380
49
+ mlxsmith/sdk/sampling_client.py,sha256=o7jfgYpVWXrrIHo4-SrGAJx4FAlYdo198da27Jp0Yj4,24899
50
+ mlxsmith/sdk/training_client.py,sha256=71bSgS65ofXL9X4qGSyDhVL5asDxENAf95LV66pwC3g,24341
51
51
  mlxsmith/train/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
52
52
  mlxsmith/train/distill.py,sha256=9Xbzn6zt8uqYOwg_pJB2rZJj8y-nESP1OB8DLxnJ0jM,10378
53
- mlxsmith/train/lora.py,sha256=E8ymi1wUVsGp5-0DldvkSWDHNh_5Fhr6AelJOR5YoPw,8859
53
+ mlxsmith/train/lora.py,sha256=k3aNqBjMyE6rPGS2CAJRSDsTJiUa1ztjrA3k9N87IjY,9046
54
54
  mlxsmith/train/pref.py,sha256=-z7mj-MQu6dPb8y0U6sRFbqKU0nNQ3YRpC3CcmS3l5k,6987
55
55
  mlxsmith/train/rft.py,sha256=bf6z-h6VQKvMDZ0XN_ayZV44YsIvuwSzP1YRYrwSQ2M,18956
56
56
  mlxsmith/train/sft.py,sha256=w3QmLLoscNQzz-xbtmrCw46PbYIApvgeQi0XjxCop90,5590
@@ -61,9 +61,9 @@ mlxsmith/verifiers/jsonschema.py,sha256=hG_8c07Hwv-tpN2g0oxELwmLRxS8QGzRFwabmo4y
61
61
  mlxsmith/verifiers/pytest_verifier.py,sha256=ARNajzxUPNwtzSow6I2d0mLopZyvY29_d3F1sYVwEUY,2514
62
62
  mlxsmith/verifiers/regex.py,sha256=N7z3koE8Iy-a4DBs4404iQCNX2WGxequm5g4umric2Y,524
63
63
  mlxsmith/verifiers/types.py,sha256=FytBxB1OnNX1EcqZXSSs3WvL0GRv7byW4mfBMf6xP68,240
64
- mlxsmith-0.1.0.dist-info/licenses/LICENSE,sha256=ESYyLizI0WWtxMeS7rGVcX3ivMezm-HOd5WdeOh-9oU,1056
65
- mlxsmith-0.1.0.dist-info/METADATA,sha256=yRnNRKdCFPZQRgAN7qtxCDBrUJxplOqK3mTV_Uoh2jA,4812
66
- mlxsmith-0.1.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
67
- mlxsmith-0.1.0.dist-info/entry_points.txt,sha256=ys7GcKEjhzhkkTMBmmaNavTgsjqOuFnCKIG2w8Wcn6w,46
68
- mlxsmith-0.1.0.dist-info/top_level.txt,sha256=hKBwc8bn7uoI-_5Yhcq1T3IuChFhUFdzItIkZK1up6A,9
69
- mlxsmith-0.1.0.dist-info/RECORD,,
64
+ mlxsmith-0.1.1.dist-info/licenses/LICENSE,sha256=ESYyLizI0WWtxMeS7rGVcX3ivMezm-HOd5WdeOh-9oU,1056
65
+ mlxsmith-0.1.1.dist-info/METADATA,sha256=d0H3FBZa2-BfGVbz3TWpjtgWhMe-t46bp8PQELh5yFQ,9131
66
+ mlxsmith-0.1.1.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
67
+ mlxsmith-0.1.1.dist-info/entry_points.txt,sha256=ys7GcKEjhzhkkTMBmmaNavTgsjqOuFnCKIG2w8Wcn6w,46
68
+ mlxsmith-0.1.1.dist-info/top_level.txt,sha256=hKBwc8bn7uoI-_5Yhcq1T3IuChFhUFdzItIkZK1up6A,9
69
+ mlxsmith-0.1.1.dist-info/RECORD,,
@@ -1,163 +0,0 @@
1
- Metadata-Version: 2.4
2
- Name: mlxsmith
3
- Version: 0.1.0
4
- Summary: Apple Silicon MLX fine-tuning and OpenAI-compatible serving (SFT stable; preference/RL experimental).
5
- Author-email: Shannon Labs <hmbown@gmail.com>
6
- License: MIT
7
- Project-URL: Homepage, https://github.com/Hmbown/MLXSmith
8
- Project-URL: Repository, https://github.com/Hmbown/MLXSmith
9
- Project-URL: Issues, https://github.com/Hmbown/MLXSmith/issues
10
- Keywords: mlx,apple-silicon,llm,fine-tuning,lora,openai-compatible
11
- Classifier: Development Status :: 3 - Alpha
12
- Classifier: Intended Audience :: Developers
13
- Classifier: Intended Audience :: Science/Research
14
- Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
15
- Classifier: License :: OSI Approved :: MIT License
16
- Classifier: Programming Language :: Python :: 3
17
- Classifier: Programming Language :: Python :: 3 :: Only
18
- Classifier: Programming Language :: Python :: 3.10
19
- Classifier: Programming Language :: Python :: 3.11
20
- Classifier: Programming Language :: Python :: 3.12
21
- Classifier: Operating System :: MacOS :: MacOS X
22
- Requires-Python: >=3.10
23
- Description-Content-Type: text/markdown
24
- License-File: LICENSE
25
- Requires-Dist: typer>=0.9.0
26
- Requires-Dist: rich>=13.7.0
27
- Requires-Dist: pyyaml>=6.0.1
28
- Requires-Dist: pydantic>=2.5.0
29
- Requires-Dist: pydantic-settings>=2.2.1
30
- Requires-Dist: tomli>=2.0.1; python_version < "3.11"
31
- Requires-Dist: huggingface_hub>=1.3.4
32
- Requires-Dist: jsonschema>=4.21.0
33
- Provides-Extra: mlx
34
- Requires-Dist: mlx>=0.30.4; extra == "mlx"
35
- Provides-Extra: llm
36
- Requires-Dist: mlx-lm>=0.30.5; extra == "llm"
37
- Requires-Dist: transformers>=5.0.0; extra == "llm"
38
- Requires-Dist: datasets>=3.0.0; extra == "llm"
39
- Provides-Extra: serve
40
- Requires-Dist: fastapi>=0.128.0; extra == "serve"
41
- Requires-Dist: uvicorn>=0.40.0; extra == "serve"
42
- Requires-Dist: httpx>=0.28.0; extra == "serve"
43
- Provides-Extra: zmlx
44
- Requires-Dist: zmlx; extra == "zmlx"
45
- Provides-Extra: dev
46
- Requires-Dist: pytest>=9.0.0; extra == "dev"
47
- Requires-Dist: ruff>=0.14.0; extra == "dev"
48
- Provides-Extra: all
49
- Requires-Dist: mlx>=0.30.4; extra == "all"
50
- Requires-Dist: mlx-lm>=0.30.5; extra == "all"
51
- Requires-Dist: transformers>=5.0.0; extra == "all"
52
- Requires-Dist: datasets>=3.0.0; extra == "all"
53
- Requires-Dist: fastapi>=0.128.0; extra == "all"
54
- Requires-Dist: uvicorn>=0.40.0; extra == "all"
55
- Requires-Dist: httpx>=0.28.0; extra == "all"
56
- Dynamic: license-file
57
-
58
- # mlxsmith
59
-
60
- Apple Silicon MLX fine-tuning and OpenAI-compatible serving.
61
- SFT + serving are stable. Preference/RL/RLM features are experimental.
62
-
63
- Status: alpha (2026-02-02).
64
-
65
- ## Stable features
66
- - Project init, config, data tools, HF auth, model pull/convert.
67
- - SFT (LoRA/QLoRA) training with run tracking and adapters.
68
- - Inference and OpenAI-compatible /v1/chat/completions serving.
69
- - Basic eval/bench and verifier plumbing (regex/jsonschema/pytest).
70
-
71
- ## Experimental features
72
- - Preference tuning (DPO/ORPO).
73
- - GRPO-style RFT.
74
- - RLM self-play loop (research).
75
- - Distill/OPD and orchestrated RLM.
76
-
77
- ## Install
78
-
79
- MLX is only available on Apple Silicon. Other platforms can still use data tools
80
- and mock backends, but MLX training and serving require macOS on Apple Silicon.
81
-
82
- ```bash
83
- python -m venv .venv && source .venv/bin/activate
84
- pip install -U pip
85
-
86
- # Core CLI
87
- pip install mlxsmith
88
-
89
- # Apple Silicon training + serving
90
- pip install "mlxsmith[mlx,llm,serve]"
91
- ```
92
-
93
- ## Quickstart
94
-
95
- ```bash
96
- mlxsmith init myproj
97
- cd myproj
98
- mlxsmith doctor
99
- ```
100
-
101
- ## HF auth (optional)
102
-
103
- ```bash
104
- mlxsmith auth login --token "$HF_TOKEN"
105
- mlxsmith auth status
106
- mlxsmith auth logout
107
- ```
108
-
109
- ## Pull + convert a model (HF -> MLX)
110
-
111
- ```bash
112
- mlxsmith pull Qwen/Qwen3-4B-Instruct-2507
113
- # outputs to cache/mlx/Qwen__Qwen3-4B-Instruct-2507
114
- ```
115
-
116
- Optional quantization:
117
-
118
- ```bash
119
- mlxsmith pull Qwen/Qwen3-4B-Instruct-2507 --quantize --q-bits 4
120
- ```
121
-
122
- ## SFT (LoRA/QLoRA)
123
-
124
- ```bash
125
- mlxsmith sft --model cache/mlx/Qwen__Qwen3-4B-Instruct-2507 --data data/sft
126
- ```
127
-
128
- ## Serve (OpenAI-compatible)
129
-
130
- ```bash
131
- mlxsmith serve --model runs/sft_0001/adapter --port 8080
132
- ```
133
-
134
- Sample request:
135
-
136
- ```bash
137
- curl http://localhost:8080/v1/chat/completions \
138
- -H 'Content-Type: application/json' \
139
- -d '{"messages":[{"role":"user","content":"Hello"}],"max_tokens":64}'
140
- ```
141
-
142
- To enable the optional UI/monitor dashboard, set `serve.ui: true` in `mlxsmith.yaml`.
143
-
144
- ## Experimental commands
145
-
146
- - `mlxsmith pref` (DPO/ORPO)
147
- - `mlxsmith rft` (GRPO-style)
148
- - `mlxsmith rlm` / `mlxsmith pipeline` (self-play loop)
149
- - `mlxsmith distill` (offline/OPD)
150
- - `mlxsmith eval` / `mlxsmith bench`
151
-
152
- ## Docs
153
-
154
- - `docs/PROJECT_FORMAT.md` for project layout and artifacts.
155
- - `docs/VERIFIERS.md` for verifier API and sandbox behavior.
156
- - `docs/COMPATIBILITY.md` for tested versions and model families.
157
- - `docs/ENVIRONMENTS.md` for the environment plugin system.
158
- - `docs/ROADMAP.md` for product direction and milestones.
159
- - `docs/README.md` for the full docs index.
160
-
161
- ## License
162
-
163
- MIT