mlxsmith 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,10 +1,7 @@
1
1
  from __future__ import annotations
2
2
  from .none import NoneBackend
3
- from .zmlx_backend import ZMLXBackend
4
3
 
5
4
  def get_backend(name: str):
6
5
  if name == "none":
7
6
  return NoneBackend()
8
- if name == "zmlx":
9
- return ZMLXBackend()
10
7
  raise ValueError(f"Unknown accel backend: {name}")
mlxsmith/api/handlers.py CHANGED
@@ -20,11 +20,10 @@ import uuid
20
20
  from pathlib import Path
21
21
  from typing import Any, AsyncGenerator, Callable, Dict, List, Optional
22
22
 
23
- from fastapi import APIRouter, Depends, FastAPI, HTTPException, Request, Security, status
23
+ from fastapi import APIRouter, FastAPI, HTTPException, Request, Security, status
24
24
  from starlette.middleware.base import BaseHTTPMiddleware
25
25
  from fastapi.responses import StreamingResponse
26
26
  from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
27
- from pydantic import BaseModel
28
27
 
29
28
  from .schemas import (
30
29
  AdapterReloadRequest,
@@ -1102,12 +1101,10 @@ def create_router(
1102
1101
  models, use the list endpoint to check completion status.
1103
1102
  """
1104
1103
  cache_dir = _get_cache_dir()
1105
- local_path = cache_dir / "mlx" / request.model_id.replace("/", "__")
1106
-
1104
+
1107
1105
  try:
1108
1106
  # Import here to avoid circular dependencies
1109
1107
  from ..models import hf_pull
1110
- from ..config import ProjectConfig
1111
1108
 
1112
1109
  # Get HF token if available
1113
1110
  hf_token = None
mlxsmith/cli.py CHANGED
@@ -43,7 +43,7 @@ from .envs import (
43
43
 
44
44
  app = typer.Typer(
45
45
  add_completion=False,
46
- help="mlxsmith — MLX fine-tuning + OpenAI-compatible serving (SFT stable; preference/RL experimental)",
46
+ help="mlxsmith — Apple Silicon MLX fine-tuning toolkit: SFT, DPO/ORPO, GRPO, distillation, and OpenAI-compatible serving.",
47
47
  )
48
48
  console = Console()
49
49
 
@@ -83,7 +83,6 @@ def doctor():
83
83
  table.add_row("cpu_count", str(info.cpu_count))
84
84
  table.add_row("metal", str(info.has_metal))
85
85
  table.add_row("mlx", f"{info.has_mlx} {info.mlx_version or ''}".strip())
86
- table.add_row("zmlx", str(info.has_zmlx))
87
86
  console.print(table)
88
87
 
89
88
 
@@ -564,7 +563,7 @@ def config_validate(
564
563
 
565
564
  try:
566
565
  cfg = load_config(cfg_path, require=True)
567
- console.print(f"[green]✓ Configuration is valid[/green]")
566
+ console.print("[green]✓ Configuration is valid[/green]")
568
567
 
569
568
  # Show summary
570
569
  table = Table(title="Configuration Summary")
@@ -593,9 +592,9 @@ def config_env(
593
592
  """Show available environment variables."""
594
593
  cfg = ProjectConfig()
595
594
 
596
- console.print(f"\n[bold]Environment Variable Configuration[/bold]")
595
+ console.print("\n[bold]Environment Variable Configuration[/bold]")
597
596
  console.print(f"Prefix: [cyan]{prefix}[/cyan]")
598
- console.print(f"Nested delimiter: [cyan]__[/cyan] (double underscore)\n")
597
+ console.print("Nested delimiter: [cyan]__[/cyan] (double underscore)\n")
599
598
 
600
599
  table = Table(title=f"Available {prefix}* Environment Variables")
601
600
  table.add_column("Environment Variable")
@@ -729,7 +728,7 @@ def rlm_history(limit: int = typer.Option(10, "--limit")):
729
728
 
730
729
  @accel_app.command("status")
731
730
  def accel_status():
732
- backends = ["none", "zmlx"]
731
+ backends = ["none"]
733
732
  table = Table(title="mlxsmith accel status")
734
733
  table.add_column("backend")
735
734
  table.add_column("available")
mlxsmith/config.py CHANGED
@@ -18,7 +18,6 @@ Config files support @path syntax:
18
18
  from __future__ import annotations
19
19
 
20
20
  import json
21
- import os
22
21
  from pathlib import Path
23
22
  from typing import Any, Dict, List, Optional, Tuple, Union
24
23
 
@@ -94,7 +93,7 @@ class ProjectSettings(BaseSettings):
94
93
 
95
94
 
96
95
  # Import CLI aliases from models
97
- from .config_models import CLI_ALIASES as _CLI_ALIASES
96
+ from .config_models import CLI_ALIASES as _CLI_ALIASES # noqa: E402
98
97
 
99
98
 
100
99
  def resolve_config_path(config: Union[str, Path], root: Optional[Path] = None) -> Path:
mlxsmith/config_models.py CHANGED
@@ -6,7 +6,7 @@ from typing import Dict, List, Literal, Optional, Any
6
6
 
7
7
  from pydantic import BaseModel, Field, field_validator
8
8
 
9
- AccelBackendName = Literal["none", "zmlx"]
9
+ AccelBackendName = Literal["none"]
10
10
 
11
11
 
12
12
  class ModelConfig(BaseModel):
mlxsmith/llm/interface.py CHANGED
@@ -182,11 +182,9 @@ def compute_logprobs(
182
182
  else:
183
183
  prompt_len = len(prompt_ids)
184
184
 
185
- # Get generation with logprobs
186
- full_text = backend.decode(ids)
187
-
188
- # Use backend's sequence_logprob if available
189
- seq_logprob = backend.sequence_logprob(ids, prompt_len=prompt_len)
185
+ # Decode and compute sequence-level logprob (used by callers via backend state)
186
+ backend.decode(ids)
187
+ backend.sequence_logprob(ids, prompt_len=prompt_len)
190
188
 
191
189
  # For per-token logprobs, we'd need to do a forward pass
192
190
  # This is a simplified version
@@ -2,7 +2,7 @@ from __future__ import annotations
2
2
 
3
3
  import random
4
4
  from dataclasses import dataclass
5
- from typing import Sequence, Any, List, Dict, Optional
5
+ from typing import Sequence, Any, List, Dict
6
6
 
7
7
  from .backend import Generation
8
8
 
@@ -9,27 +9,22 @@ Manages rollout requests, training batches, and weight updates.
9
9
 
10
10
  from __future__ import annotations
11
11
 
12
- import json
13
12
  import multiprocessing as mp
14
13
  import signal
15
- import sys
16
14
  import time
17
15
  import traceback
18
16
  from dataclasses import dataclass, field
19
17
  from pathlib import Path
20
- from typing import Any, Dict, List, Optional, Callable
18
+ from typing import Any, Dict, List, Optional
21
19
 
22
20
  from rich.console import Console
23
21
 
24
22
  from ..config import ProjectConfig
25
- from ..rlm.corpus import append_corpus, load_corpus, sample_corpus
26
- from ..rlm.gating import load_state, save_state, should_accept, update_state
27
- from ..rlm.history import append_history
28
- from ..rlm.inference import Rollout, build_tasks
23
+ from ..rlm.gating import load_state
29
24
  from ..rlm.weights import WeightPointerStore, WeightPointerIPC
30
25
  from ..runs import new_run, snapshot_config
31
- from ..util import ensure_dir, now_ts, write_jsonl
32
- from .queue import MessageQueue, MessageType, Message
26
+ from ..util import ensure_dir, now_ts
27
+ from .queue import MessageQueue, MessageType
33
28
  from .inference_worker import InferenceConfig, run_inference_worker
34
29
  from .trainer_worker import TrainerConfig, run_trainer_worker
35
30
 
@@ -26,7 +26,7 @@ from fastapi.responses import StreamingResponse
26
26
  from ..config import ProjectConfig
27
27
  from ..llm.registry import get_llm_backend
28
28
  from ..models import resolve_model_spec
29
- from ..rlm.weights import WeightPointerStore, WeightPointerIPC
29
+ from ..rlm.weights import WeightPointerStore
30
30
  from .queue import MessageQueue, MessageType, Message
31
31
 
32
32
 
@@ -11,11 +11,11 @@ import json
11
11
  import multiprocessing as mp
12
12
  import time
13
13
  import uuid
14
- from dataclasses import asdict, dataclass, field
14
+ from dataclasses import dataclass, field
15
15
  from enum import Enum, auto
16
16
  from pathlib import Path
17
17
  from queue import Empty
18
- from typing import Any, Dict, List, Optional, Union
18
+ from typing import Any, Dict, List, Optional
19
19
 
20
20
 
21
21
  class MessageType(Enum):
@@ -9,7 +9,6 @@ from __future__ import annotations
9
9
 
10
10
  import json
11
11
  import signal
12
- import sys
13
12
  import time
14
13
  import traceback
15
14
  from collections import defaultdict
@@ -17,7 +16,6 @@ from dataclasses import dataclass, field
17
16
  from pathlib import Path
18
17
  from typing import Any, Dict, List, Optional
19
18
 
20
- from ..config import ProjectConfig
21
19
  from ..llm.registry import get_llm_backend
22
20
  from ..rlm.inference import Rollout
23
21
  from ..rlm.weights import WeightPointerStore, WeightPointerIPC
mlxsmith/rlm/loop.py CHANGED
@@ -15,12 +15,11 @@ from __future__ import annotations
15
15
  import json
16
16
  import multiprocessing as mp
17
17
  import signal
18
- import sys
19
18
  import time
20
19
  import traceback
21
- from dataclasses import dataclass, asdict
20
+ from dataclasses import dataclass
22
21
  from pathlib import Path
23
- from typing import Any, Dict, List, Optional, Callable
22
+ from typing import Dict, List, Optional
24
23
 
25
24
  from rich.console import Console
26
25
 
@@ -35,10 +34,9 @@ from ..verifiers.docker_verifier import verify as docker_verify
35
34
  from ..verifiers.pytest_verifier import verify as pytest_verify
36
35
  from .corpus import append_corpus, load_corpus, sample_corpus
37
36
  from .gating import load_state, save_state, should_accept, update_state
38
- from .generate import GeneratedTask, generate_tasks, filter_tasks
37
+ from .generate import GeneratedTask
39
38
  from .history import append_history
40
39
  from .inference import Rollout, build_tasks
41
- from .mutate import mutate_tasks
42
40
  from .trainer import train_on_rollouts
43
41
  from .weights import (
44
42
  WeightPointer,
@@ -373,9 +371,9 @@ def run_rlm(
373
371
  # Multi-Process Orchestrated RLM
374
372
  # =============================================================================
375
373
 
376
- from ..orchestrator.queue import MessageQueue, MessageType, Message
377
- from ..orchestrator.inference_worker import InferenceConfig, run_inference_worker
378
- from ..orchestrator.trainer_worker import TrainerConfig, run_trainer_worker
374
+ from ..orchestrator.queue import MessageQueue, MessageType, Message # noqa: E402
375
+ from ..orchestrator.inference_worker import InferenceConfig, run_inference_worker # noqa: E402
376
+ from ..orchestrator.trainer_worker import TrainerConfig, run_trainer_worker # noqa: E402
379
377
 
380
378
 
381
379
  @dataclass
mlxsmith/rlm/weights.py CHANGED
@@ -9,7 +9,7 @@ from __future__ import annotations
9
9
  import json
10
10
  import multiprocessing as mp
11
11
  import time
12
- from dataclasses import dataclass, asdict
12
+ from dataclasses import dataclass
13
13
  from pathlib import Path
14
14
  from typing import Optional, Callable
15
15
 
mlxsmith/sdk/future.py CHANGED
@@ -7,9 +7,8 @@ and progress tracking for async operations.
7
7
  from __future__ import annotations
8
8
 
9
9
  import threading
10
- import time
11
10
  from concurrent.futures import Future, ThreadPoolExecutor
12
- from typing import Any, Callable, Generic, Iterable, Optional, TypeVar, Union
11
+ from typing import Any, Callable, Generic, Iterable, Optional, TypeVar
13
12
 
14
13
  from ..llm.backend import DecodingConfig
15
14
 
@@ -344,7 +343,7 @@ class APIFuture(Generic[T]):
344
343
  self._run_finally_callbacks()
345
344
  return True
346
345
 
347
- def cancelled(self) -> bool: # type: ignore
346
+ def cancelled(self) -> bool: # type: ignore # noqa: F811
348
347
  """Check if the future was cancelled."""
349
348
  with self._lock:
350
349
  return self._state == APIFutureState.CANCELLED
@@ -27,8 +27,7 @@ Example:
27
27
  from __future__ import annotations
28
28
 
29
29
  from dataclasses import dataclass, field
30
- from typing import Any, Dict, List, Optional, Sequence, Union, Callable
31
- import concurrent.futures
30
+ from typing import Any, Dict, List, Optional, Sequence
32
31
 
33
32
  from .future import APIFuture, SdkFuturePool
34
33
 
@@ -22,7 +22,7 @@ Example:
22
22
  from __future__ import annotations
23
23
 
24
24
  from dataclasses import dataclass
25
- from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union
25
+ from typing import Any, Callable, Dict, List, Optional
26
26
 
27
27
  from .future import APIFuture, SdkFuturePool
28
28
 
@@ -194,7 +194,7 @@ class TrainingClient:
194
194
  if batch.is_preference:
195
195
  # Preference training (DPO, ORPO, etc.)
196
196
  if batch.rejected_responses is None:
197
- raise ValueError(f"Preference batch requires rejected_responses")
197
+ raise ValueError("Preference batch requires rejected_responses")
198
198
 
199
199
  for prompt, chosen, rejected in zip(
200
200
  batch.prompts,
@@ -345,7 +345,6 @@ class TrainingClient:
345
345
  def _run_save() -> CheckpointResult:
346
346
  try:
347
347
  from pathlib import Path
348
- import json
349
348
 
350
349
  save_path = Path(path)
351
350
  save_path.parent.mkdir(parents=True, exist_ok=True)
@@ -673,4 +672,4 @@ class DistillationTrainingClient(TrainingClient):
673
672
 
674
673
 
675
674
  # Import at end to avoid circular dependency
676
- from .sampling_client import SamplingClient
675
+ from .sampling_client import SamplingClient # noqa: E402
mlxsmith/server.py CHANGED
@@ -10,15 +10,10 @@ This module provides:
10
10
 
11
11
  from __future__ import annotations
12
12
 
13
- import json
14
- import time
15
- import uuid
16
13
  from pathlib import Path
17
- from typing import Any, Dict, List, Optional
18
14
 
19
15
  from fastapi import FastAPI
20
- from fastapi.responses import StreamingResponse, HTMLResponse
21
- from pydantic import BaseModel
16
+ from fastapi.responses import HTMLResponse
22
17
 
23
18
  from .config import ProjectConfig
24
19
  from .models import resolve_model_spec
mlxsmith/train/lora.py CHANGED
@@ -160,6 +160,8 @@ def apply_lora(model: Any, cfg: LoRAConfig) -> dict:
160
160
  keys = sorted(_keys_for_target_modules(model, cfg.target_modules))
161
161
 
162
162
  if tuner_utils is not None and hasattr(tuner_utils, "linear_to_lora_layers"):
163
+ # Freeze all base weights first so only LoRA params are trainable
164
+ model.freeze()
163
165
  # MLX-LM format
164
166
  config = {
165
167
  "rank": int(cfg.r),
@@ -256,6 +258,8 @@ def apply_adapter(model: Any, adapter_dir: str | Path) -> dict | None:
256
258
  adapter_cfg = load_adapter_config(adapter_dir)
257
259
  tuner_utils, _ = _try_mlx_lm_utils()
258
260
  if adapter_cfg is not None and tuner_utils is not None and hasattr(tuner_utils, "load_adapters"):
261
+ # Freeze base weights so only adapter params are trainable
262
+ model.freeze()
259
263
  tuner_utils.load_adapters(model, str(adapter_dir))
260
264
  return adapter_cfg
261
265
 
mlxsmith/util.py CHANGED
@@ -46,7 +46,6 @@ class SystemInfo:
46
46
  has_metal: Optional[bool]
47
47
  has_mlx: bool
48
48
  mlx_version: Optional[str]
49
- has_zmlx: bool
50
49
 
51
50
  def detect_system() -> SystemInfo:
52
51
  has_mlx = False
@@ -58,10 +57,6 @@ def detect_system() -> SystemInfo:
58
57
  except Exception:
59
58
  pass
60
59
 
61
- import importlib.util
62
-
63
- has_zmlx = importlib.util.find_spec("zmlx") is not None
64
-
65
60
  # Metal detection (best-effort): on macOS we assume Metal is present; for CI, this is not reliable.
66
61
  has_metal = None
67
62
  if sys.platform == "darwin":
@@ -83,7 +78,6 @@ def detect_system() -> SystemInfo:
83
78
  has_metal=has_metal,
84
79
  has_mlx=has_mlx,
85
80
  mlx_version=mlx_version,
86
- has_zmlx=has_zmlx,
87
81
  )
88
82
 
89
83
  def require(cond: bool, msg: str):
@@ -0,0 +1,283 @@
1
+ Metadata-Version: 2.4
2
+ Name: mlxsmith
3
+ Version: 0.1.2
4
+ Summary: Apple Silicon MLX fine-tuning toolkit — SFT, DPO/ORPO, GRPO, distillation, and OpenAI-compatible serving.
5
+ Author-email: Shannon Labs <hmbown@gmail.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/Hmbown/MLXSmith
8
+ Project-URL: Repository, https://github.com/Hmbown/MLXSmith
9
+ Project-URL: Issues, https://github.com/Hmbown/MLXSmith/issues
10
+ Keywords: mlx,apple-silicon,llm,fine-tuning,lora,openai-compatible
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: Intended Audience :: Science/Research
14
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
15
+ Classifier: License :: OSI Approved :: MIT License
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3 :: Only
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Operating System :: MacOS :: MacOS X
22
+ Requires-Python: >=3.10
23
+ Description-Content-Type: text/markdown
24
+ License-File: LICENSE
25
+ Requires-Dist: typer>=0.9.0
26
+ Requires-Dist: rich>=13.7.0
27
+ Requires-Dist: pyyaml>=6.0.1
28
+ Requires-Dist: pydantic>=2.5.0
29
+ Requires-Dist: pydantic-settings>=2.2.1
30
+ Requires-Dist: tomli>=2.0.1; python_version < "3.11"
31
+ Requires-Dist: huggingface_hub>=1.3.4
32
+ Requires-Dist: jsonschema>=4.21.0
33
+ Provides-Extra: mlx
34
+ Requires-Dist: mlx>=0.30.4; extra == "mlx"
35
+ Provides-Extra: llm
36
+ Requires-Dist: mlx-lm>=0.30.5; extra == "llm"
37
+ Requires-Dist: transformers>=5.0.0; extra == "llm"
38
+ Requires-Dist: datasets>=3.0.0; extra == "llm"
39
+ Provides-Extra: serve
40
+ Requires-Dist: fastapi>=0.128.0; extra == "serve"
41
+ Requires-Dist: uvicorn>=0.40.0; extra == "serve"
42
+ Requires-Dist: httpx>=0.28.0; extra == "serve"
43
+ Provides-Extra: dev
44
+ Requires-Dist: pytest>=9.0.0; extra == "dev"
45
+ Requires-Dist: ruff>=0.14.0; extra == "dev"
46
+ Provides-Extra: all
47
+ Requires-Dist: mlx>=0.30.4; extra == "all"
48
+ Requires-Dist: mlx-lm>=0.30.5; extra == "all"
49
+ Requires-Dist: transformers>=5.0.0; extra == "all"
50
+ Requires-Dist: datasets>=3.0.0; extra == "all"
51
+ Requires-Dist: fastapi>=0.128.0; extra == "all"
52
+ Requires-Dist: uvicorn>=0.40.0; extra == "all"
53
+ Requires-Dist: httpx>=0.28.0; extra == "all"
54
+ Dynamic: license-file
55
+
56
+ # mlxsmith
57
+
58
+ Apple Silicon MLX fine-tuning toolkit — SFT, DPO/ORPO, GRPO, distillation, and OpenAI-compatible serving.
59
+
60
+ **Status:** alpha (v0.1.2). Full training pipeline validated on Qwen3-4B.
61
+
62
+ ## Install
63
+
64
+ MLX training and serving require macOS on Apple Silicon.
65
+ Other platforms can use data tools and mock backends.
66
+
67
+ ```bash
68
+ python -m venv .venv && source .venv/bin/activate
69
+ pip install -U pip
70
+
71
+ # Core CLI (data tools, config, project scaffolding)
72
+ pip install mlxsmith
73
+
74
+ # Apple Silicon training + serving
75
+ pip install "mlxsmith[mlx,llm,serve]"
76
+
77
+ # Everything
78
+ pip install "mlxsmith[all]"
79
+ ```
80
+
81
+ ## Quickstart
82
+
83
+ ```bash
84
+ mlxsmith init myproj
85
+ cd myproj
86
+ mlxsmith doctor # check Python, MLX, Metal
87
+ ```
88
+
89
+ ## Training
90
+
91
+ ### SFT (LoRA/QLoRA)
92
+
93
+ ```bash
94
+ mlxsmith sft --model cache/mlx/Qwen__Qwen3-4B-Instruct-2507 --data data/sft
95
+ ```
96
+
97
+ Produces run artifacts under `runs/sft_NNNN/` (adapter weights, `metrics.jsonl`, config snapshot).
98
+
99
+ ### Preference tuning (DPO/ORPO)
100
+
101
+ ```bash
102
+ mlxsmith pref --model cache/mlx/Qwen__Qwen3-4B-Instruct-2507 \
103
+ --data data/prefs --algo dpo
104
+ ```
105
+
106
+ Supports DPO and ORPO algorithms with configurable beta and KL coefficients. Training data must be in `{prompt, chosen, rejected}` format.
107
+
108
+ ### Reinforced fine-tuning (GRPO)
109
+
110
+ ```bash
111
+ mlxsmith rft --model cache/mlx/Qwen__Qwen3-4B-Instruct-2507 \
112
+ --env envs/coding.yaml --verifier verifiers/pytest.py
113
+ ```
114
+
115
+ GRPO-style RL training with token-level environment integration and verifier-based rewards. Includes rollout acceptance/rejection gating with reward tracking.
116
+
117
+ ### Knowledge distillation
118
+
119
+ ```bash
120
+ # Offline distillation (teacher generates, student learns)
121
+ mlxsmith distill --teacher large-model --student small-model --mode offline
122
+
123
+ # Online preference distillation (OPD)
124
+ mlxsmith distill --teacher large-model --student small-model --mode opd
125
+ ```
126
+
127
+ ### Full pipeline
128
+
129
+ ```bash
130
+ # Run SFT → Pref → RFT in sequence
131
+ mlxsmith pipeline
132
+ ```
133
+
134
+ ## Serving
135
+
136
+ OpenAI-compatible `/v1/chat/completions` endpoint.
137
+
138
+ ```bash
139
+ mlxsmith serve --model runs/sft_0001/adapter --port 8080
140
+ ```
141
+
142
+ ```bash
143
+ curl http://localhost:8080/v1/chat/completions \
144
+ -H 'Content-Type: application/json' \
145
+ -d '{"messages":[{"role":"user","content":"Hello"}],"max_tokens":64}'
146
+ ```
147
+
148
+ Supports streaming (`"stream": true`), logprobs, stop sequences, and an optional UI dashboard (`serve.ui: true` in config).
149
+
150
+ ## Data tools
151
+
152
+ ```bash
153
+ mlxsmith data presets # list built-in datasets
154
+ mlxsmith data pull alpaca # pull a preset
155
+ mlxsmith data import raw.json --out data/sft/train.jsonl # import ShareGPT → JSONL
156
+ mlxsmith data split data/sft/train.jsonl --fractions 0.9 0.05 0.05
157
+ mlxsmith data stats data/sft/train.jsonl # token counts, field analysis
158
+ mlxsmith data validate data/sft/train.jsonl # structure check
159
+ ```
160
+
161
+ Built-in presets: `alpaca`, `hh-rlhf`, `ultrachat-200k`, `ultrafeedback-binarized-prefs`, `ultrafeedback-binarized-sft`.
162
+
163
+ ## Model management
164
+
165
+ ```bash
166
+ # Pull + convert HF model to MLX
167
+ mlxsmith pull Qwen/Qwen3-4B-Instruct-2507
168
+
169
+ # With quantization
170
+ mlxsmith pull Qwen/Qwen3-4B-Instruct-2507 --quantize --q-bits 4
171
+
172
+ # Merge adapters
173
+ mlxsmith adapters merge runs/sft_0001/adapter runs/pref_0001/adapter --weights 0.7 0.3
174
+ ```
175
+
176
+ ## HF auth
177
+
178
+ ```bash
179
+ mlxsmith auth login --token "$HF_TOKEN"
180
+ mlxsmith auth status
181
+ mlxsmith auth logout
182
+ ```
183
+
184
+ ## Eval and bench
185
+
186
+ ```bash
187
+ # Evaluation suite (pass@k with verifier checks)
188
+ mlxsmith eval --suite eval/suites/coding.yaml
189
+
190
+ # Benchmark inference or training throughput
191
+ mlxsmith bench --mode inference
192
+ mlxsmith bench --mode trainer
193
+ mlxsmith bench --mode end_to_end
194
+ ```
195
+
196
+ ## Verifiers
197
+
198
+ Built-in verifiers for eval, RFT, and preference tuning:
199
+
200
+ - **regex** — pattern matching on completions
201
+ - **jsonschema** — JSON structure validation
202
+ - **pytest** — sandboxed test execution
203
+ - **docker** — containerized verification
204
+ - **compose** — multi-verifier composition (AND/OR/weighted)
205
+
206
+ See `docs/VERIFIERS.md` for the verifier API.
207
+
208
+ ## Environment plugin system
209
+
210
+ ```bash
211
+ mlxsmith env list # list available environments
212
+ mlxsmith env info envs/coding.yaml # show manifest (tasks, verifier, version)
213
+ mlxsmith env init my_env # scaffold a new environment
214
+ mlxsmith env install ./my_env # install from directory
215
+ mlxsmith env package ./my_env # create distributable tarball
216
+ mlxsmith env run envs/coding.yaml # execute RFT with this environment
217
+ ```
218
+
219
+ Environments define tasks, verifiers, and reward functions for RFT training. See `docs/ENVIRONMENTS.md`.
220
+
221
+ ## Config system
222
+
223
+ ```bash
224
+ mlxsmith config show # display merged config (YAML/JSON/TOML)
225
+ mlxsmith config show --sources # show where each value comes from
226
+ mlxsmith config init # create default mlxsmith.yaml
227
+ mlxsmith config validate # check config structure
228
+ mlxsmith config env # show environment variable mapping
229
+ ```
230
+
231
+ Config sources (in priority order): CLI flags > environment variables (`MLXSMITH__SECTION__KEY`) > config file > defaults.
232
+
233
+ ## SDK (programmatic API)
234
+
235
+ For building custom training loops:
236
+
237
+ ```python
238
+ from mlxsmith.sdk import load_model, SamplingClient, TrainingClient, TrainingBatch
239
+
240
+ loaded = load_model("path/to/model", config)
241
+
242
+ # Sampling with logprobs
243
+ sampler = SamplingClient(loaded.backend)
244
+ result = sampler.sample("prompt", logprobs_k=5)
245
+
246
+ # Training operations
247
+ trainer = TrainingClient(loaded.backend)
248
+ trainer.create_optimizer(lr=1e-4, weight_decay=0.01)
249
+ fb = trainer.forward_backward(batch)
250
+ trainer.optim_step(fb.result().grads)
251
+ ```
252
+
253
+ Loss functions: DPO, ORPO, GRPO, CISPO, DRO, PPO, importance sampling, cross-entropy.
254
+
255
+ ## Research
256
+
257
+ ### RLM self-play loop
258
+
259
+ RLM (Recursive Language Model) is a research feature — the infrastructure runs but has not produced measured gains yet.
260
+
261
+ ```bash
262
+ mlxsmith rlm # single-process RLM
263
+ mlxsmith pipeline --orchestrated # multi-process orchestrated RLM
264
+ mlxsmith rlm status # check iteration state
265
+ mlxsmith rlm history # view history
266
+ ```
267
+
268
+ The RLM loop includes task generation, mutation for data diversity, corpus management, EMA-based gating, and weight pointer IPC for multi-process coordination. See `docs/orchestrator.md`.
269
+
270
+ ## Docs
271
+
272
+ - `docs/PROJECT_FORMAT.md` — project layout and artifacts
273
+ - `docs/VERIFIERS.md` — verifier API and sandbox behavior
274
+ - `docs/COMPATIBILITY.md` — tested versions and model families
275
+ - `docs/ENVIRONMENTS.md` — environment plugin system
276
+ - `docs/orchestrator.md` — multi-process RLM orchestrator
277
+ - `docs/rlm-ctl.md` — RLM training guide
278
+ - `docs/ROADMAP.md` — product direction and milestones
279
+ - `docs/README.md` — full docs index
280
+
281
+ ## License
282
+
283
+ MIT
@@ -2,55 +2,54 @@ mlxsmith/__init__.py,sha256=CJZKl9Hp16DYlQR7yqstir-cL4n7GCw90d-meXliCHk,48
2
2
  mlxsmith/adapters.py,sha256=wkQ2q_ugaxCviNARSmxehwBcc2_NKVJ7mOofT-y30TY,1318
3
3
  mlxsmith/auth.py,sha256=_j_gx5ccZfpHs0_Xmpcgh_ELhX3ZBJLg2YYpjA-aPRI,2195
4
4
  mlxsmith/bench.py,sha256=VBgY9uOGm3xhmL3UrNNOnUoa3P0yaVQQ7wxykIEmDEw,3778
5
- mlxsmith/cli.py,sha256=YHjrPti1OCe0m2gpEwygiJ6_x-xeycKHRDYCksdcOuM,35750
6
- mlxsmith/config.py,sha256=iFDhO7KcAtq1KySGD7TfNMYqbQcec0Tc0-VJdr5Gwo4,16385
7
- mlxsmith/config_models.py,sha256=pMsLGyC9J9T9jqs5ipYFUuEkfir7iiDA4EZdcb65k5g,8407
5
+ mlxsmith/cli.py,sha256=MAS21bSz4DTkdMAao99KdtI0FtZFUEVQatSsS7VHPQ0,35713
6
+ mlxsmith/config.py,sha256=K1VbN-3WoWf4vzZ6BNeTgEz2DFH8s9YminqwyNBVLj0,16389
7
+ mlxsmith/config_models.py,sha256=x827pTs-SfPyqXI3mPR6h-cz7KQ69zQICPAaeGlW-A4,8399
8
8
  mlxsmith/data.py,sha256=3ZlNS8bnD7LlWACEmULbf8RGQzCuf0QulFpI1PWvNuI,16160
9
9
  mlxsmith/eval.py,sha256=nSARSEKKwZM8Ot5rUDDpGikaClGNxvg0ifgGkTA6mM0,3792
10
10
  mlxsmith/infer.py,sha256=ekpHhTird0dnTJzFOc-O98rjwkEKgAr9AFicKlaB3MA,4610
11
11
  mlxsmith/models.py,sha256=BRaPTxzqy-5KEKdccveMgjpbRP0ZmbRnA_su8rz2P4k,8033
12
12
  mlxsmith/runs.py,sha256=2voYBryGGMlAKskHJ7TDiIPQL2_fFxSQ8RgtfGZ7ccg,1409
13
- mlxsmith/server.py,sha256=CBxmV9WwpyivsHytVALgT4DOGh1vqjwm4W_iHBH79rg,10821
14
- mlxsmith/util.py,sha256=8fagFtMP2YS1qlWkCt8bPWAz2jAgfvAf1ipNGJZAgIk,4544
15
- mlxsmith/accel/__init__.py,sha256=Mv2mj-2bLqUILhMUCjMLu7JORcu0-cGBOri32j7O5Xo,291
13
+ mlxsmith/server.py,sha256=Fk-i9xK_Teq0Z0m-W1GRJVtcG0nYvd4bQ85lnAUuT1w,10690
14
+ mlxsmith/util.py,sha256=Cke2FxIVNmvpW9ElPxL5bc0JO_YFVHWtBwpR3QRtfWQ,4410
15
+ mlxsmith/accel/__init__.py,sha256=fBY65q66jql1q5YMT9aIo96dgvVLHqS_LoJbVNA2xHY,201
16
16
  mlxsmith/accel/base.py,sha256=o0kmxV68KbxOmucC3eDjKbFA8dfTT2ETqN0XD_l3mM0,435
17
17
  mlxsmith/accel/none.py,sha256=WhxECIBv-pE63Vh1Iv86ObgT9JHOi4hA4BUyJc__sKU,362
18
- mlxsmith/accel/zmlx_backend.py,sha256=JfzQ44v9hrCJgcqU018ZD7qLNlubIe09CwYRpKyfwR8,1529
19
18
  mlxsmith/api/__init__.py,sha256=IrpIXDkUJm4BZqahYOK_0BkxvomlscEvCoLCm4GDxo8,998
20
- mlxsmith/api/handlers.py,sha256=5YKQfHLG2mWjCugOkG1L3veiPMz_Qg_s4h12xx6eibs,47068
19
+ mlxsmith/api/handlers.py,sha256=94Spq4glFp7mRwmKrFqt7erd1Af_PxVP_vpxCo2UFdQ,46896
21
20
  mlxsmith/api/schemas.py,sha256=Q18kF9FKtvT1vdnXy6feSNTtCV2FiRWDzfdsPzc0te8,19316
22
21
  mlxsmith/envs/__init__.py,sha256=t7QiEHtfyP1dUCj-4TJUN0hd9lRqBKYd5Ek7dgEwus4,671
23
22
  mlxsmith/envs/system.py,sha256=2bChkOxm2S7d0WCrweHGhoI6-xOYDxlC0YbHH6Ibjq4,12782
24
23
  mlxsmith/envs/token_env.py,sha256=rhv2o3eI1GyTtfAXG72z-31amNGaLv0KW56mEsWkXlY,6709
25
24
  mlxsmith/llm/__init__.py,sha256=jWEkXGdvwZ8tUYHVqWW3SYHXG-LSWaGbdwOR0mF_4Zw,396
26
25
  mlxsmith/llm/backend.py,sha256=_xGfdJ30_6Nq8zIqMjTemCZWV-vUi9qV0djfwxny5SM,4335
27
- mlxsmith/llm/interface.py,sha256=zmqJuyiEQqjEpDjZwooS1hp1GfTPCTiWYJ5nPw8xSqg,6633
26
+ mlxsmith/llm/interface.py,sha256=udQl_R7ecmM4Nh20P50Nmnv2h853ByrgevjQIRDxX4g,6601
28
27
  mlxsmith/llm/mlx_lm_backend.py,sha256=OitqY_6LqnfqbN7GZz39Ma3cItjjNqHXF3SV3AZsHxk,18284
29
- mlxsmith/llm/mock_backend.py,sha256=DXXnMlXZGCtf0datgjFc5z_X8JMEKaYDRPXKU1erGdQ,7449
28
+ mlxsmith/llm/mock_backend.py,sha256=ZTkPRnRmCXSkhOJ1TLbSgTnAcLTKa9sq6-zzbTEX6Qc,7439
30
29
  mlxsmith/llm/registry.py,sha256=ZmYE-WclyMo6z0HwUufqt3tKT4E84xZ6I-PFu1Z5nws,309
31
30
  mlxsmith/orchestrator/__init__.py,sha256=oc4qIkISZMGMvYeOqU8lDDmFL3uxDYJHsv_rra9DH-E,792
32
- mlxsmith/orchestrator/daemon.py,sha256=MRQf84sCjeShBFcOcF7yfP6HhHl7IHrnmw0nV50mJrI,16360
33
- mlxsmith/orchestrator/inference_worker.py,sha256=4AI_j7qnrnwXObBxSa7YHlZHnGfqou-W1fpqHYt8jpQ,17981
34
- mlxsmith/orchestrator/queue.py,sha256=8c1n-fiW3ITcLbwOeFlH_AEZdJYvZumy8EczJ0lh4NA,11301
35
- mlxsmith/orchestrator/trainer_worker.py,sha256=uvJQesXjfdsiNRsO2FVHhUk0WmMB_PQNSPff5U9Swp4,16061
31
+ mlxsmith/orchestrator/daemon.py,sha256=VJFF8s_85h4C-YM14wRUlzDHrhhVPfQOztmNLKyRk30,16107
32
+ mlxsmith/orchestrator/inference_worker.py,sha256=PfmsanrBnx9HZNqG00jTQQTKqDa2bl-wUtYAWtxfzvs,17963
33
+ mlxsmith/orchestrator/queue.py,sha256=E8VymvJi2zEpuTwsG7JB-vROJGGS5evPPhIpkmdwtq4,11286
34
+ mlxsmith/orchestrator/trainer_worker.py,sha256=IM7vOhkYlCKoQSC1tNobkeIVnkmsz4DtAWfu24bjJXY,16015
36
35
  mlxsmith/rlm/__init__.py,sha256=Q09oRONXWTFXuWwMJOpGWg0I-UDkuib0OA1O_cNFp2U,236
37
36
  mlxsmith/rlm/corpus.py,sha256=-p12H650_ybe2kXC219M4wXYpD08QHUpY92ErVjSfX8,2112
38
37
  mlxsmith/rlm/gating.py,sha256=L18niYKEezphASdsgzW6pz3PN7ylA-L5Wu4_GLLVfHw,2455
39
38
  mlxsmith/rlm/generate.py,sha256=q1v_TP8sqVj05omhoF60Ns1iX6yClgc7lP6njz4lK18,7601
40
39
  mlxsmith/rlm/history.py,sha256=Vm4JtWqsZnqB-fuo3zWfweeogmmLTL3VHaYZ45vrkz8,299
41
40
  mlxsmith/rlm/inference.py,sha256=ntCEKxD1KrkIXgZNQbD4jhS5rJPtwcVYc8qLc5E5cnc,5297
42
- mlxsmith/rlm/loop.py,sha256=WLRP1PI5PX7OjSEZsurrCwSTVVPNVIx4-7td1ihSMY0,49955
41
+ mlxsmith/rlm/loop.py,sha256=nkAR5KYErcq893kQYigFkl3NG469ZUu1CkN_sS1ObAM,49900
43
42
  mlxsmith/rlm/mutate.py,sha256=_NUNMpVCRaEDgtzI8J2NOTcj5NnycZnP_UoUpFacjTs,2553
44
43
  mlxsmith/rlm/trainer.py,sha256=RRXPlJy4SySpLZGge0ORMYs7HeiWgfGQNeMBOBfG4Ys,3014
45
- mlxsmith/rlm/weights.py,sha256=NO7wjl2T0eXTVFoYrzPT_IUmaLvD2z-zSSyKpcX93kY,8463
44
+ mlxsmith/rlm/weights.py,sha256=tgl4Uc80QF9YpCCr3ewBmL7uru9As2fDA1Z1SgZn-e4,8455
46
45
  mlxsmith/sdk/__init__.py,sha256=42WpTgC309sYKp8SArULBWz0trVN51THcjvPdVh-thc,10777
47
- mlxsmith/sdk/future.py,sha256=mleqPgJ997hSuZuQegvS2GoOxqo_gd4pfh37gv70APc,16873
46
+ mlxsmith/sdk/future.py,sha256=WmYB-fDstaEuv-FUNX_S7IJSENbVEsYYEEVzH02ImLk,16868
48
47
  mlxsmith/sdk/losses.py,sha256=lJi3R7Red_QO3IatbhKi_GBI7kM0yu-kS14xN2kX_04,7532
49
- mlxsmith/sdk/sampling_client.py,sha256=AkjJHEZ8OLU8SZoALk7ds3NHMquiqk9GGZHqni5vB2g,24942
50
- mlxsmith/sdk/training_client.py,sha256=4GXcswTJypkymPO3E5DREUesGzlumbmUrTnt0T80wEI,24380
48
+ mlxsmith/sdk/sampling_client.py,sha256=o7jfgYpVWXrrIHo4-SrGAJx4FAlYdo198da27Jp0Yj4,24899
49
+ mlxsmith/sdk/training_client.py,sha256=71bSgS65ofXL9X4qGSyDhVL5asDxENAf95LV66pwC3g,24341
51
50
  mlxsmith/train/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
52
51
  mlxsmith/train/distill.py,sha256=9Xbzn6zt8uqYOwg_pJB2rZJj8y-nESP1OB8DLxnJ0jM,10378
53
- mlxsmith/train/lora.py,sha256=E8ymi1wUVsGp5-0DldvkSWDHNh_5Fhr6AelJOR5YoPw,8859
52
+ mlxsmith/train/lora.py,sha256=k3aNqBjMyE6rPGS2CAJRSDsTJiUa1ztjrA3k9N87IjY,9046
54
53
  mlxsmith/train/pref.py,sha256=-z7mj-MQu6dPb8y0U6sRFbqKU0nNQ3YRpC3CcmS3l5k,6987
55
54
  mlxsmith/train/rft.py,sha256=bf6z-h6VQKvMDZ0XN_ayZV44YsIvuwSzP1YRYrwSQ2M,18956
56
55
  mlxsmith/train/sft.py,sha256=w3QmLLoscNQzz-xbtmrCw46PbYIApvgeQi0XjxCop90,5590
@@ -61,9 +60,9 @@ mlxsmith/verifiers/jsonschema.py,sha256=hG_8c07Hwv-tpN2g0oxELwmLRxS8QGzRFwabmo4y
61
60
  mlxsmith/verifiers/pytest_verifier.py,sha256=ARNajzxUPNwtzSow6I2d0mLopZyvY29_d3F1sYVwEUY,2514
62
61
  mlxsmith/verifiers/regex.py,sha256=N7z3koE8Iy-a4DBs4404iQCNX2WGxequm5g4umric2Y,524
63
62
  mlxsmith/verifiers/types.py,sha256=FytBxB1OnNX1EcqZXSSs3WvL0GRv7byW4mfBMf6xP68,240
64
- mlxsmith-0.1.0.dist-info/licenses/LICENSE,sha256=ESYyLizI0WWtxMeS7rGVcX3ivMezm-HOd5WdeOh-9oU,1056
65
- mlxsmith-0.1.0.dist-info/METADATA,sha256=yRnNRKdCFPZQRgAN7qtxCDBrUJxplOqK3mTV_Uoh2jA,4812
66
- mlxsmith-0.1.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
67
- mlxsmith-0.1.0.dist-info/entry_points.txt,sha256=ys7GcKEjhzhkkTMBmmaNavTgsjqOuFnCKIG2w8Wcn6w,46
68
- mlxsmith-0.1.0.dist-info/top_level.txt,sha256=hKBwc8bn7uoI-_5Yhcq1T3IuChFhUFdzItIkZK1up6A,9
69
- mlxsmith-0.1.0.dist-info/RECORD,,
63
+ mlxsmith-0.1.2.dist-info/licenses/LICENSE,sha256=ESYyLizI0WWtxMeS7rGVcX3ivMezm-HOd5WdeOh-9oU,1056
64
+ mlxsmith-0.1.2.dist-info/METADATA,sha256=mtwo3IlnvH9ffCKWSHbxRMTLQFyzrvDi29BCyzbteqk,8963
65
+ mlxsmith-0.1.2.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
66
+ mlxsmith-0.1.2.dist-info/entry_points.txt,sha256=ys7GcKEjhzhkkTMBmmaNavTgsjqOuFnCKIG2w8Wcn6w,46
67
+ mlxsmith-0.1.2.dist-info/top_level.txt,sha256=hKBwc8bn7uoI-_5Yhcq1T3IuChFhUFdzItIkZK1up6A,9
68
+ mlxsmith-0.1.2.dist-info/RECORD,,
@@ -1,42 +0,0 @@
1
- from __future__ import annotations
2
-
3
- from typing import Any, Dict
4
- from .base import AccelStats
5
-
6
- class ZMLXBackend:
7
- name = "zmlx"
8
-
9
- def __init__(self):
10
- self._available = False
11
- self._notes = {}
12
- try:
13
- import zmlx # type: ignore
14
- self._available = True
15
- self._notes["zmlx_version"] = getattr(zmlx, "__version__", None)
16
- except Exception as e:
17
- self._available = False
18
- self._notes["error"] = f"{type(e).__name__}: {e}"
19
-
20
- def patch(self) -> None:
21
- if not self._available:
22
- # soft fail; caller should report status
23
- return
24
- # ZMLX can patch ops/modules. We keep this intentionally minimal and safe.
25
- try:
26
- import zmlx # type: ignore
27
- # If ZMLX provides a global patch hook, call it; otherwise, no-op.
28
- patch_fn = getattr(zmlx, "patch", None)
29
- if callable(patch_fn):
30
- patch_fn()
31
- self._notes["patched"] = True
32
- else:
33
- self._notes["patched"] = False
34
- self._notes["hint"] = "No zmlx.patch() found; implement patch hook or integrate per-module."
35
- except Exception as e:
36
- self._notes["patched_error"] = f"{type(e).__name__}: {e}"
37
-
38
- def warmup(self, model: Any, example_batch: Any) -> Dict[str, Any]:
39
- return {"warmup": "not_implemented", "notes": self._notes}
40
-
41
- def stats(self) -> AccelStats:
42
- return AccelStats(backend="zmlx", notes=self._notes)
@@ -1,163 +0,0 @@
1
- Metadata-Version: 2.4
2
- Name: mlxsmith
3
- Version: 0.1.0
4
- Summary: Apple Silicon MLX fine-tuning and OpenAI-compatible serving (SFT stable; preference/RL experimental).
5
- Author-email: Shannon Labs <hmbown@gmail.com>
6
- License: MIT
7
- Project-URL: Homepage, https://github.com/Hmbown/MLXSmith
8
- Project-URL: Repository, https://github.com/Hmbown/MLXSmith
9
- Project-URL: Issues, https://github.com/Hmbown/MLXSmith/issues
10
- Keywords: mlx,apple-silicon,llm,fine-tuning,lora,openai-compatible
11
- Classifier: Development Status :: 3 - Alpha
12
- Classifier: Intended Audience :: Developers
13
- Classifier: Intended Audience :: Science/Research
14
- Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
15
- Classifier: License :: OSI Approved :: MIT License
16
- Classifier: Programming Language :: Python :: 3
17
- Classifier: Programming Language :: Python :: 3 :: Only
18
- Classifier: Programming Language :: Python :: 3.10
19
- Classifier: Programming Language :: Python :: 3.11
20
- Classifier: Programming Language :: Python :: 3.12
21
- Classifier: Operating System :: MacOS :: MacOS X
22
- Requires-Python: >=3.10
23
- Description-Content-Type: text/markdown
24
- License-File: LICENSE
25
- Requires-Dist: typer>=0.9.0
26
- Requires-Dist: rich>=13.7.0
27
- Requires-Dist: pyyaml>=6.0.1
28
- Requires-Dist: pydantic>=2.5.0
29
- Requires-Dist: pydantic-settings>=2.2.1
30
- Requires-Dist: tomli>=2.0.1; python_version < "3.11"
31
- Requires-Dist: huggingface_hub>=1.3.4
32
- Requires-Dist: jsonschema>=4.21.0
33
- Provides-Extra: mlx
34
- Requires-Dist: mlx>=0.30.4; extra == "mlx"
35
- Provides-Extra: llm
36
- Requires-Dist: mlx-lm>=0.30.5; extra == "llm"
37
- Requires-Dist: transformers>=5.0.0; extra == "llm"
38
- Requires-Dist: datasets>=3.0.0; extra == "llm"
39
- Provides-Extra: serve
40
- Requires-Dist: fastapi>=0.128.0; extra == "serve"
41
- Requires-Dist: uvicorn>=0.40.0; extra == "serve"
42
- Requires-Dist: httpx>=0.28.0; extra == "serve"
43
- Provides-Extra: zmlx
44
- Requires-Dist: zmlx; extra == "zmlx"
45
- Provides-Extra: dev
46
- Requires-Dist: pytest>=9.0.0; extra == "dev"
47
- Requires-Dist: ruff>=0.14.0; extra == "dev"
48
- Provides-Extra: all
49
- Requires-Dist: mlx>=0.30.4; extra == "all"
50
- Requires-Dist: mlx-lm>=0.30.5; extra == "all"
51
- Requires-Dist: transformers>=5.0.0; extra == "all"
52
- Requires-Dist: datasets>=3.0.0; extra == "all"
53
- Requires-Dist: fastapi>=0.128.0; extra == "all"
54
- Requires-Dist: uvicorn>=0.40.0; extra == "all"
55
- Requires-Dist: httpx>=0.28.0; extra == "all"
56
- Dynamic: license-file
57
-
58
- # mlxsmith
59
-
60
- Apple Silicon MLX fine-tuning and OpenAI-compatible serving.
61
- SFT + serving are stable. Preference/RL/RLM features are experimental.
62
-
63
- Status: alpha (2026-02-02).
64
-
65
- ## Stable features
66
- - Project init, config, data tools, HF auth, model pull/convert.
67
- - SFT (LoRA/QLoRA) training with run tracking and adapters.
68
- - Inference and OpenAI-compatible /v1/chat/completions serving.
69
- - Basic eval/bench and verifier plumbing (regex/jsonschema/pytest).
70
-
71
- ## Experimental features
72
- - Preference tuning (DPO/ORPO).
73
- - GRPO-style RFT.
74
- - RLM self-play loop (research).
75
- - Distill/OPD and orchestrated RLM.
76
-
77
- ## Install
78
-
79
- MLX is only available on Apple Silicon. Other platforms can still use data tools
80
- and mock backends, but MLX training and serving require macOS on Apple Silicon.
81
-
82
- ```bash
83
- python -m venv .venv && source .venv/bin/activate
84
- pip install -U pip
85
-
86
- # Core CLI
87
- pip install mlxsmith
88
-
89
- # Apple Silicon training + serving
90
- pip install "mlxsmith[mlx,llm,serve]"
91
- ```
92
-
93
- ## Quickstart
94
-
95
- ```bash
96
- mlxsmith init myproj
97
- cd myproj
98
- mlxsmith doctor
99
- ```
100
-
101
- ## HF auth (optional)
102
-
103
- ```bash
104
- mlxsmith auth login --token "$HF_TOKEN"
105
- mlxsmith auth status
106
- mlxsmith auth logout
107
- ```
108
-
109
- ## Pull + convert a model (HF -> MLX)
110
-
111
- ```bash
112
- mlxsmith pull Qwen/Qwen3-4B-Instruct-2507
113
- # outputs to cache/mlx/Qwen__Qwen3-4B-Instruct-2507
114
- ```
115
-
116
- Optional quantization:
117
-
118
- ```bash
119
- mlxsmith pull Qwen/Qwen3-4B-Instruct-2507 --quantize --q-bits 4
120
- ```
121
-
122
- ## SFT (LoRA/QLoRA)
123
-
124
- ```bash
125
- mlxsmith sft --model cache/mlx/Qwen__Qwen3-4B-Instruct-2507 --data data/sft
126
- ```
127
-
128
- ## Serve (OpenAI-compatible)
129
-
130
- ```bash
131
- mlxsmith serve --model runs/sft_0001/adapter --port 8080
132
- ```
133
-
134
- Sample request:
135
-
136
- ```bash
137
- curl http://localhost:8080/v1/chat/completions \
138
- -H 'Content-Type: application/json' \
139
- -d '{"messages":[{"role":"user","content":"Hello"}],"max_tokens":64}'
140
- ```
141
-
142
- To enable the optional UI/monitor dashboard, set `serve.ui: true` in `mlxsmith.yaml`.
143
-
144
- ## Experimental commands
145
-
146
- - `mlxsmith pref` (DPO/ORPO)
147
- - `mlxsmith rft` (GRPO-style)
148
- - `mlxsmith rlm` / `mlxsmith pipeline` (self-play loop)
149
- - `mlxsmith distill` (offline/OPD)
150
- - `mlxsmith eval` / `mlxsmith bench`
151
-
152
- ## Docs
153
-
154
- - `docs/PROJECT_FORMAT.md` for project layout and artifacts.
155
- - `docs/VERIFIERS.md` for verifier API and sandbox behavior.
156
- - `docs/COMPATIBILITY.md` for tested versions and model families.
157
- - `docs/ENVIRONMENTS.md` for the environment plugin system.
158
- - `docs/ROADMAP.md` for product direction and milestones.
159
- - `docs/README.md` for the full docs index.
160
-
161
- ## License
162
-
163
- MIT