mlxsmith 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mlxsmith/api/handlers.py +2 -5
- mlxsmith/cli.py +4 -4
- mlxsmith/config.py +1 -2
- mlxsmith/llm/interface.py +3 -5
- mlxsmith/llm/mock_backend.py +1 -1
- mlxsmith/orchestrator/daemon.py +4 -9
- mlxsmith/orchestrator/inference_worker.py +1 -1
- mlxsmith/orchestrator/queue.py +2 -2
- mlxsmith/orchestrator/trainer_worker.py +0 -2
- mlxsmith/rlm/loop.py +6 -8
- mlxsmith/rlm/weights.py +1 -1
- mlxsmith/sdk/future.py +2 -3
- mlxsmith/sdk/sampling_client.py +1 -2
- mlxsmith/sdk/training_client.py +3 -4
- mlxsmith/server.py +1 -6
- mlxsmith/train/lora.py +4 -0
- mlxsmith-0.1.1.dist-info/METADATA +293 -0
- {mlxsmith-0.1.0.dist-info → mlxsmith-0.1.1.dist-info}/RECORD +22 -22
- mlxsmith-0.1.0.dist-info/METADATA +0 -163
- {mlxsmith-0.1.0.dist-info → mlxsmith-0.1.1.dist-info}/WHEEL +0 -0
- {mlxsmith-0.1.0.dist-info → mlxsmith-0.1.1.dist-info}/entry_points.txt +0 -0
- {mlxsmith-0.1.0.dist-info → mlxsmith-0.1.1.dist-info}/licenses/LICENSE +0 -0
- {mlxsmith-0.1.0.dist-info → mlxsmith-0.1.1.dist-info}/top_level.txt +0 -0
mlxsmith/api/handlers.py
CHANGED
@@ -20,11 +20,10 @@ import uuid
 from pathlib import Path
 from typing import Any, AsyncGenerator, Callable, Dict, List, Optional

-from fastapi import APIRouter,
+from fastapi import APIRouter, FastAPI, HTTPException, Request, Security, status
 from starlette.middleware.base import BaseHTTPMiddleware
 from fastapi.responses import StreamingResponse
 from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
-from pydantic import BaseModel

 from .schemas import (
     AdapterReloadRequest,
@@ -1102,12 +1101,10 @@ def create_router(
     models, use the list endpoint to check completion status.
     """
     cache_dir = _get_cache_dir()
-
-
+
     try:
         # Import here to avoid circular dependencies
         from ..models import hf_pull
-        from ..config import ProjectConfig

         # Get HF token if available
         hf_token = None
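The widened import pulls in `Security` and `status` alongside the already-imported `HTTPBearer`, the FastAPI pieces normally used for bearer-token checks. Whether the served API actually requires a token depends on configuration not shown in this diff; if it does, a request would simply add the standard header. A hypothetical illustration only (endpoint and payload taken from the package README quoted later in this diff):

```bash
# Illustrative only: calling the OpenAI-compatible endpoint with a bearer token.
curl http://localhost:8080/v1/chat/completions \
  -H 'Authorization: Bearer <token>' \
  -H 'Content-Type: application/json' \
  -d '{"messages":[{"role":"user","content":"Hello"}],"max_tokens":64}'
```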
mlxsmith/cli.py
CHANGED
@@ -43,7 +43,7 @@ from .envs import (

 app = typer.Typer(
     add_completion=False,
-    help="mlxsmith — MLX fine-tuning
+    help="mlxsmith — Apple Silicon MLX fine-tuning toolkit: SFT, DPO/ORPO, GRPO, distillation, and OpenAI-compatible serving.",
 )
 console = Console()

@@ -564,7 +564,7 @@ def config_validate(

     try:
         cfg = load_config(cfg_path, require=True)
-        console.print(
+        console.print("[green]✓ Configuration is valid[/green]")

         # Show summary
         table = Table(title="Configuration Summary")
@@ -593,9 +593,9 @@ def config_env(
     """Show available environment variables."""
     cfg = ProjectConfig()

-    console.print(
+    console.print("\n[bold]Environment Variable Configuration[/bold]")
     console.print(f"Prefix: [cyan]{prefix}[/cyan]")
-    console.print(
+    console.print("Nested delimiter: [cyan]__[/cyan] (double underscore)\n")

     table = Table(title=f"Available {prefix}* Environment Variables")
     table.add_column("Environment Variable")
mlxsmith/config.py
CHANGED
@@ -18,7 +18,6 @@ Config files support @path syntax:
 from __future__ import annotations

 import json
-import os
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Tuple, Union

@@ -94,7 +93,7 @@ class ProjectSettings(BaseSettings):


 # Import CLI aliases from models
-from .config_models import CLI_ALIASES as _CLI_ALIASES
+from .config_models import CLI_ALIASES as _CLI_ALIASES  # noqa: E402


 def resolve_config_path(config: Union[str, Path], root: Optional[Path] = None) -> Path:
mlxsmith/llm/interface.py
CHANGED
@@ -182,11 +182,9 @@ def compute_logprobs(
     else:
         prompt_len = len(prompt_ids)

-    #
-
-
-    # Use backend's sequence_logprob if available
-    seq_logprob = backend.sequence_logprob(ids, prompt_len=prompt_len)
+    # Decode and compute sequence-level logprob (used by callers via backend state)
+    backend.decode(ids)
+    backend.sequence_logprob(ids, prompt_len=prompt_len)

     # For per-token logprobs, we'd need to do a forward pass
     # This is a simplified version
mlxsmith/llm/mock_backend.py
CHANGED
mlxsmith/orchestrator/daemon.py
CHANGED
@@ -9,27 +9,22 @@ Manages rollout requests, training batches, and weight updates.

 from __future__ import annotations

-import json
 import multiprocessing as mp
 import signal
-import sys
 import time
 import traceback
 from dataclasses import dataclass, field
 from pathlib import Path
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional

 from rich.console import Console

 from ..config import ProjectConfig
-from ..rlm.
-from ..rlm.gating import load_state, save_state, should_accept, update_state
-from ..rlm.history import append_history
-from ..rlm.inference import Rollout, build_tasks
+from ..rlm.gating import load_state
 from ..rlm.weights import WeightPointerStore, WeightPointerIPC
 from ..runs import new_run, snapshot_config
-from ..util import ensure_dir, now_ts
-from .queue import MessageQueue, MessageType
+from ..util import ensure_dir, now_ts
+from .queue import MessageQueue, MessageType
 from .inference_worker import InferenceConfig, run_inference_worker
 from .trainer_worker import TrainerConfig, run_trainer_worker

mlxsmith/orchestrator/inference_worker.py
CHANGED

@@ -26,7 +26,7 @@ from fastapi.responses import StreamingResponse
 from ..config import ProjectConfig
 from ..llm.registry import get_llm_backend
 from ..models import resolve_model_spec
-from ..rlm.weights import WeightPointerStore
+from ..rlm.weights import WeightPointerStore
 from .queue import MessageQueue, MessageType, Message

mlxsmith/orchestrator/queue.py
CHANGED
@@ -11,11 +11,11 @@ import json
 import multiprocessing as mp
 import time
 import uuid
-from dataclasses import
+from dataclasses import dataclass, field
 from enum import Enum, auto
 from pathlib import Path
 from queue import Empty
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional


 class MessageType(Enum):
mlxsmith/orchestrator/trainer_worker.py
CHANGED

@@ -9,7 +9,6 @@ from __future__ import annotations

 import json
 import signal
-import sys
 import time
 import traceback
 from collections import defaultdict
@@ -17,7 +16,6 @@ from dataclasses import dataclass, field
 from pathlib import Path
 from typing import Any, Dict, List, Optional

-from ..config import ProjectConfig
 from ..llm.registry import get_llm_backend
 from ..rlm.inference import Rollout
 from ..rlm.weights import WeightPointerStore, WeightPointerIPC
mlxsmith/rlm/loop.py
CHANGED
@@ -15,12 +15,11 @@ from __future__ import annotations
 import json
 import multiprocessing as mp
 import signal
-import sys
 import time
 import traceback
-from dataclasses import dataclass
+from dataclasses import dataclass
 from pathlib import Path
-from typing import
+from typing import Dict, List, Optional

 from rich.console import Console

@@ -35,10 +34,9 @@ from ..verifiers.docker_verifier import verify as docker_verify
 from ..verifiers.pytest_verifier import verify as pytest_verify
 from .corpus import append_corpus, load_corpus, sample_corpus
 from .gating import load_state, save_state, should_accept, update_state
-from .generate import GeneratedTask
+from .generate import GeneratedTask
 from .history import append_history
 from .inference import Rollout, build_tasks
-from .mutate import mutate_tasks
 from .trainer import train_on_rollouts
 from .weights import (
     WeightPointer,
@@ -373,9 +371,9 @@ def run_rlm(
 # Multi-Process Orchestrated RLM
 # =============================================================================

-from ..orchestrator.queue import MessageQueue, MessageType, Message
-from ..orchestrator.inference_worker import InferenceConfig, run_inference_worker
-from ..orchestrator.trainer_worker import TrainerConfig, run_trainer_worker
+from ..orchestrator.queue import MessageQueue, MessageType, Message  # noqa: E402
+from ..orchestrator.inference_worker import InferenceConfig, run_inference_worker  # noqa: E402
+from ..orchestrator.trainer_worker import TrainerConfig, run_trainer_worker  # noqa: E402


 @dataclass
mlxsmith/rlm/weights.py
CHANGED
mlxsmith/sdk/future.py
CHANGED
@@ -7,9 +7,8 @@ and progress tracking for async operations.
 from __future__ import annotations

 import threading
-import time
 from concurrent.futures import Future, ThreadPoolExecutor
-from typing import Any, Callable, Generic, Iterable, Optional, TypeVar
+from typing import Any, Callable, Generic, Iterable, Optional, TypeVar

 from ..llm.backend import DecodingConfig

@@ -344,7 +343,7 @@ class APIFuture(Generic[T]):
         self._run_finally_callbacks()
         return True

-    def cancelled(self) -> bool:  # type: ignore
+    def cancelled(self) -> bool:  # type: ignore  # noqa: F811
         """Check if the future was cancelled."""
         with self._lock:
             return self._state == APIFutureState.CANCELLED
mlxsmith/sdk/sampling_client.py
CHANGED
@@ -27,8 +27,7 @@ Example:
 from __future__ import annotations

 from dataclasses import dataclass, field
-from typing import Any, Dict, List, Optional, Sequence
-import concurrent.futures
+from typing import Any, Dict, List, Optional, Sequence

 from .future import APIFuture, SdkFuturePool

mlxsmith/sdk/training_client.py
CHANGED
@@ -22,7 +22,7 @@ Example:
 from __future__ import annotations

 from dataclasses import dataclass
-from typing import Any, Callable, Dict, List, Optional
+from typing import Any, Callable, Dict, List, Optional

 from .future import APIFuture, SdkFuturePool

@@ -194,7 +194,7 @@ class TrainingClient:
         if batch.is_preference:
             # Preference training (DPO, ORPO, etc.)
             if batch.rejected_responses is None:
-                raise ValueError(
+                raise ValueError("Preference batch requires rejected_responses")

             for prompt, chosen, rejected in zip(
                 batch.prompts,
@@ -345,7 +345,6 @@ class TrainingClient:
         def _run_save() -> CheckpointResult:
             try:
                 from pathlib import Path
-                import json

                 save_path = Path(path)
                 save_path.parent.mkdir(parents=True, exist_ok=True)
@@ -673,4 +672,4 @@ class DistillationTrainingClient(TrainingClient):


 # Import at end to avoid circular dependency
-from .sampling_client import SamplingClient
+from .sampling_client import SamplingClient  # noqa: E402
mlxsmith/server.py
CHANGED
@@ -10,15 +10,10 @@ This module provides:

 from __future__ import annotations

-import json
-import time
-import uuid
 from pathlib import Path
-from typing import Any, Dict, List, Optional

 from fastapi import FastAPI
-from fastapi.responses import
-from pydantic import BaseModel
+from fastapi.responses import HTMLResponse

 from .config import ProjectConfig
 from .models import resolve_model_spec
mlxsmith/train/lora.py
CHANGED
@@ -160,6 +160,8 @@ def apply_lora(model: Any, cfg: LoRAConfig) -> dict:
     keys = sorted(_keys_for_target_modules(model, cfg.target_modules))

     if tuner_utils is not None and hasattr(tuner_utils, "linear_to_lora_layers"):
+        # Freeze all base weights first so only LoRA params are trainable
+        model.freeze()
         # MLX-LM format
         config = {
             "rank": int(cfg.r),
@@ -256,6 +258,8 @@ def apply_adapter(model: Any, adapter_dir: str | Path) -> dict | None:
     adapter_cfg = load_adapter_config(adapter_dir)
     tuner_utils, _ = _try_mlx_lm_utils()
     if adapter_cfg is not None and tuner_utils is not None and hasattr(tuner_utils, "load_adapters"):
+        # Freeze base weights so only adapter params are trainable
+        model.freeze()
         tuner_utils.load_adapters(model, str(adapter_dir))
         return adapter_cfg

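The pattern behind the two `model.freeze()` additions is: freeze the base model first, then attach the LoRA/adapter weights, so only the adapter parameters remain trainable. A minimal sketch of that pattern, assuming mlx-lm's tuner utilities (which the code probes via `hasattr(tuner_utils, "linear_to_lora_layers")`); the model path comes from the README quoted below, while the layer count and the config keys other than "rank" are illustrative assumptions:

```python
# Illustrative sketch, not mlxsmith code: freeze base weights before injecting
# LoRA layers so that only the LoRA parameters remain trainable.
from mlx_lm import load
from mlx_lm.tuner.utils import linear_to_lora_layers

model, tokenizer = load("cache/mlx/Qwen__Qwen3-4B-Instruct-2507")  # path from the README
model.freeze()  # mlx.nn.Module.freeze(): mark existing parameters as non-trainable

linear_to_lora_layers(
    model,
    8,                                          # number of blocks to adapt (assumed value)
    {"rank": 8, "scale": 16.0, "dropout": 0.0}, # only "rank" appears in the diff; rest assumed
)

trainable = model.trainable_parameters()  # now contains only the injected LoRA weights
```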
mlxsmith-0.1.1.dist-info/METADATA
ADDED

@@ -0,0 +1,293 @@
+Metadata-Version: 2.4
+Name: mlxsmith
+Version: 0.1.1
+Summary: Apple Silicon MLX fine-tuning toolkit — SFT, DPO/ORPO, GRPO, distillation, and OpenAI-compatible serving.
+Author-email: Shannon Labs <hmbown@gmail.com>
+License: MIT
+Project-URL: Homepage, https://github.com/Hmbown/MLXSmith
+Project-URL: Repository, https://github.com/Hmbown/MLXSmith
+Project-URL: Issues, https://github.com/Hmbown/MLXSmith/issues
+Keywords: mlx,apple-silicon,llm,fine-tuning,lora,openai-compatible
+Classifier: Development Status :: 3 - Alpha
+Classifier: Intended Audience :: Developers
+Classifier: Intended Audience :: Science/Research
+Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3 :: Only
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Operating System :: MacOS :: MacOS X
+Requires-Python: >=3.10
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: typer>=0.9.0
+Requires-Dist: rich>=13.7.0
+Requires-Dist: pyyaml>=6.0.1
+Requires-Dist: pydantic>=2.5.0
+Requires-Dist: pydantic-settings>=2.2.1
+Requires-Dist: tomli>=2.0.1; python_version < "3.11"
+Requires-Dist: huggingface_hub>=1.3.4
+Requires-Dist: jsonschema>=4.21.0
+Provides-Extra: mlx
+Requires-Dist: mlx>=0.30.4; extra == "mlx"
+Provides-Extra: llm
+Requires-Dist: mlx-lm>=0.30.5; extra == "llm"
+Requires-Dist: transformers>=5.0.0; extra == "llm"
+Requires-Dist: datasets>=3.0.0; extra == "llm"
+Provides-Extra: serve
+Requires-Dist: fastapi>=0.128.0; extra == "serve"
+Requires-Dist: uvicorn>=0.40.0; extra == "serve"
+Requires-Dist: httpx>=0.28.0; extra == "serve"
+Provides-Extra: zmlx
+Requires-Dist: zmlx; extra == "zmlx"
+Provides-Extra: dev
+Requires-Dist: pytest>=9.0.0; extra == "dev"
+Requires-Dist: ruff>=0.14.0; extra == "dev"
+Provides-Extra: all
+Requires-Dist: mlx>=0.30.4; extra == "all"
+Requires-Dist: mlx-lm>=0.30.5; extra == "all"
+Requires-Dist: transformers>=5.0.0; extra == "all"
+Requires-Dist: datasets>=3.0.0; extra == "all"
+Requires-Dist: fastapi>=0.128.0; extra == "all"
+Requires-Dist: uvicorn>=0.40.0; extra == "all"
+Requires-Dist: httpx>=0.28.0; extra == "all"
+Dynamic: license-file
+
+# mlxsmith
+
+Apple Silicon MLX fine-tuning toolkit — SFT, DPO/ORPO, GRPO, distillation, and OpenAI-compatible serving.
+
+**Status:** alpha (v0.1.0). Full training pipeline validated on Qwen3-4B.
+
+## Install
+
+MLX training and serving require macOS on Apple Silicon.
+Other platforms can use data tools and mock backends.
+
+```bash
+python -m venv .venv && source .venv/bin/activate
+pip install -U pip
+
+# Core CLI (data tools, config, project scaffolding)
+pip install mlxsmith
+
+# Apple Silicon training + serving
+pip install "mlxsmith[mlx,llm,serve]"
+
+# Everything
+pip install "mlxsmith[all]"
+```
+
+## Quickstart
+
+```bash
+mlxsmith init myproj
+cd myproj
+mlxsmith doctor  # check Python, MLX, Metal, ZMLX
+```
+
+## Training
+
+### SFT (LoRA/QLoRA)
+
+```bash
+mlxsmith sft --model cache/mlx/Qwen__Qwen3-4B-Instruct-2507 --data data/sft
+```
+
+Produces run artifacts under `runs/sft_NNNN/` (adapter weights, `metrics.jsonl`, config snapshot).
+
+### Preference tuning (DPO/ORPO)
+
+```bash
+mlxsmith pref --model cache/mlx/Qwen__Qwen3-4B-Instruct-2507 \
+  --data data/prefs --algo dpo
+```
+
+Supports DPO and ORPO algorithms with configurable beta and KL coefficients. Expects `{prompt, chosen, rejected}` data format.
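Not part of the packaged README, just for orientation: one record in the `{prompt, chosen, rejected}` shape named above, written as a single JSON object per line (the field contents here are invented):

```python
# Illustrative only: one line of a data/prefs/*.jsonl file in the
# {prompt, chosen, rejected} shape the README describes (contents invented).
import json

record = {
    "prompt": "Explain what LoRA is in one sentence.",
    "chosen": "LoRA fine-tunes a model by training small low-rank adapter matrices instead of all weights.",
    "rejected": "LoRA is a kind of radio protocol.",
}
print(json.dumps(record))  # append one such object per line to build the JSONL file
```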
+
+### Reinforced fine-tuning (GRPO)
+
+```bash
+mlxsmith rft --model cache/mlx/Qwen__Qwen3-4B-Instruct-2507 \
+  --env envs/coding.yaml --verifier verifiers/pytest.py
+```
+
+GRPO-style RL training with token-level environment integration and verifier-based rewards. Rollout acceptance/rejection gating with reward tracking.
+
+### Knowledge distillation
+
+```bash
+# Offline distillation (teacher generates, student learns)
+mlxsmith distill --teacher large-model --student small-model --mode offline
+
+# Online preference distillation (OPD)
+mlxsmith distill --teacher large-model --student small-model --mode opd
+```
+
+### Full pipeline
+
+```bash
+# Run SFT → Pref → RFT in sequence
+mlxsmith pipeline
+```
+
+## Serving
+
+OpenAI-compatible `/v1/chat/completions` endpoint.
+
+```bash
+mlxsmith serve --model runs/sft_0001/adapter --port 8080
+```
+
+```bash
+curl http://localhost:8080/v1/chat/completions \
+  -H 'Content-Type: application/json' \
+  -d '{"messages":[{"role":"user","content":"Hello"}],"max_tokens":64}'
+```
+
+Supports streaming (`"stream": true`), logprobs, stop sequences, and an optional UI dashboard (`serve.ui: true` in config).
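Also not in the packaged README: the same request with the documented `"stream": true` flag. An OpenAI-compatible endpoint typically returns the completion as incremental chunks; the exact framing is not shown in this diff.

```bash
# Illustrative streaming request; -N disables curl's buffering so chunks print as they arrive.
curl -N http://localhost:8080/v1/chat/completions \
  -H 'Content-Type: application/json' \
  -d '{"messages":[{"role":"user","content":"Hello"}],"max_tokens":64,"stream":true}'
```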
+
+## Data tools
+
+```bash
+mlxsmith data presets                                     # list built-in datasets
+mlxsmith data pull alpaca                                 # pull a preset
+mlxsmith data import raw.json --out data/sft/train.jsonl  # import ShareGPT → JSONL
+mlxsmith data split data/sft/train.jsonl --fractions 0.9 0.05 0.05
+mlxsmith data stats data/sft/train.jsonl                  # token counts, field analysis
+mlxsmith data validate data/sft/train.jsonl               # structure check
+```
+
+Built-in presets: `alpaca`, `hh-rlhf`, `ultrachat-200k`, `ultrafeedback-binarized-prefs`, `ultrafeedback-binarized-sft`.
+
+## Model management
+
+```bash
+# Pull + convert HF model to MLX
+mlxsmith pull Qwen/Qwen3-4B-Instruct-2507
+
+# With quantization
+mlxsmith pull Qwen/Qwen3-4B-Instruct-2507 --quantize --q-bits 4
+
+# Merge adapters
+mlxsmith adapters merge runs/sft_0001/adapter runs/pref_0001/adapter --weights 0.7 0.3
+```
+
+## HF auth
+
+```bash
+mlxsmith auth login --token "$HF_TOKEN"
+mlxsmith auth status
+mlxsmith auth logout
+```
+
+## Eval and bench
+
+```bash
+# Evaluation suite (pass@k with verifier checks)
+mlxsmith eval --suite eval/suites/coding.yaml
+
+# Benchmark inference or training throughput
+mlxsmith bench --mode inference
+mlxsmith bench --mode trainer
+mlxsmith bench --mode end_to_end
+```
+
+## Verifiers
+
+Built-in verifiers for eval, RFT, and preference tuning:
+
+- **regex** — pattern matching on completions
+- **jsonschema** — JSON structure validation
+- **pytest** — sandboxed test execution
+- **docker** — containerized verification
+- **compose** — multi-verifier composition (AND/OR/weighted)
+
+See `docs/VERIFIERS.md` for the verifier API.
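The verifier interface itself lives in `docs/VERIFIERS.md`, not in this diff. Purely as a hypothetical sketch in the spirit of the regex built-in (the real callable signature and return type may differ):

```python
# Hypothetical custom verifier sketch; the actual interface mlxsmith expects
# is defined in docs/VERIFIERS.md and may take different arguments.
import re

def verify(prompt: str, completion: str) -> float:
    """Reward 1.0 when the completion contains a fenced code block, else 0.0."""
    return 1.0 if re.search(r"```[\s\S]+?```", completion) else 0.0
```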
+
+## Environment plugin system
+
+```bash
+mlxsmith env list                   # list available environments
+mlxsmith env info envs/coding.yaml  # show manifest (tasks, verifier, version)
+mlxsmith env init my_env            # scaffold a new environment
+mlxsmith env install ./my_env       # install from directory
+mlxsmith env package ./my_env       # create distributable tarball
+mlxsmith env run envs/coding.yaml   # execute RFT with this environment
+```
+
+Environments define tasks, verifiers, and reward functions for RFT training. See `docs/ENVIRONMENTS.md`.
+
+## Config system
+
+```bash
+mlxsmith config show            # display merged config (YAML/JSON/TOML)
+mlxsmith config show --sources  # show where each value comes from
+mlxsmith config init            # create default mlxsmith.yaml
+mlxsmith config validate        # check config structure
+mlxsmith config env             # show environment variable mapping
+```
+
+Config sources (in priority order): CLI flags > environment variables (`MLXSMITH__SECTION__KEY`) > config file > defaults.
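As a worked illustration of that `MLXSMITH__SECTION__KEY` convention (the section and key below are invented for the example; `mlxsmith config env` lists the real ones):

```bash
# Hypothetical override via environment variable; the double underscore separates
# nesting levels, mirroring a nested key in mlxsmith.yaml.
export MLXSMITH__SERVE__PORT=8080
mlxsmith config show --sources   # reports which values came from env vars
```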
+
+## SDK (programmatic API)
+
+For building custom training loops:
+
+```python
+from mlxsmith.sdk import load_model, SamplingClient, TrainingClient, TrainingBatch
+
+loaded = load_model("path/to/model", config)
+
+# Sampling with logprobs
+sampler = SamplingClient(loaded.backend)
+result = sampler.sample("prompt", logprobs_k=5)
+
+# Training operations
+trainer = TrainingClient(loaded.backend)
+trainer.create_optimizer(lr=1e-4, weight_decay=0.01)
+fb = trainer.forward_backward(batch)
+trainer.optim_step(fb.result().grads)
+```
+
+Loss functions: DPO, ORPO, GRPO, CISPO, DRO, PPO, importance sampling, cross-entropy.
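For reference, the standard DPO objective that a DPO-style loss implements, written generically; this is not mlxsmith's implementation, and the concrete signatures in `mlxsmith.sdk.losses` are not shown in this diff:

```python
# Generic DPO loss from the literature, written with plain floats for clarity.
# Inputs are summed log-probabilities of the chosen/rejected responses under the
# policy and the frozen reference model; beta is the usual temperature.
import math

def dpo_loss(pi_chosen: float, pi_rejected: float,
             ref_chosen: float, ref_rejected: float, beta: float = 0.1) -> float:
    margin = beta * ((pi_chosen - ref_chosen) - (pi_rejected - ref_rejected))
    return math.log1p(math.exp(-margin))  # equals -log(sigmoid(margin))
```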
+
+## Research
+
+### RLM self-play loop
+
+RLM (Recursive Language Model) is a research feature — the infrastructure runs but has not produced measured gains yet.
+
+```bash
+mlxsmith rlm                      # single-process RLM
+mlxsmith pipeline --orchestrated  # multi-process orchestrated RLM
+mlxsmith rlm status               # check iteration state
+mlxsmith rlm history              # view history
+```
+
+Includes task generation, mutation for data diversity, corpus management, EMA-based gating, and weight pointer IPC for multi-process coordination. See `docs/orchestrator.md`.
+
+### ZMLX acceleration
+
+Optional zero-copy MLX acceleration backend.
+
+```bash
+mlxsmith accel status
+```
+
+## Docs
+
+- `docs/PROJECT_FORMAT.md` — project layout and artifacts
+- `docs/VERIFIERS.md` — verifier API and sandbox behavior
+- `docs/COMPATIBILITY.md` — tested versions and model families
+- `docs/ENVIRONMENTS.md` — environment plugin system
+- `docs/orchestrator.md` — multi-process RLM orchestrator
+- `docs/rlm-ctl.md` — RLM training guide
+- `docs/ROADMAP.md` — product direction and milestones
+- `docs/README.md` — full docs index
+
+## License
+
+MIT
{mlxsmith-0.1.0.dist-info → mlxsmith-0.1.1.dist-info}/RECORD
RENAMED

@@ -2,55 +2,55 @@ mlxsmith/__init__.py,sha256=CJZKl9Hp16DYlQR7yqstir-cL4n7GCw90d-meXliCHk,48
 mlxsmith/adapters.py,sha256=wkQ2q_ugaxCviNARSmxehwBcc2_NKVJ7mOofT-y30TY,1318
 mlxsmith/auth.py,sha256=_j_gx5ccZfpHs0_Xmpcgh_ELhX3ZBJLg2YYpjA-aPRI,2195
 mlxsmith/bench.py,sha256=VBgY9uOGm3xhmL3UrNNOnUoa3P0yaVQQ7wxykIEmDEw,3778
-mlxsmith/cli.py,sha256=
-mlxsmith/config.py,sha256=
+mlxsmith/cli.py,sha256=I7vzO2Z3ja1ncMOGHhXSEN_kElfWdmA6Jvs4rFgINZ0,35767
+mlxsmith/config.py,sha256=K1VbN-3WoWf4vzZ6BNeTgEz2DFH8s9YminqwyNBVLj0,16389
 mlxsmith/config_models.py,sha256=pMsLGyC9J9T9jqs5ipYFUuEkfir7iiDA4EZdcb65k5g,8407
 mlxsmith/data.py,sha256=3ZlNS8bnD7LlWACEmULbf8RGQzCuf0QulFpI1PWvNuI,16160
 mlxsmith/eval.py,sha256=nSARSEKKwZM8Ot5rUDDpGikaClGNxvg0ifgGkTA6mM0,3792
 mlxsmith/infer.py,sha256=ekpHhTird0dnTJzFOc-O98rjwkEKgAr9AFicKlaB3MA,4610
 mlxsmith/models.py,sha256=BRaPTxzqy-5KEKdccveMgjpbRP0ZmbRnA_su8rz2P4k,8033
 mlxsmith/runs.py,sha256=2voYBryGGMlAKskHJ7TDiIPQL2_fFxSQ8RgtfGZ7ccg,1409
-mlxsmith/server.py,sha256=
+mlxsmith/server.py,sha256=Fk-i9xK_Teq0Z0m-W1GRJVtcG0nYvd4bQ85lnAUuT1w,10690
 mlxsmith/util.py,sha256=8fagFtMP2YS1qlWkCt8bPWAz2jAgfvAf1ipNGJZAgIk,4544
 mlxsmith/accel/__init__.py,sha256=Mv2mj-2bLqUILhMUCjMLu7JORcu0-cGBOri32j7O5Xo,291
 mlxsmith/accel/base.py,sha256=o0kmxV68KbxOmucC3eDjKbFA8dfTT2ETqN0XD_l3mM0,435
 mlxsmith/accel/none.py,sha256=WhxECIBv-pE63Vh1Iv86ObgT9JHOi4hA4BUyJc__sKU,362
 mlxsmith/accel/zmlx_backend.py,sha256=JfzQ44v9hrCJgcqU018ZD7qLNlubIe09CwYRpKyfwR8,1529
 mlxsmith/api/__init__.py,sha256=IrpIXDkUJm4BZqahYOK_0BkxvomlscEvCoLCm4GDxo8,998
-mlxsmith/api/handlers.py,sha256=
+mlxsmith/api/handlers.py,sha256=94Spq4glFp7mRwmKrFqt7erd1Af_PxVP_vpxCo2UFdQ,46896
 mlxsmith/api/schemas.py,sha256=Q18kF9FKtvT1vdnXy6feSNTtCV2FiRWDzfdsPzc0te8,19316
 mlxsmith/envs/__init__.py,sha256=t7QiEHtfyP1dUCj-4TJUN0hd9lRqBKYd5Ek7dgEwus4,671
 mlxsmith/envs/system.py,sha256=2bChkOxm2S7d0WCrweHGhoI6-xOYDxlC0YbHH6Ibjq4,12782
 mlxsmith/envs/token_env.py,sha256=rhv2o3eI1GyTtfAXG72z-31amNGaLv0KW56mEsWkXlY,6709
 mlxsmith/llm/__init__.py,sha256=jWEkXGdvwZ8tUYHVqWW3SYHXG-LSWaGbdwOR0mF_4Zw,396
 mlxsmith/llm/backend.py,sha256=_xGfdJ30_6Nq8zIqMjTemCZWV-vUi9qV0djfwxny5SM,4335
-mlxsmith/llm/interface.py,sha256=
+mlxsmith/llm/interface.py,sha256=udQl_R7ecmM4Nh20P50Nmnv2h853ByrgevjQIRDxX4g,6601
 mlxsmith/llm/mlx_lm_backend.py,sha256=OitqY_6LqnfqbN7GZz39Ma3cItjjNqHXF3SV3AZsHxk,18284
-mlxsmith/llm/mock_backend.py,sha256=
+mlxsmith/llm/mock_backend.py,sha256=ZTkPRnRmCXSkhOJ1TLbSgTnAcLTKa9sq6-zzbTEX6Qc,7439
 mlxsmith/llm/registry.py,sha256=ZmYE-WclyMo6z0HwUufqt3tKT4E84xZ6I-PFu1Z5nws,309
 mlxsmith/orchestrator/__init__.py,sha256=oc4qIkISZMGMvYeOqU8lDDmFL3uxDYJHsv_rra9DH-E,792
-mlxsmith/orchestrator/daemon.py,sha256=
-mlxsmith/orchestrator/inference_worker.py,sha256=
-mlxsmith/orchestrator/queue.py,sha256=
-mlxsmith/orchestrator/trainer_worker.py,sha256=
+mlxsmith/orchestrator/daemon.py,sha256=VJFF8s_85h4C-YM14wRUlzDHrhhVPfQOztmNLKyRk30,16107
+mlxsmith/orchestrator/inference_worker.py,sha256=PfmsanrBnx9HZNqG00jTQQTKqDa2bl-wUtYAWtxfzvs,17963
+mlxsmith/orchestrator/queue.py,sha256=E8VymvJi2zEpuTwsG7JB-vROJGGS5evPPhIpkmdwtq4,11286
+mlxsmith/orchestrator/trainer_worker.py,sha256=IM7vOhkYlCKoQSC1tNobkeIVnkmsz4DtAWfu24bjJXY,16015
 mlxsmith/rlm/__init__.py,sha256=Q09oRONXWTFXuWwMJOpGWg0I-UDkuib0OA1O_cNFp2U,236
 mlxsmith/rlm/corpus.py,sha256=-p12H650_ybe2kXC219M4wXYpD08QHUpY92ErVjSfX8,2112
 mlxsmith/rlm/gating.py,sha256=L18niYKEezphASdsgzW6pz3PN7ylA-L5Wu4_GLLVfHw,2455
 mlxsmith/rlm/generate.py,sha256=q1v_TP8sqVj05omhoF60Ns1iX6yClgc7lP6njz4lK18,7601
 mlxsmith/rlm/history.py,sha256=Vm4JtWqsZnqB-fuo3zWfweeogmmLTL3VHaYZ45vrkz8,299
 mlxsmith/rlm/inference.py,sha256=ntCEKxD1KrkIXgZNQbD4jhS5rJPtwcVYc8qLc5E5cnc,5297
-mlxsmith/rlm/loop.py,sha256=
+mlxsmith/rlm/loop.py,sha256=nkAR5KYErcq893kQYigFkl3NG469ZUu1CkN_sS1ObAM,49900
 mlxsmith/rlm/mutate.py,sha256=_NUNMpVCRaEDgtzI8J2NOTcj5NnycZnP_UoUpFacjTs,2553
 mlxsmith/rlm/trainer.py,sha256=RRXPlJy4SySpLZGge0ORMYs7HeiWgfGQNeMBOBfG4Ys,3014
-mlxsmith/rlm/weights.py,sha256=
+mlxsmith/rlm/weights.py,sha256=tgl4Uc80QF9YpCCr3ewBmL7uru9As2fDA1Z1SgZn-e4,8455
 mlxsmith/sdk/__init__.py,sha256=42WpTgC309sYKp8SArULBWz0trVN51THcjvPdVh-thc,10777
-mlxsmith/sdk/future.py,sha256=
+mlxsmith/sdk/future.py,sha256=WmYB-fDstaEuv-FUNX_S7IJSENbVEsYYEEVzH02ImLk,16868
 mlxsmith/sdk/losses.py,sha256=lJi3R7Red_QO3IatbhKi_GBI7kM0yu-kS14xN2kX_04,7532
-mlxsmith/sdk/sampling_client.py,sha256=
-mlxsmith/sdk/training_client.py,sha256=
+mlxsmith/sdk/sampling_client.py,sha256=o7jfgYpVWXrrIHo4-SrGAJx4FAlYdo198da27Jp0Yj4,24899
+mlxsmith/sdk/training_client.py,sha256=71bSgS65ofXL9X4qGSyDhVL5asDxENAf95LV66pwC3g,24341
 mlxsmith/train/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 mlxsmith/train/distill.py,sha256=9Xbzn6zt8uqYOwg_pJB2rZJj8y-nESP1OB8DLxnJ0jM,10378
-mlxsmith/train/lora.py,sha256=
+mlxsmith/train/lora.py,sha256=k3aNqBjMyE6rPGS2CAJRSDsTJiUa1ztjrA3k9N87IjY,9046
 mlxsmith/train/pref.py,sha256=-z7mj-MQu6dPb8y0U6sRFbqKU0nNQ3YRpC3CcmS3l5k,6987
 mlxsmith/train/rft.py,sha256=bf6z-h6VQKvMDZ0XN_ayZV44YsIvuwSzP1YRYrwSQ2M,18956
 mlxsmith/train/sft.py,sha256=w3QmLLoscNQzz-xbtmrCw46PbYIApvgeQi0XjxCop90,5590
@@ -61,9 +61,9 @@ mlxsmith/verifiers/jsonschema.py,sha256=hG_8c07Hwv-tpN2g0oxELwmLRxS8QGzRFwabmo4y
 mlxsmith/verifiers/pytest_verifier.py,sha256=ARNajzxUPNwtzSow6I2d0mLopZyvY29_d3F1sYVwEUY,2514
 mlxsmith/verifiers/regex.py,sha256=N7z3koE8Iy-a4DBs4404iQCNX2WGxequm5g4umric2Y,524
 mlxsmith/verifiers/types.py,sha256=FytBxB1OnNX1EcqZXSSs3WvL0GRv7byW4mfBMf6xP68,240
-mlxsmith-0.1.
-mlxsmith-0.1.
-mlxsmith-0.1.
-mlxsmith-0.1.
-mlxsmith-0.1.
-mlxsmith-0.1.
+mlxsmith-0.1.1.dist-info/licenses/LICENSE,sha256=ESYyLizI0WWtxMeS7rGVcX3ivMezm-HOd5WdeOh-9oU,1056
+mlxsmith-0.1.1.dist-info/METADATA,sha256=d0H3FBZa2-BfGVbz3TWpjtgWhMe-t46bp8PQELh5yFQ,9131
+mlxsmith-0.1.1.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+mlxsmith-0.1.1.dist-info/entry_points.txt,sha256=ys7GcKEjhzhkkTMBmmaNavTgsjqOuFnCKIG2w8Wcn6w,46
+mlxsmith-0.1.1.dist-info/top_level.txt,sha256=hKBwc8bn7uoI-_5Yhcq1T3IuChFhUFdzItIkZK1up6A,9
+mlxsmith-0.1.1.dist-info/RECORD,,
mlxsmith-0.1.0.dist-info/METADATA
DELETED

@@ -1,163 +0,0 @@
-Metadata-Version: 2.4
-Name: mlxsmith
-Version: 0.1.0
-Summary: Apple Silicon MLX fine-tuning and OpenAI-compatible serving (SFT stable; preference/RL experimental).
-Author-email: Shannon Labs <hmbown@gmail.com>
-License: MIT
-Project-URL: Homepage, https://github.com/Hmbown/MLXSmith
-Project-URL: Repository, https://github.com/Hmbown/MLXSmith
-Project-URL: Issues, https://github.com/Hmbown/MLXSmith/issues
-Keywords: mlx,apple-silicon,llm,fine-tuning,lora,openai-compatible
-Classifier: Development Status :: 3 - Alpha
-Classifier: Intended Audience :: Developers
-Classifier: Intended Audience :: Science/Research
-Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
-Classifier: License :: OSI Approved :: MIT License
-Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3 :: Only
-Classifier: Programming Language :: Python :: 3.10
-Classifier: Programming Language :: Python :: 3.11
-Classifier: Programming Language :: Python :: 3.12
-Classifier: Operating System :: MacOS :: MacOS X
-Requires-Python: >=3.10
-Description-Content-Type: text/markdown
-License-File: LICENSE
-Requires-Dist: typer>=0.9.0
-Requires-Dist: rich>=13.7.0
-Requires-Dist: pyyaml>=6.0.1
-Requires-Dist: pydantic>=2.5.0
-Requires-Dist: pydantic-settings>=2.2.1
-Requires-Dist: tomli>=2.0.1; python_version < "3.11"
-Requires-Dist: huggingface_hub>=1.3.4
-Requires-Dist: jsonschema>=4.21.0
-Provides-Extra: mlx
-Requires-Dist: mlx>=0.30.4; extra == "mlx"
-Provides-Extra: llm
-Requires-Dist: mlx-lm>=0.30.5; extra == "llm"
-Requires-Dist: transformers>=5.0.0; extra == "llm"
-Requires-Dist: datasets>=3.0.0; extra == "llm"
-Provides-Extra: serve
-Requires-Dist: fastapi>=0.128.0; extra == "serve"
-Requires-Dist: uvicorn>=0.40.0; extra == "serve"
-Requires-Dist: httpx>=0.28.0; extra == "serve"
-Provides-Extra: zmlx
-Requires-Dist: zmlx; extra == "zmlx"
-Provides-Extra: dev
-Requires-Dist: pytest>=9.0.0; extra == "dev"
-Requires-Dist: ruff>=0.14.0; extra == "dev"
-Provides-Extra: all
-Requires-Dist: mlx>=0.30.4; extra == "all"
-Requires-Dist: mlx-lm>=0.30.5; extra == "all"
-Requires-Dist: transformers>=5.0.0; extra == "all"
-Requires-Dist: datasets>=3.0.0; extra == "all"
-Requires-Dist: fastapi>=0.128.0; extra == "all"
-Requires-Dist: uvicorn>=0.40.0; extra == "all"
-Requires-Dist: httpx>=0.28.0; extra == "all"
-Dynamic: license-file
-
-# mlxsmith
-
-Apple Silicon MLX fine-tuning and OpenAI-compatible serving.
-SFT + serving are stable. Preference/RL/RLM features are experimental.
-
-Status: alpha (2026-02-02).
-
-## Stable features
-- Project init, config, data tools, HF auth, model pull/convert.
-- SFT (LoRA/QLoRA) training with run tracking and adapters.
-- Inference and OpenAI-compatible /v1/chat/completions serving.
-- Basic eval/bench and verifier plumbing (regex/jsonschema/pytest).
-
-## Experimental features
-- Preference tuning (DPO/ORPO).
-- GRPO-style RFT.
-- RLM self-play loop (research).
-- Distill/OPD and orchestrated RLM.
-
-## Install
-
-MLX is only available on Apple Silicon. Other platforms can still use data tools
-and mock backends, but MLX training and serving require macOS on Apple Silicon.
-
-```bash
-python -m venv .venv && source .venv/bin/activate
-pip install -U pip
-
-# Core CLI
-pip install mlxsmith
-
-# Apple Silicon training + serving
-pip install "mlxsmith[mlx,llm,serve]"
-```
-
-## Quickstart
-
-```bash
-mlxsmith init myproj
-cd myproj
-mlxsmith doctor
-```
-
-## HF auth (optional)
-
-```bash
-mlxsmith auth login --token "$HF_TOKEN"
-mlxsmith auth status
-mlxsmith auth logout
-```
-
-## Pull + convert a model (HF -> MLX)
-
-```bash
-mlxsmith pull Qwen/Qwen3-4B-Instruct-2507
-# outputs to cache/mlx/Qwen__Qwen3-4B-Instruct-2507
-```
-
-Optional quantization:
-
-```bash
-mlxsmith pull Qwen/Qwen3-4B-Instruct-2507 --quantize --q-bits 4
-```
-
-## SFT (LoRA/QLoRA)
-
-```bash
-mlxsmith sft --model cache/mlx/Qwen__Qwen3-4B-Instruct-2507 --data data/sft
-```
-
-## Serve (OpenAI-compatible)
-
-```bash
-mlxsmith serve --model runs/sft_0001/adapter --port 8080
-```
-
-Sample request:
-
-```bash
-curl http://localhost:8080/v1/chat/completions \
-  -H 'Content-Type: application/json' \
-  -d '{"messages":[{"role":"user","content":"Hello"}],"max_tokens":64}'
-```
-
-To enable the optional UI/monitor dashboard, set `serve.ui: true` in `mlxsmith.yaml`.
-
-## Experimental commands
-
-- `mlxsmith pref` (DPO/ORPO)
-- `mlxsmith rft` (GRPO-style)
-- `mlxsmith rlm` / `mlxsmith pipeline` (self-play loop)
-- `mlxsmith distill` (offline/OPD)
-- `mlxsmith eval` / `mlxsmith bench`
-
-## Docs
-
-- `docs/PROJECT_FORMAT.md` for project layout and artifacts.
-- `docs/VERIFIERS.md` for verifier API and sandbox behavior.
-- `docs/COMPATIBILITY.md` for tested versions and model families.
-- `docs/ENVIRONMENTS.md` for the environment plugin system.
-- `docs/ROADMAP.md` for product direction and milestones.
-- `docs/README.md` for the full docs index.
-
-## License
-
-MIT
{mlxsmith-0.1.0.dist-info → mlxsmith-0.1.1.dist-info}/WHEEL
File without changes

{mlxsmith-0.1.0.dist-info → mlxsmith-0.1.1.dist-info}/entry_points.txt
File without changes

{mlxsmith-0.1.0.dist-info → mlxsmith-0.1.1.dist-info}/licenses/LICENSE
File without changes

{mlxsmith-0.1.0.dist-info → mlxsmith-0.1.1.dist-info}/top_level.txt
File without changes