mlxsmith 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mlxsmith/api/handlers.py CHANGED
@@ -20,11 +20,10 @@ import uuid
20
20
  from pathlib import Path
21
21
  from typing import Any, AsyncGenerator, Callable, Dict, List, Optional
22
22
 
23
- from fastapi import APIRouter, Depends, FastAPI, HTTPException, Request, Security, status
23
+ from fastapi import APIRouter, FastAPI, HTTPException, Request, Security, status
24
24
  from starlette.middleware.base import BaseHTTPMiddleware
25
25
  from fastapi.responses import StreamingResponse
26
26
  from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
27
- from pydantic import BaseModel
28
27
 
29
28
  from .schemas import (
30
29
  AdapterReloadRequest,
@@ -1102,12 +1101,10 @@ def create_router(
1102
1101
  models, use the list endpoint to check completion status.
1103
1102
  """
1104
1103
  cache_dir = _get_cache_dir()
1105
- local_path = cache_dir / "mlx" / request.model_id.replace("/", "__")
1106
-
1104
+
1107
1105
  try:
1108
1106
  # Import here to avoid circular dependencies
1109
1107
  from ..models import hf_pull
1110
- from ..config import ProjectConfig
1111
1108
 
1112
1109
  # Get HF token if available
1113
1110
  hf_token = None
mlxsmith/cli.py CHANGED
@@ -43,7 +43,7 @@ from .envs import (
43
43
 
44
44
  app = typer.Typer(
45
45
  add_completion=False,
46
- help="mlxsmith — MLX fine-tuning + OpenAI-compatible serving (SFT stable; preference/RL experimental)",
46
+ help="mlxsmith — Apple Silicon MLX fine-tuning toolkit: SFT, DPO/ORPO, GRPO, distillation, and OpenAI-compatible serving.",
47
47
  )
48
48
  console = Console()
49
49
 
@@ -564,7 +564,7 @@ def config_validate(
564
564
 
565
565
  try:
566
566
  cfg = load_config(cfg_path, require=True)
567
- console.print(f"[green]✓ Configuration is valid[/green]")
567
+ console.print("[green]✓ Configuration is valid[/green]")
568
568
 
569
569
  # Show summary
570
570
  table = Table(title="Configuration Summary")
@@ -593,9 +593,9 @@ def config_env(
593
593
  """Show available environment variables."""
594
594
  cfg = ProjectConfig()
595
595
 
596
- console.print(f"\n[bold]Environment Variable Configuration[/bold]")
596
+ console.print("\n[bold]Environment Variable Configuration[/bold]")
597
597
  console.print(f"Prefix: [cyan]{prefix}[/cyan]")
598
- console.print(f"Nested delimiter: [cyan]__[/cyan] (double underscore)\n")
598
+ console.print("Nested delimiter: [cyan]__[/cyan] (double underscore)\n")
599
599
 
600
600
  table = Table(title=f"Available {prefix}* Environment Variables")
601
601
  table.add_column("Environment Variable")
mlxsmith/config.py CHANGED
@@ -18,7 +18,6 @@ Config files support @path syntax:
18
18
  from __future__ import annotations
19
19
 
20
20
  import json
21
- import os
22
21
  from pathlib import Path
23
22
  from typing import Any, Dict, List, Optional, Tuple, Union
24
23
 
@@ -94,7 +93,7 @@ class ProjectSettings(BaseSettings):
94
93
 
95
94
 
96
95
  # Import CLI aliases from models
97
- from .config_models import CLI_ALIASES as _CLI_ALIASES
96
+ from .config_models import CLI_ALIASES as _CLI_ALIASES # noqa: E402
98
97
 
99
98
 
100
99
  def resolve_config_path(config: Union[str, Path], root: Optional[Path] = None) -> Path:
mlxsmith/llm/interface.py CHANGED
@@ -182,11 +182,9 @@ def compute_logprobs(
182
182
  else:
183
183
  prompt_len = len(prompt_ids)
184
184
 
185
- # Get generation with logprobs
186
- full_text = backend.decode(ids)
187
-
188
- # Use backend's sequence_logprob if available
189
- seq_logprob = backend.sequence_logprob(ids, prompt_len=prompt_len)
185
+ # Decode and compute sequence-level logprob (used by callers via backend state)
186
+ backend.decode(ids)
187
+ backend.sequence_logprob(ids, prompt_len=prompt_len)
190
188
 
191
189
  # For per-token logprobs, we'd need to do a forward pass
192
190
  # This is a simplified version
@@ -2,7 +2,7 @@ from __future__ import annotations
2
2
 
3
3
  import random
4
4
  from dataclasses import dataclass
5
- from typing import Sequence, Any, List, Dict, Optional
5
+ from typing import Sequence, Any, List, Dict
6
6
 
7
7
  from .backend import Generation
8
8
 
@@ -9,27 +9,22 @@ Manages rollout requests, training batches, and weight updates.
9
9
 
10
10
  from __future__ import annotations
11
11
 
12
- import json
13
12
  import multiprocessing as mp
14
13
  import signal
15
- import sys
16
14
  import time
17
15
  import traceback
18
16
  from dataclasses import dataclass, field
19
17
  from pathlib import Path
20
- from typing import Any, Dict, List, Optional, Callable
18
+ from typing import Any, Dict, List, Optional
21
19
 
22
20
  from rich.console import Console
23
21
 
24
22
  from ..config import ProjectConfig
25
- from ..rlm.corpus import append_corpus, load_corpus, sample_corpus
26
- from ..rlm.gating import load_state, save_state, should_accept, update_state
27
- from ..rlm.history import append_history
28
- from ..rlm.inference import Rollout, build_tasks
23
+ from ..rlm.gating import load_state
29
24
  from ..rlm.weights import WeightPointerStore, WeightPointerIPC
30
25
  from ..runs import new_run, snapshot_config
31
- from ..util import ensure_dir, now_ts, write_jsonl
32
- from .queue import MessageQueue, MessageType, Message
26
+ from ..util import ensure_dir, now_ts
27
+ from .queue import MessageQueue, MessageType
33
28
  from .inference_worker import InferenceConfig, run_inference_worker
34
29
  from .trainer_worker import TrainerConfig, run_trainer_worker
35
30
 
@@ -26,7 +26,7 @@ from fastapi.responses import StreamingResponse
26
26
  from ..config import ProjectConfig
27
27
  from ..llm.registry import get_llm_backend
28
28
  from ..models import resolve_model_spec
29
- from ..rlm.weights import WeightPointerStore, WeightPointerIPC
29
+ from ..rlm.weights import WeightPointerStore
30
30
  from .queue import MessageQueue, MessageType, Message
31
31
 
32
32
 
@@ -11,11 +11,11 @@ import json
11
11
  import multiprocessing as mp
12
12
  import time
13
13
  import uuid
14
- from dataclasses import asdict, dataclass, field
14
+ from dataclasses import dataclass, field
15
15
  from enum import Enum, auto
16
16
  from pathlib import Path
17
17
  from queue import Empty
18
- from typing import Any, Dict, List, Optional, Union
18
+ from typing import Any, Dict, List, Optional
19
19
 
20
20
 
21
21
  class MessageType(Enum):
@@ -9,7 +9,6 @@ from __future__ import annotations
9
9
 
10
10
  import json
11
11
  import signal
12
- import sys
13
12
  import time
14
13
  import traceback
15
14
  from collections import defaultdict
@@ -17,7 +16,6 @@ from dataclasses import dataclass, field
17
16
  from pathlib import Path
18
17
  from typing import Any, Dict, List, Optional
19
18
 
20
- from ..config import ProjectConfig
21
19
  from ..llm.registry import get_llm_backend
22
20
  from ..rlm.inference import Rollout
23
21
  from ..rlm.weights import WeightPointerStore, WeightPointerIPC
mlxsmith/rlm/loop.py CHANGED
@@ -15,12 +15,11 @@ from __future__ import annotations
15
15
  import json
16
16
  import multiprocessing as mp
17
17
  import signal
18
- import sys
19
18
  import time
20
19
  import traceback
21
- from dataclasses import dataclass, asdict
20
+ from dataclasses import dataclass
22
21
  from pathlib import Path
23
- from typing import Any, Dict, List, Optional, Callable
22
+ from typing import Dict, List, Optional
24
23
 
25
24
  from rich.console import Console
26
25
 
@@ -35,10 +34,9 @@ from ..verifiers.docker_verifier import verify as docker_verify
35
34
  from ..verifiers.pytest_verifier import verify as pytest_verify
36
35
  from .corpus import append_corpus, load_corpus, sample_corpus
37
36
  from .gating import load_state, save_state, should_accept, update_state
38
- from .generate import GeneratedTask, generate_tasks, filter_tasks
37
+ from .generate import GeneratedTask
39
38
  from .history import append_history
40
39
  from .inference import Rollout, build_tasks
41
- from .mutate import mutate_tasks
42
40
  from .trainer import train_on_rollouts
43
41
  from .weights import (
44
42
  WeightPointer,
@@ -373,9 +371,9 @@ def run_rlm(
373
371
  # Multi-Process Orchestrated RLM
374
372
  # =============================================================================
375
373
 
376
- from ..orchestrator.queue import MessageQueue, MessageType, Message
377
- from ..orchestrator.inference_worker import InferenceConfig, run_inference_worker
378
- from ..orchestrator.trainer_worker import TrainerConfig, run_trainer_worker
374
+ from ..orchestrator.queue import MessageQueue, MessageType, Message # noqa: E402
375
+ from ..orchestrator.inference_worker import InferenceConfig, run_inference_worker # noqa: E402
376
+ from ..orchestrator.trainer_worker import TrainerConfig, run_trainer_worker # noqa: E402
379
377
 
380
378
 
381
379
  @dataclass
mlxsmith/rlm/weights.py CHANGED
@@ -9,7 +9,7 @@ from __future__ import annotations
9
9
  import json
10
10
  import multiprocessing as mp
11
11
  import time
12
- from dataclasses import dataclass, asdict
12
+ from dataclasses import dataclass
13
13
  from pathlib import Path
14
14
  from typing import Optional, Callable
15
15
 
mlxsmith/sdk/future.py CHANGED
@@ -7,9 +7,8 @@ and progress tracking for async operations.
7
7
  from __future__ import annotations
8
8
 
9
9
  import threading
10
- import time
11
10
  from concurrent.futures import Future, ThreadPoolExecutor
12
- from typing import Any, Callable, Generic, Iterable, Optional, TypeVar, Union
11
+ from typing import Any, Callable, Generic, Iterable, Optional, TypeVar
13
12
 
14
13
  from ..llm.backend import DecodingConfig
15
14
 
@@ -344,7 +343,7 @@ class APIFuture(Generic[T]):
344
343
  self._run_finally_callbacks()
345
344
  return True
346
345
 
347
- def cancelled(self) -> bool: # type: ignore
346
+ def cancelled(self) -> bool: # type: ignore # noqa: F811
348
347
  """Check if the future was cancelled."""
349
348
  with self._lock:
350
349
  return self._state == APIFutureState.CANCELLED
@@ -27,8 +27,7 @@ Example:
27
27
  from __future__ import annotations
28
28
 
29
29
  from dataclasses import dataclass, field
30
- from typing import Any, Dict, List, Optional, Sequence, Union, Callable
31
- import concurrent.futures
30
+ from typing import Any, Dict, List, Optional, Sequence
32
31
 
33
32
  from .future import APIFuture, SdkFuturePool
34
33
 
@@ -22,7 +22,7 @@ Example:
22
22
  from __future__ import annotations
23
23
 
24
24
  from dataclasses import dataclass
25
- from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union
25
+ from typing import Any, Callable, Dict, List, Optional
26
26
 
27
27
  from .future import APIFuture, SdkFuturePool
28
28
 
@@ -194,7 +194,7 @@ class TrainingClient:
194
194
  if batch.is_preference:
195
195
  # Preference training (DPO, ORPO, etc.)
196
196
  if batch.rejected_responses is None:
197
- raise ValueError(f"Preference batch requires rejected_responses")
197
+ raise ValueError("Preference batch requires rejected_responses")
198
198
 
199
199
  for prompt, chosen, rejected in zip(
200
200
  batch.prompts,
@@ -345,7 +345,6 @@ class TrainingClient:
345
345
  def _run_save() -> CheckpointResult:
346
346
  try:
347
347
  from pathlib import Path
348
- import json
349
348
 
350
349
  save_path = Path(path)
351
350
  save_path.parent.mkdir(parents=True, exist_ok=True)
@@ -673,4 +672,4 @@ class DistillationTrainingClient(TrainingClient):
673
672
 
674
673
 
675
674
  # Import at end to avoid circular dependency
676
- from .sampling_client import SamplingClient
675
+ from .sampling_client import SamplingClient # noqa: E402
mlxsmith/server.py CHANGED
@@ -10,15 +10,10 @@ This module provides:
10
10
 
11
11
  from __future__ import annotations
12
12
 
13
- import json
14
- import time
15
- import uuid
16
13
  from pathlib import Path
17
- from typing import Any, Dict, List, Optional
18
14
 
19
15
  from fastapi import FastAPI
20
- from fastapi.responses import StreamingResponse, HTMLResponse
21
- from pydantic import BaseModel
16
+ from fastapi.responses import HTMLResponse
22
17
 
23
18
  from .config import ProjectConfig
24
19
  from .models import resolve_model_spec
mlxsmith/train/lora.py CHANGED
@@ -160,6 +160,8 @@ def apply_lora(model: Any, cfg: LoRAConfig) -> dict:
160
160
  keys = sorted(_keys_for_target_modules(model, cfg.target_modules))
161
161
 
162
162
  if tuner_utils is not None and hasattr(tuner_utils, "linear_to_lora_layers"):
163
+ # Freeze all base weights first so only LoRA params are trainable
164
+ model.freeze()
163
165
  # MLX-LM format
164
166
  config = {
165
167
  "rank": int(cfg.r),
@@ -256,6 +258,8 @@ def apply_adapter(model: Any, adapter_dir: str | Path) -> dict | None:
256
258
  adapter_cfg = load_adapter_config(adapter_dir)
257
259
  tuner_utils, _ = _try_mlx_lm_utils()
258
260
  if adapter_cfg is not None and tuner_utils is not None and hasattr(tuner_utils, "load_adapters"):
261
+ # Freeze base weights so only adapter params are trainable
262
+ model.freeze()
259
263
  tuner_utils.load_adapters(model, str(adapter_dir))
260
264
  return adapter_cfg
261
265
 
@@ -0,0 +1,293 @@
1
+ Metadata-Version: 2.4
2
+ Name: mlxsmith
3
+ Version: 0.1.1
4
+ Summary: Apple Silicon MLX fine-tuning toolkit — SFT, DPO/ORPO, GRPO, distillation, and OpenAI-compatible serving.
5
+ Author-email: Shannon Labs <hmbown@gmail.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/Hmbown/MLXSmith
8
+ Project-URL: Repository, https://github.com/Hmbown/MLXSmith
9
+ Project-URL: Issues, https://github.com/Hmbown/MLXSmith/issues
10
+ Keywords: mlx,apple-silicon,llm,fine-tuning,lora,openai-compatible
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: Intended Audience :: Science/Research
14
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
15
+ Classifier: License :: OSI Approved :: MIT License
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3 :: Only
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Operating System :: MacOS :: MacOS X
22
+ Requires-Python: >=3.10
23
+ Description-Content-Type: text/markdown
24
+ License-File: LICENSE
25
+ Requires-Dist: typer>=0.9.0
26
+ Requires-Dist: rich>=13.7.0
27
+ Requires-Dist: pyyaml>=6.0.1
28
+ Requires-Dist: pydantic>=2.5.0
29
+ Requires-Dist: pydantic-settings>=2.2.1
30
+ Requires-Dist: tomli>=2.0.1; python_version < "3.11"
31
+ Requires-Dist: huggingface_hub>=1.3.4
32
+ Requires-Dist: jsonschema>=4.21.0
33
+ Provides-Extra: mlx
34
+ Requires-Dist: mlx>=0.30.4; extra == "mlx"
35
+ Provides-Extra: llm
36
+ Requires-Dist: mlx-lm>=0.30.5; extra == "llm"
37
+ Requires-Dist: transformers>=5.0.0; extra == "llm"
38
+ Requires-Dist: datasets>=3.0.0; extra == "llm"
39
+ Provides-Extra: serve
40
+ Requires-Dist: fastapi>=0.128.0; extra == "serve"
41
+ Requires-Dist: uvicorn>=0.40.0; extra == "serve"
42
+ Requires-Dist: httpx>=0.28.0; extra == "serve"
43
+ Provides-Extra: zmlx
44
+ Requires-Dist: zmlx; extra == "zmlx"
45
+ Provides-Extra: dev
46
+ Requires-Dist: pytest>=9.0.0; extra == "dev"
47
+ Requires-Dist: ruff>=0.14.0; extra == "dev"
48
+ Provides-Extra: all
49
+ Requires-Dist: mlx>=0.30.4; extra == "all"
50
+ Requires-Dist: mlx-lm>=0.30.5; extra == "all"
51
+ Requires-Dist: transformers>=5.0.0; extra == "all"
52
+ Requires-Dist: datasets>=3.0.0; extra == "all"
53
+ Requires-Dist: fastapi>=0.128.0; extra == "all"
54
+ Requires-Dist: uvicorn>=0.40.0; extra == "all"
55
+ Requires-Dist: httpx>=0.28.0; extra == "all"
56
+ Dynamic: license-file
57
+
58
+ # mlxsmith
59
+
60
+ Apple Silicon MLX fine-tuning toolkit — SFT, DPO/ORPO, GRPO, distillation, and OpenAI-compatible serving.
61
+
62
+ **Status:** alpha (v0.1.1). Full training pipeline validated on Qwen3-4B.
63
+
64
+ ## Install
65
+
66
+ MLX training and serving require macOS on Apple Silicon.
67
+ Other platforms can use data tools and mock backends.
68
+
69
+ ```bash
70
+ python -m venv .venv && source .venv/bin/activate
71
+ pip install -U pip
72
+
73
+ # Core CLI (data tools, config, project scaffolding)
74
+ pip install mlxsmith
75
+
76
+ # Apple Silicon training + serving
77
+ pip install "mlxsmith[mlx,llm,serve]"
78
+
79
+ # Everything
80
+ pip install "mlxsmith[all]"
81
+ ```
82
+
83
+ ## Quickstart
84
+
85
+ ```bash
86
+ mlxsmith init myproj
87
+ cd myproj
88
+ mlxsmith doctor # check Python, MLX, Metal, ZMLX
89
+ ```
90
+
91
+ ## Training
92
+
93
+ ### SFT (LoRA/QLoRA)
94
+
95
+ ```bash
96
+ mlxsmith sft --model cache/mlx/Qwen__Qwen3-4B-Instruct-2507 --data data/sft
97
+ ```
98
+
99
+ Produces run artifacts under `runs/sft_NNNN/` (adapter weights, `metrics.jsonl`, config snapshot).
100
+
101
+ ### Preference tuning (DPO/ORPO)
102
+
103
+ ```bash
104
+ mlxsmith pref --model cache/mlx/Qwen__Qwen3-4B-Instruct-2507 \
105
+ --data data/prefs --algo dpo
106
+ ```
107
+
108
+ Supports DPO and ORPO algorithms with configurable beta and KL coefficients. Expects `{prompt, chosen, rejected}` data format.
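+
+ For reference, a minimal preference record in that format (one JSON object per line; the `data/prefs/train.jsonl` path is an assumption based on the command above, not a documented filename):
+
+ ```json
+ {"prompt": "Which is larger, 9.11 or 9.9?", "chosen": "9.9 is larger.", "rejected": "9.11 is larger."}
+ ```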
109
+
110
+ ### Reinforced fine-tuning (GRPO)
111
+
112
+ ```bash
113
+ mlxsmith rft --model cache/mlx/Qwen__Qwen3-4B-Instruct-2507 \
114
+ --env envs/coding.yaml --verifier verifiers/pytest.py
115
+ ```
116
+
117
+ GRPO-style RL training with token-level environment integration and verifier-based rewards. Includes rollout acceptance/rejection gating and reward tracking.
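+
+ As a rough sketch of the group-relative idea behind GRPO (illustrative only, not mlxsmith's exact implementation), verifier rewards for a group of rollouts sampled from the same prompt are normalized within that group to form advantages:
+
+ ```python
+ def group_relative_advantages(rewards: list[float], eps: float = 1e-6) -> list[float]:
+     """Normalize per-rollout rewards within one prompt's rollout group."""
+     mean = sum(rewards) / len(rewards)
+     std = (sum((r - mean) ** 2 for r in rewards) / len(rewards)) ** 0.5
+     # Rollouts that beat the group average get a positive advantage.
+     return [(r - mean) / (std + eps) for r in rewards]
+
+ # e.g. verifier rewards for four rollouts of one prompt
+ print(group_relative_advantages([1.0, 0.0, 0.0, 1.0]))  # roughly [1.0, -1.0, -1.0, 1.0]
+ ```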
118
+
119
+ ### Knowledge distillation
120
+
121
+ ```bash
122
+ # Offline distillation (teacher generates, student learns)
123
+ mlxsmith distill --teacher large-model --student small-model --mode offline
124
+
125
+ # Online preference distillation (OPD)
126
+ mlxsmith distill --teacher large-model --student small-model --mode opd
127
+ ```
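+
+ In both modes the student is trained to match teacher behavior. A common formulation (shown only as background; not necessarily the exact loss mlxsmith uses) is a token-level KL divergence between teacher and student next-token distributions:
+
+ ```python
+ import math
+
+ def token_kl(teacher_probs: list[float], student_probs: list[float]) -> float:
+     """KL(teacher || student) at one token position (background sketch)."""
+     return sum(t * math.log(t / s) for t, s in zip(teacher_probs, student_probs) if t > 0)
+
+ # A sequence-level distillation loss would average this over token positions.
+ ```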
128
+
129
+ ### Full pipeline
130
+
131
+ ```bash
132
+ # Run SFT → Pref → RFT in sequence
133
+ mlxsmith pipeline
134
+ ```
135
+
136
+ ## Serving
137
+
138
+ OpenAI-compatible `/v1/chat/completions` endpoint.
139
+
140
+ ```bash
141
+ mlxsmith serve --model runs/sft_0001/adapter --port 8080
142
+ ```
143
+
144
+ ```bash
145
+ curl http://localhost:8080/v1/chat/completions \
146
+ -H 'Content-Type: application/json' \
147
+ -d '{"messages":[{"role":"user","content":"Hello"}],"max_tokens":64}'
148
+ ```
149
+
150
+ Supports streaming (`"stream": true`), logprobs, stop sequences, and an optional UI dashboard (`serve.ui: true` in config).
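+
+ A minimal streaming request reuses the payload above with `"stream": true` added (`-N` just disables curl's output buffering):
+
+ ```bash
+ curl -N http://localhost:8080/v1/chat/completions \
+   -H 'Content-Type: application/json' \
+   -d '{"messages":[{"role":"user","content":"Hello"}],"max_tokens":64,"stream":true}'
+ ```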
151
+
152
+ ## Data tools
153
+
154
+ ```bash
155
+ mlxsmith data presets # list built-in datasets
156
+ mlxsmith data pull alpaca # pull a preset
157
+ mlxsmith data import raw.json --out data/sft/train.jsonl # import ShareGPT → JSONL
158
+ mlxsmith data split data/sft/train.jsonl --fractions 0.9 0.05 0.05
159
+ mlxsmith data stats data/sft/train.jsonl # token counts, field analysis
160
+ mlxsmith data validate data/sft/train.jsonl # structure check
161
+ ```
162
+
163
+ Built-in presets: `alpaca`, `hh-rlhf`, `ultrachat-200k`, `ultrafeedback-binarized-prefs`, `ultrafeedback-binarized-sft`.
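+
+ For context, `data import` targets ShareGPT-style conversation dumps. A source record conventionally looks like the following (shown for illustration; the exact JSONL schema mlxsmith writes out is not specified in this README):
+
+ ```json
+ {"conversations": [{"from": "human", "value": "Hello"}, {"from": "gpt", "value": "Hi, how can I help?"}]}
+ ```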
164
+
165
+ ## Model management
166
+
167
+ ```bash
168
+ # Pull + convert HF model to MLX
169
+ mlxsmith pull Qwen/Qwen3-4B-Instruct-2507
170
+
171
+ # With quantization
172
+ mlxsmith pull Qwen/Qwen3-4B-Instruct-2507 --quantize --q-bits 4
173
+
174
+ # Merge adapters
175
+ mlxsmith adapters merge runs/sft_0001/adapter runs/pref_0001/adapter --weights 0.7 0.3
176
+ ```
177
+
178
+ ## HF auth
179
+
180
+ ```bash
181
+ mlxsmith auth login --token "$HF_TOKEN"
182
+ mlxsmith auth status
183
+ mlxsmith auth logout
184
+ ```
185
+
186
+ ## Eval and bench
187
+
188
+ ```bash
189
+ # Evaluation suite (pass@k with verifier checks)
190
+ mlxsmith eval --suite eval/suites/coding.yaml
191
+
192
+ # Benchmark inference or training throughput
193
+ mlxsmith bench --mode inference
194
+ mlxsmith bench --mode trainer
195
+ mlxsmith bench --mode end_to_end
196
+ ```
197
+
198
+ ## Verifiers
199
+
200
+ Built-in verifiers for eval, RFT, and preference tuning:
201
+
202
+ - **regex** — pattern matching on completions
203
+ - **jsonschema** — JSON structure validation
204
+ - **pytest** — sandboxed test execution
205
+ - **docker** — containerized verification
206
+ - **compose** — multi-verifier composition (AND/OR/weighted)
207
+
208
+ See `docs/VERIFIERS.md` for the verifier API.
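+
+ As a rough sketch of the shape a verifier takes (the pytest and docker verifiers, for instance, expose a module-level `verify` function; the argument names and result shape below are assumptions, see `docs/VERIFIERS.md` for the real API):
+
+ ```python
+ # Hypothetical custom verifier; not the documented mlxsmith interface.
+ import re
+
+ def verify(completion: str, task: dict) -> dict:
+     """Score a model completion against a task-supplied regex."""
+     pattern = task.get("pattern", r"\bdef\s+\w+\(")  # e.g. "defines a Python function"
+     passed = re.search(pattern, completion) is not None
+     return {"passed": passed, "score": 1.0 if passed else 0.0}
+ ```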
209
+
210
+ ## Environment plugin system
211
+
212
+ ```bash
213
+ mlxsmith env list # list available environments
214
+ mlxsmith env info envs/coding.yaml # show manifest (tasks, verifier, version)
215
+ mlxsmith env init my_env # scaffold a new environment
216
+ mlxsmith env install ./my_env # install from directory
217
+ mlxsmith env package ./my_env # create distributable tarball
218
+ mlxsmith env run envs/coding.yaml # execute RFT with this environment
219
+ ```
220
+
221
+ Environments define tasks, verifiers, and reward functions for RFT training. See `docs/ENVIRONMENTS.md`.
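+
+ A manifest sketch, assuming YAML keys along the lines of what `env info` reports above (the field names and task shape are illustrative guesses, not a documented schema):
+
+ ```yaml
+ # envs/coding.yaml (hypothetical contents)
+ name: coding
+ version: 0.1.0
+ verifier: verifiers/pytest.py
+ tasks:
+   - prompt: "Write a function that reverses a string."
+     tests: "assert solution('ab') == 'ba'"
+ ```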
222
+
223
+ ## Config system
224
+
225
+ ```bash
226
+ mlxsmith config show # display merged config (YAML/JSON/TOML)
227
+ mlxsmith config show --sources # show where each value comes from
228
+ mlxsmith config init # create default mlxsmith.yaml
229
+ mlxsmith config validate # check config structure
230
+ mlxsmith config env # show environment variable mapping
231
+ ```
232
+
233
+ Config sources (in priority order): CLI flags > environment variables (`MLXSMITH__SECTION__KEY`) > config file > defaults.
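+
+ For example, using the `serve.ui` key from the serving section above, this environment variable is equivalent to setting the key in `mlxsmith.yaml`:
+
+ ```bash
+ # mlxsmith.yaml equivalent:
+ #   serve:
+ #     ui: true
+ export MLXSMITH__SERVE__UI=true
+ ```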
234
+
235
+ ## SDK (programmatic API)
236
+
237
+ For building custom training loops:
238
+
239
+ ```python
240
+ from mlxsmith.sdk import load_model, SamplingClient, TrainingClient, TrainingBatch
241
+
242
+ loaded = load_model("path/to/model", config)  # config: a project/training config object (not constructed in this snippet)
243
+
244
+ # Sampling with logprobs
245
+ sampler = SamplingClient(loaded.backend)
246
+ result = sampler.sample("prompt", logprobs_k=5)
247
+
248
+ # Training operations
249
+ trainer = TrainingClient(loaded.backend)
250
+ trainer.create_optimizer(lr=1e-4, weight_decay=0.01)
251
+ fb = trainer.forward_backward(batch)
252
+ trainer.optim_step(fb.result().grads)
253
+ ```
254
+
255
+ Loss functions: DPO, ORPO, GRPO, CISPO, DRO, PPO, importance sampling, cross-entropy.
256
+
257
+ ## Research
258
+
259
+ ### RLM self-play loop
260
+
261
+ RLM (Recursive Language Model) is a research feature — the infrastructure runs but has not produced measured gains yet.
262
+
263
+ ```bash
264
+ mlxsmith rlm # single-process RLM
265
+ mlxsmith pipeline --orchestrated # multi-process orchestrated RLM
266
+ mlxsmith rlm status # check iteration state
267
+ mlxsmith rlm history # view history
268
+ ```
269
+
270
+ Includes task generation, mutation for data diversity, corpus management, EMA-based gating, and weight pointer IPC for multi-process coordination. See `docs/orchestrator.md`.
271
+
272
+ ### ZMLX acceleration
273
+
274
+ Optional zero-copy MLX acceleration backend.
275
+
276
+ ```bash
277
+ mlxsmith accel status
278
+ ```
279
+
280
+ ## Docs
281
+
282
+ - `docs/PROJECT_FORMAT.md` — project layout and artifacts
283
+ - `docs/VERIFIERS.md` — verifier API and sandbox behavior
284
+ - `docs/COMPATIBILITY.md` — tested versions and model families
285
+ - `docs/ENVIRONMENTS.md` — environment plugin system
286
+ - `docs/orchestrator.md` — multi-process RLM orchestrator
287
+ - `docs/rlm-ctl.md` — RLM training guide
288
+ - `docs/ROADMAP.md` — product direction and milestones
289
+ - `docs/README.md` — full docs index
290
+
291
+ ## License
292
+
293
+ MIT
@@ -2,55 +2,55 @@ mlxsmith/__init__.py,sha256=CJZKl9Hp16DYlQR7yqstir-cL4n7GCw90d-meXliCHk,48
2
2
  mlxsmith/adapters.py,sha256=wkQ2q_ugaxCviNARSmxehwBcc2_NKVJ7mOofT-y30TY,1318
3
3
  mlxsmith/auth.py,sha256=_j_gx5ccZfpHs0_Xmpcgh_ELhX3ZBJLg2YYpjA-aPRI,2195
4
4
  mlxsmith/bench.py,sha256=VBgY9uOGm3xhmL3UrNNOnUoa3P0yaVQQ7wxykIEmDEw,3778
5
- mlxsmith/cli.py,sha256=YHjrPti1OCe0m2gpEwygiJ6_x-xeycKHRDYCksdcOuM,35750
6
- mlxsmith/config.py,sha256=iFDhO7KcAtq1KySGD7TfNMYqbQcec0Tc0-VJdr5Gwo4,16385
5
+ mlxsmith/cli.py,sha256=I7vzO2Z3ja1ncMOGHhXSEN_kElfWdmA6Jvs4rFgINZ0,35767
6
+ mlxsmith/config.py,sha256=K1VbN-3WoWf4vzZ6BNeTgEz2DFH8s9YminqwyNBVLj0,16389
7
7
  mlxsmith/config_models.py,sha256=pMsLGyC9J9T9jqs5ipYFUuEkfir7iiDA4EZdcb65k5g,8407
8
8
  mlxsmith/data.py,sha256=3ZlNS8bnD7LlWACEmULbf8RGQzCuf0QulFpI1PWvNuI,16160
9
9
  mlxsmith/eval.py,sha256=nSARSEKKwZM8Ot5rUDDpGikaClGNxvg0ifgGkTA6mM0,3792
10
10
  mlxsmith/infer.py,sha256=ekpHhTird0dnTJzFOc-O98rjwkEKgAr9AFicKlaB3MA,4610
11
11
  mlxsmith/models.py,sha256=BRaPTxzqy-5KEKdccveMgjpbRP0ZmbRnA_su8rz2P4k,8033
12
12
  mlxsmith/runs.py,sha256=2voYBryGGMlAKskHJ7TDiIPQL2_fFxSQ8RgtfGZ7ccg,1409
13
- mlxsmith/server.py,sha256=CBxmV9WwpyivsHytVALgT4DOGh1vqjwm4W_iHBH79rg,10821
13
+ mlxsmith/server.py,sha256=Fk-i9xK_Teq0Z0m-W1GRJVtcG0nYvd4bQ85lnAUuT1w,10690
14
14
  mlxsmith/util.py,sha256=8fagFtMP2YS1qlWkCt8bPWAz2jAgfvAf1ipNGJZAgIk,4544
15
15
  mlxsmith/accel/__init__.py,sha256=Mv2mj-2bLqUILhMUCjMLu7JORcu0-cGBOri32j7O5Xo,291
16
16
  mlxsmith/accel/base.py,sha256=o0kmxV68KbxOmucC3eDjKbFA8dfTT2ETqN0XD_l3mM0,435
17
17
  mlxsmith/accel/none.py,sha256=WhxECIBv-pE63Vh1Iv86ObgT9JHOi4hA4BUyJc__sKU,362
18
18
  mlxsmith/accel/zmlx_backend.py,sha256=JfzQ44v9hrCJgcqU018ZD7qLNlubIe09CwYRpKyfwR8,1529
19
19
  mlxsmith/api/__init__.py,sha256=IrpIXDkUJm4BZqahYOK_0BkxvomlscEvCoLCm4GDxo8,998
20
- mlxsmith/api/handlers.py,sha256=5YKQfHLG2mWjCugOkG1L3veiPMz_Qg_s4h12xx6eibs,47068
20
+ mlxsmith/api/handlers.py,sha256=94Spq4glFp7mRwmKrFqt7erd1Af_PxVP_vpxCo2UFdQ,46896
21
21
  mlxsmith/api/schemas.py,sha256=Q18kF9FKtvT1vdnXy6feSNTtCV2FiRWDzfdsPzc0te8,19316
22
22
  mlxsmith/envs/__init__.py,sha256=t7QiEHtfyP1dUCj-4TJUN0hd9lRqBKYd5Ek7dgEwus4,671
23
23
  mlxsmith/envs/system.py,sha256=2bChkOxm2S7d0WCrweHGhoI6-xOYDxlC0YbHH6Ibjq4,12782
24
24
  mlxsmith/envs/token_env.py,sha256=rhv2o3eI1GyTtfAXG72z-31amNGaLv0KW56mEsWkXlY,6709
25
25
  mlxsmith/llm/__init__.py,sha256=jWEkXGdvwZ8tUYHVqWW3SYHXG-LSWaGbdwOR0mF_4Zw,396
26
26
  mlxsmith/llm/backend.py,sha256=_xGfdJ30_6Nq8zIqMjTemCZWV-vUi9qV0djfwxny5SM,4335
27
- mlxsmith/llm/interface.py,sha256=zmqJuyiEQqjEpDjZwooS1hp1GfTPCTiWYJ5nPw8xSqg,6633
27
+ mlxsmith/llm/interface.py,sha256=udQl_R7ecmM4Nh20P50Nmnv2h853ByrgevjQIRDxX4g,6601
28
28
  mlxsmith/llm/mlx_lm_backend.py,sha256=OitqY_6LqnfqbN7GZz39Ma3cItjjNqHXF3SV3AZsHxk,18284
29
- mlxsmith/llm/mock_backend.py,sha256=DXXnMlXZGCtf0datgjFc5z_X8JMEKaYDRPXKU1erGdQ,7449
29
+ mlxsmith/llm/mock_backend.py,sha256=ZTkPRnRmCXSkhOJ1TLbSgTnAcLTKa9sq6-zzbTEX6Qc,7439
30
30
  mlxsmith/llm/registry.py,sha256=ZmYE-WclyMo6z0HwUufqt3tKT4E84xZ6I-PFu1Z5nws,309
31
31
  mlxsmith/orchestrator/__init__.py,sha256=oc4qIkISZMGMvYeOqU8lDDmFL3uxDYJHsv_rra9DH-E,792
32
- mlxsmith/orchestrator/daemon.py,sha256=MRQf84sCjeShBFcOcF7yfP6HhHl7IHrnmw0nV50mJrI,16360
33
- mlxsmith/orchestrator/inference_worker.py,sha256=4AI_j7qnrnwXObBxSa7YHlZHnGfqou-W1fpqHYt8jpQ,17981
34
- mlxsmith/orchestrator/queue.py,sha256=8c1n-fiW3ITcLbwOeFlH_AEZdJYvZumy8EczJ0lh4NA,11301
35
- mlxsmith/orchestrator/trainer_worker.py,sha256=uvJQesXjfdsiNRsO2FVHhUk0WmMB_PQNSPff5U9Swp4,16061
32
+ mlxsmith/orchestrator/daemon.py,sha256=VJFF8s_85h4C-YM14wRUlzDHrhhVPfQOztmNLKyRk30,16107
33
+ mlxsmith/orchestrator/inference_worker.py,sha256=PfmsanrBnx9HZNqG00jTQQTKqDa2bl-wUtYAWtxfzvs,17963
34
+ mlxsmith/orchestrator/queue.py,sha256=E8VymvJi2zEpuTwsG7JB-vROJGGS5evPPhIpkmdwtq4,11286
35
+ mlxsmith/orchestrator/trainer_worker.py,sha256=IM7vOhkYlCKoQSC1tNobkeIVnkmsz4DtAWfu24bjJXY,16015
36
36
  mlxsmith/rlm/__init__.py,sha256=Q09oRONXWTFXuWwMJOpGWg0I-UDkuib0OA1O_cNFp2U,236
37
37
  mlxsmith/rlm/corpus.py,sha256=-p12H650_ybe2kXC219M4wXYpD08QHUpY92ErVjSfX8,2112
38
38
  mlxsmith/rlm/gating.py,sha256=L18niYKEezphASdsgzW6pz3PN7ylA-L5Wu4_GLLVfHw,2455
39
39
  mlxsmith/rlm/generate.py,sha256=q1v_TP8sqVj05omhoF60Ns1iX6yClgc7lP6njz4lK18,7601
40
40
  mlxsmith/rlm/history.py,sha256=Vm4JtWqsZnqB-fuo3zWfweeogmmLTL3VHaYZ45vrkz8,299
41
41
  mlxsmith/rlm/inference.py,sha256=ntCEKxD1KrkIXgZNQbD4jhS5rJPtwcVYc8qLc5E5cnc,5297
42
- mlxsmith/rlm/loop.py,sha256=WLRP1PI5PX7OjSEZsurrCwSTVVPNVIx4-7td1ihSMY0,49955
42
+ mlxsmith/rlm/loop.py,sha256=nkAR5KYErcq893kQYigFkl3NG469ZUu1CkN_sS1ObAM,49900
43
43
  mlxsmith/rlm/mutate.py,sha256=_NUNMpVCRaEDgtzI8J2NOTcj5NnycZnP_UoUpFacjTs,2553
44
44
  mlxsmith/rlm/trainer.py,sha256=RRXPlJy4SySpLZGge0ORMYs7HeiWgfGQNeMBOBfG4Ys,3014
45
- mlxsmith/rlm/weights.py,sha256=NO7wjl2T0eXTVFoYrzPT_IUmaLvD2z-zSSyKpcX93kY,8463
45
+ mlxsmith/rlm/weights.py,sha256=tgl4Uc80QF9YpCCr3ewBmL7uru9As2fDA1Z1SgZn-e4,8455
46
46
  mlxsmith/sdk/__init__.py,sha256=42WpTgC309sYKp8SArULBWz0trVN51THcjvPdVh-thc,10777
47
- mlxsmith/sdk/future.py,sha256=mleqPgJ997hSuZuQegvS2GoOxqo_gd4pfh37gv70APc,16873
47
+ mlxsmith/sdk/future.py,sha256=WmYB-fDstaEuv-FUNX_S7IJSENbVEsYYEEVzH02ImLk,16868
48
48
  mlxsmith/sdk/losses.py,sha256=lJi3R7Red_QO3IatbhKi_GBI7kM0yu-kS14xN2kX_04,7532
49
- mlxsmith/sdk/sampling_client.py,sha256=AkjJHEZ8OLU8SZoALk7ds3NHMquiqk9GGZHqni5vB2g,24942
50
- mlxsmith/sdk/training_client.py,sha256=4GXcswTJypkymPO3E5DREUesGzlumbmUrTnt0T80wEI,24380
49
+ mlxsmith/sdk/sampling_client.py,sha256=o7jfgYpVWXrrIHo4-SrGAJx4FAlYdo198da27Jp0Yj4,24899
50
+ mlxsmith/sdk/training_client.py,sha256=71bSgS65ofXL9X4qGSyDhVL5asDxENAf95LV66pwC3g,24341
51
51
  mlxsmith/train/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
52
52
  mlxsmith/train/distill.py,sha256=9Xbzn6zt8uqYOwg_pJB2rZJj8y-nESP1OB8DLxnJ0jM,10378
53
- mlxsmith/train/lora.py,sha256=E8ymi1wUVsGp5-0DldvkSWDHNh_5Fhr6AelJOR5YoPw,8859
53
+ mlxsmith/train/lora.py,sha256=k3aNqBjMyE6rPGS2CAJRSDsTJiUa1ztjrA3k9N87IjY,9046
54
54
  mlxsmith/train/pref.py,sha256=-z7mj-MQu6dPb8y0U6sRFbqKU0nNQ3YRpC3CcmS3l5k,6987
55
55
  mlxsmith/train/rft.py,sha256=bf6z-h6VQKvMDZ0XN_ayZV44YsIvuwSzP1YRYrwSQ2M,18956
56
56
  mlxsmith/train/sft.py,sha256=w3QmLLoscNQzz-xbtmrCw46PbYIApvgeQi0XjxCop90,5590
@@ -61,9 +61,9 @@ mlxsmith/verifiers/jsonschema.py,sha256=hG_8c07Hwv-tpN2g0oxELwmLRxS8QGzRFwabmo4y
61
61
  mlxsmith/verifiers/pytest_verifier.py,sha256=ARNajzxUPNwtzSow6I2d0mLopZyvY29_d3F1sYVwEUY,2514
62
62
  mlxsmith/verifiers/regex.py,sha256=N7z3koE8Iy-a4DBs4404iQCNX2WGxequm5g4umric2Y,524
63
63
  mlxsmith/verifiers/types.py,sha256=FytBxB1OnNX1EcqZXSSs3WvL0GRv7byW4mfBMf6xP68,240
64
- mlxsmith-0.1.0.dist-info/licenses/LICENSE,sha256=ESYyLizI0WWtxMeS7rGVcX3ivMezm-HOd5WdeOh-9oU,1056
65
- mlxsmith-0.1.0.dist-info/METADATA,sha256=yRnNRKdCFPZQRgAN7qtxCDBrUJxplOqK3mTV_Uoh2jA,4812
66
- mlxsmith-0.1.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
67
- mlxsmith-0.1.0.dist-info/entry_points.txt,sha256=ys7GcKEjhzhkkTMBmmaNavTgsjqOuFnCKIG2w8Wcn6w,46
68
- mlxsmith-0.1.0.dist-info/top_level.txt,sha256=hKBwc8bn7uoI-_5Yhcq1T3IuChFhUFdzItIkZK1up6A,9
69
- mlxsmith-0.1.0.dist-info/RECORD,,
64
+ mlxsmith-0.1.1.dist-info/licenses/LICENSE,sha256=ESYyLizI0WWtxMeS7rGVcX3ivMezm-HOd5WdeOh-9oU,1056
65
+ mlxsmith-0.1.1.dist-info/METADATA,sha256=d0H3FBZa2-BfGVbz3TWpjtgWhMe-t46bp8PQELh5yFQ,9131
66
+ mlxsmith-0.1.1.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
67
+ mlxsmith-0.1.1.dist-info/entry_points.txt,sha256=ys7GcKEjhzhkkTMBmmaNavTgsjqOuFnCKIG2w8Wcn6w,46
68
+ mlxsmith-0.1.1.dist-info/top_level.txt,sha256=hKBwc8bn7uoI-_5Yhcq1T3IuChFhUFdzItIkZK1up6A,9
69
+ mlxsmith-0.1.1.dist-info/RECORD,,
@@ -1,163 +0,0 @@
1
- Metadata-Version: 2.4
2
- Name: mlxsmith
3
- Version: 0.1.0
4
- Summary: Apple Silicon MLX fine-tuning and OpenAI-compatible serving (SFT stable; preference/RL experimental).
5
- Author-email: Shannon Labs <hmbown@gmail.com>
6
- License: MIT
7
- Project-URL: Homepage, https://github.com/Hmbown/MLXSmith
8
- Project-URL: Repository, https://github.com/Hmbown/MLXSmith
9
- Project-URL: Issues, https://github.com/Hmbown/MLXSmith/issues
10
- Keywords: mlx,apple-silicon,llm,fine-tuning,lora,openai-compatible
11
- Classifier: Development Status :: 3 - Alpha
12
- Classifier: Intended Audience :: Developers
13
- Classifier: Intended Audience :: Science/Research
14
- Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
15
- Classifier: License :: OSI Approved :: MIT License
16
- Classifier: Programming Language :: Python :: 3
17
- Classifier: Programming Language :: Python :: 3 :: Only
18
- Classifier: Programming Language :: Python :: 3.10
19
- Classifier: Programming Language :: Python :: 3.11
20
- Classifier: Programming Language :: Python :: 3.12
21
- Classifier: Operating System :: MacOS :: MacOS X
22
- Requires-Python: >=3.10
23
- Description-Content-Type: text/markdown
24
- License-File: LICENSE
25
- Requires-Dist: typer>=0.9.0
26
- Requires-Dist: rich>=13.7.0
27
- Requires-Dist: pyyaml>=6.0.1
28
- Requires-Dist: pydantic>=2.5.0
29
- Requires-Dist: pydantic-settings>=2.2.1
30
- Requires-Dist: tomli>=2.0.1; python_version < "3.11"
31
- Requires-Dist: huggingface_hub>=1.3.4
32
- Requires-Dist: jsonschema>=4.21.0
33
- Provides-Extra: mlx
34
- Requires-Dist: mlx>=0.30.4; extra == "mlx"
35
- Provides-Extra: llm
36
- Requires-Dist: mlx-lm>=0.30.5; extra == "llm"
37
- Requires-Dist: transformers>=5.0.0; extra == "llm"
38
- Requires-Dist: datasets>=3.0.0; extra == "llm"
39
- Provides-Extra: serve
40
- Requires-Dist: fastapi>=0.128.0; extra == "serve"
41
- Requires-Dist: uvicorn>=0.40.0; extra == "serve"
42
- Requires-Dist: httpx>=0.28.0; extra == "serve"
43
- Provides-Extra: zmlx
44
- Requires-Dist: zmlx; extra == "zmlx"
45
- Provides-Extra: dev
46
- Requires-Dist: pytest>=9.0.0; extra == "dev"
47
- Requires-Dist: ruff>=0.14.0; extra == "dev"
48
- Provides-Extra: all
49
- Requires-Dist: mlx>=0.30.4; extra == "all"
50
- Requires-Dist: mlx-lm>=0.30.5; extra == "all"
51
- Requires-Dist: transformers>=5.0.0; extra == "all"
52
- Requires-Dist: datasets>=3.0.0; extra == "all"
53
- Requires-Dist: fastapi>=0.128.0; extra == "all"
54
- Requires-Dist: uvicorn>=0.40.0; extra == "all"
55
- Requires-Dist: httpx>=0.28.0; extra == "all"
56
- Dynamic: license-file
57
-
58
- # mlxsmith
59
-
60
- Apple Silicon MLX fine-tuning and OpenAI-compatible serving.
61
- SFT + serving are stable. Preference/RL/RLM features are experimental.
62
-
63
- Status: alpha (2026-02-02).
64
-
65
- ## Stable features
66
- - Project init, config, data tools, HF auth, model pull/convert.
67
- - SFT (LoRA/QLoRA) training with run tracking and adapters.
68
- - Inference and OpenAI-compatible /v1/chat/completions serving.
69
- - Basic eval/bench and verifier plumbing (regex/jsonschema/pytest).
70
-
71
- ## Experimental features
72
- - Preference tuning (DPO/ORPO).
73
- - GRPO-style RFT.
74
- - RLM self-play loop (research).
75
- - Distill/OPD and orchestrated RLM.
76
-
77
- ## Install
78
-
79
- MLX is only available on Apple Silicon. Other platforms can still use data tools
80
- and mock backends, but MLX training and serving require macOS on Apple Silicon.
81
-
82
- ```bash
83
- python -m venv .venv && source .venv/bin/activate
84
- pip install -U pip
85
-
86
- # Core CLI
87
- pip install mlxsmith
88
-
89
- # Apple Silicon training + serving
90
- pip install "mlxsmith[mlx,llm,serve]"
91
- ```
92
-
93
- ## Quickstart
94
-
95
- ```bash
96
- mlxsmith init myproj
97
- cd myproj
98
- mlxsmith doctor
99
- ```
100
-
101
- ## HF auth (optional)
102
-
103
- ```bash
104
- mlxsmith auth login --token "$HF_TOKEN"
105
- mlxsmith auth status
106
- mlxsmith auth logout
107
- ```
108
-
109
- ## Pull + convert a model (HF -> MLX)
110
-
111
- ```bash
112
- mlxsmith pull Qwen/Qwen3-4B-Instruct-2507
113
- # outputs to cache/mlx/Qwen__Qwen3-4B-Instruct-2507
114
- ```
115
-
116
- Optional quantization:
117
-
118
- ```bash
119
- mlxsmith pull Qwen/Qwen3-4B-Instruct-2507 --quantize --q-bits 4
120
- ```
121
-
122
- ## SFT (LoRA/QLoRA)
123
-
124
- ```bash
125
- mlxsmith sft --model cache/mlx/Qwen__Qwen3-4B-Instruct-2507 --data data/sft
126
- ```
127
-
128
- ## Serve (OpenAI-compatible)
129
-
130
- ```bash
131
- mlxsmith serve --model runs/sft_0001/adapter --port 8080
132
- ```
133
-
134
- Sample request:
135
-
136
- ```bash
137
- curl http://localhost:8080/v1/chat/completions \
138
- -H 'Content-Type: application/json' \
139
- -d '{"messages":[{"role":"user","content":"Hello"}],"max_tokens":64}'
140
- ```
141
-
142
- To enable the optional UI/monitor dashboard, set `serve.ui: true` in `mlxsmith.yaml`.
143
-
144
- ## Experimental commands
145
-
146
- - `mlxsmith pref` (DPO/ORPO)
147
- - `mlxsmith rft` (GRPO-style)
148
- - `mlxsmith rlm` / `mlxsmith pipeline` (self-play loop)
149
- - `mlxsmith distill` (offline/OPD)
150
- - `mlxsmith eval` / `mlxsmith bench`
151
-
152
- ## Docs
153
-
154
- - `docs/PROJECT_FORMAT.md` for project layout and artifacts.
155
- - `docs/VERIFIERS.md` for verifier API and sandbox behavior.
156
- - `docs/COMPATIBILITY.md` for tested versions and model families.
157
- - `docs/ENVIRONMENTS.md` for the environment plugin system.
158
- - `docs/ROADMAP.md` for product direction and milestones.
159
- - `docs/README.md` for the full docs index.
160
-
161
- ## License
162
-
163
- MIT