mlxsmith-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. mlxsmith/__init__.py +2 -0
  2. mlxsmith/accel/__init__.py +10 -0
  3. mlxsmith/accel/base.py +17 -0
  4. mlxsmith/accel/none.py +13 -0
  5. mlxsmith/accel/zmlx_backend.py +42 -0
  6. mlxsmith/adapters.py +46 -0
  7. mlxsmith/api/__init__.py +48 -0
  8. mlxsmith/api/handlers.py +1217 -0
  9. mlxsmith/api/schemas.py +436 -0
  10. mlxsmith/auth.py +88 -0
  11. mlxsmith/bench.py +102 -0
  12. mlxsmith/cli.py +950 -0
  13. mlxsmith/config.py +543 -0
  14. mlxsmith/config_models.py +261 -0
  15. mlxsmith/data.py +493 -0
  16. mlxsmith/envs/__init__.py +33 -0
  17. mlxsmith/envs/system.py +388 -0
  18. mlxsmith/envs/token_env.py +191 -0
  19. mlxsmith/eval.py +112 -0
  20. mlxsmith/infer.py +140 -0
  21. mlxsmith/llm/__init__.py +16 -0
  22. mlxsmith/llm/backend.py +126 -0
  23. mlxsmith/llm/interface.py +212 -0
  24. mlxsmith/llm/mlx_lm_backend.py +509 -0
  25. mlxsmith/llm/mock_backend.py +228 -0
  26. mlxsmith/llm/registry.py +12 -0
  27. mlxsmith/models.py +257 -0
  28. mlxsmith/orchestrator/__init__.py +25 -0
  29. mlxsmith/orchestrator/daemon.py +454 -0
  30. mlxsmith/orchestrator/inference_worker.py +496 -0
  31. mlxsmith/orchestrator/queue.py +355 -0
  32. mlxsmith/orchestrator/trainer_worker.py +437 -0
  33. mlxsmith/rlm/__init__.py +8 -0
  34. mlxsmith/rlm/corpus.py +74 -0
  35. mlxsmith/rlm/gating.py +90 -0
  36. mlxsmith/rlm/generate.py +249 -0
  37. mlxsmith/rlm/history.py +12 -0
  38. mlxsmith/rlm/inference.py +150 -0
  39. mlxsmith/rlm/loop.py +1297 -0
  40. mlxsmith/rlm/mutate.py +82 -0
  41. mlxsmith/rlm/trainer.py +73 -0
  42. mlxsmith/rlm/weights.py +263 -0
  43. mlxsmith/runs.py +44 -0
  44. mlxsmith/sdk/__init__.py +392 -0
  45. mlxsmith/sdk/future.py +486 -0
  46. mlxsmith/sdk/losses.py +262 -0
  47. mlxsmith/sdk/sampling_client.py +729 -0
  48. mlxsmith/sdk/training_client.py +676 -0
  49. mlxsmith/server.py +376 -0
  50. mlxsmith/train/__init__.py +0 -0
  51. mlxsmith/train/distill.py +279 -0
  52. mlxsmith/train/lora.py +280 -0
  53. mlxsmith/train/pref.py +180 -0
  54. mlxsmith/train/rft.py +458 -0
  55. mlxsmith/train/sft.py +151 -0
  56. mlxsmith/util.py +174 -0
  57. mlxsmith/verifiers/__init__.py +3 -0
  58. mlxsmith/verifiers/compose.py +109 -0
  59. mlxsmith/verifiers/docker_verifier.py +111 -0
  60. mlxsmith/verifiers/jsonschema.py +54 -0
  61. mlxsmith/verifiers/pytest_verifier.py +82 -0
  62. mlxsmith/verifiers/regex.py +15 -0
  63. mlxsmith/verifiers/types.py +10 -0
  64. mlxsmith-0.1.0.dist-info/METADATA +163 -0
  65. mlxsmith-0.1.0.dist-info/RECORD +69 -0
  66. mlxsmith-0.1.0.dist-info/WHEEL +5 -0
  67. mlxsmith-0.1.0.dist-info/entry_points.txt +2 -0
  68. mlxsmith-0.1.0.dist-info/licenses/LICENSE +21 -0
  69. mlxsmith-0.1.0.dist-info/top_level.txt +1 -0
mlxsmith/rlm/mutate.py ADDED
@@ -0,0 +1,82 @@
+ from __future__ import annotations
+
+ import json
+ from typing import Iterable, List, Optional
+
+ from ..util import sha1_text
+ from .generate import GeneratedTask, extract_json_objects, task_to_prompt, task_to_tests
+
+
+ def mutate_tasks(
+     llm,
+     tasks: Iterable[GeneratedTask],
+     *,
+     mutations_per_task: int,
+     max_total: Optional[int] = None,
+     temperature: float = 0.7,
+     max_new_tokens: int = 512,
+     top_p: float = 1.0,
+     top_k: Optional[int] = None,
+     require_recursion: bool = False,
+ ) -> List[GeneratedTask]:
+     tasks_list = list(tasks)
+     if mutations_per_task <= 0 or not tasks_list:
+         return tasks_list
+
+     mutated: List[GeneratedTask] = list(tasks_list)
+
+     for task in tasks_list:
+         for idx in range(mutations_per_task):
+             prompt = (
+                 "Mutate the following coding task to increase diversity. "
+                 "Return ONE JSON object with fields: id, description, signature, tests.\n\n"
+                 f"ORIGINAL_ID: {task.id}\n"
+                 f"ORIGINAL_PROMPT:\n{task.prompt}\n\n"
+                 f"ORIGINAL_TESTS:\n{task.tests}\n"
+             )
+             gen = llm.generate(
+                 prompt,
+                 max_new_tokens=max_new_tokens,
+                 temperature=temperature,
+                 top_p=top_p,
+                 top_k=top_k,
+             )
+             items = extract_json_objects(gen.text)
+             if not items:
+                 continue
+
+             item = items[0]
+             tid = item.get("id") or f"{task.id}_m{idx}"  # the f-string fallback is always non-empty, so no hash fallback is needed
+             task_prompt = task_to_prompt(item, require_recursion=require_recursion)
+             tests = task_to_tests(item)
+
+             if len(task_prompt) < 10 or not tests:
+                 continue
+
+             mutated.append(
+                 GeneratedTask(
+                     id=str(tid),
+                     prompt=task_prompt,
+                     tests=tests,
+                     description=item.get("description"),
+                 )
+             )
+
+             if max_total is not None and len(mutated) >= max_total:
+                 break
+         if max_total is not None and len(mutated) >= max_total:
+             break
+
+     # Deduplicate by id/prompt hash
+     seen = set()
+     deduped: List[GeneratedTask] = []
+     for t in mutated:
+         key = t.id or sha1_text(t.prompt)
+         if key in seen:
+             continue
+         seen.add(key)
+         deduped.append(t)
+         if max_total is not None and len(deduped) >= max_total:
+             break
+
+     return deduped
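For orientation, a minimal sketch of driving mutate_tasks; the StubLLM class and its canned JSON reply are hypothetical, standing in for any backend whose generate() returns an object with a .text attribute, and it assumes GeneratedTask takes only the four fields used above:

from types import SimpleNamespace

from mlxsmith.rlm.generate import GeneratedTask
from mlxsmith.rlm.mutate import mutate_tasks

class StubLLM:
    """Hypothetical backend: always replies with one mutated task as JSON."""
    def generate(self, prompt, **sampling_kwargs):
        text = ('{"id": "sum_list_m0", "description": "Sum a list of ints", '
                '"signature": "def sum_list(xs):", '
                '"tests": "assert sum_list([1, 2]) == 3"}')
        return SimpleNamespace(text=text)

seed = [GeneratedTask(id="sum_list", prompt="Write sum_list(xs) returning the sum.",
                      tests="assert sum_list([]) == 0", description=None)]
out = mutate_tasks(StubLLM(), seed, mutations_per_task=1, max_total=4)
print([t.id for t in out])  # originals first, then deduplicated mutations

Originals are always retained; max_total caps the combined list after deduplication.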
mlxsmith/rlm/trainer.py ADDED
@@ -0,0 +1,73 @@
+ from __future__ import annotations
+
+ from collections import defaultdict
+ from typing import Iterable, Optional
+
+ from ..config import ProjectConfig
+ from ..util import now_ts, latency_summary_ms
+ from .inference import Rollout
+
+
+ def train_on_rollouts(
+     llm,
+     rollouts: Iterable[Rollout],
+     cfg: ProjectConfig,
+     *,
+     optimizer: object,
+     train_adapter: Optional[str] = None,
+     ref_llm: Optional[object] = None,
+ ) -> list[dict]:
+     grouped = defaultdict(list)
+     for r in rollouts:
+         grouped[r.task_id].append(r)
+
+     metrics_rows: list[dict] = []
+
+     for task_id, rows in grouped.items():
+         if not rows:
+             continue
+
+         mean_r = sum(r.reward for r in rows) / max(1, len(rows))
+         std_r = (sum((r.reward - mean_r) ** 2 for r in rows) / max(1, len(rows))) ** 0.5
+         advs = [r.reward - mean_r for r in rows]
+         if bool(cfg.rft.normalize_advantage) and std_r > 1e-6:
+             advs = [a / std_r for a in advs]
+
+         def loss_fn(_model):
+             loss = llm.mx.array(0.0)  # type: ignore
+             for rollout, adv in zip(rows, advs):
+                 logp = llm.sequence_logprob(rollout.token_ids, prompt_len=rollout.prompt_len)
+                 if rollout.logprobs and rollout.weight_adapter and rollout.weight_adapter != train_adapter:
+                     behavior_logp = llm.mx.array(sum(rollout.logprobs))  # type: ignore
+                     ratio = llm.mx.exp(logp - behavior_logp)  # type: ignore
+                     pg = -ratio * llm.mx.array(float(adv))  # type: ignore
+                 else:
+                     pg = -llm.mx.array(float(adv)) * logp  # type: ignore
+                 if ref_llm is not None and cfg.rft.kl_coeff > 0:
+                     ref_logp = ref_llm.sequence_logprob(rollout.token_ids, prompt_len=rollout.prompt_len)
+                     pg = pg + llm.mx.array(cfg.rft.kl_coeff) * (logp - ref_logp)  # type: ignore
+                 loss = loss + pg
+             return loss / llm.mx.array(float(len(rows)))  # type: ignore
+
+         lval, grads = llm.value_and_grad(loss_fn)
+         if grads is not None:
+             llm.apply_grads(optimizer, grads)
+
+         latency_summary = latency_summary_ms([float(r.verifier_latency_ms) for r in rows])
+         metrics = {
+             "ts": now_ts(),
+             "task_id": task_id,
+             "mean_reward": mean_r,
+             "std_reward": std_r,
+             "loss": float(lval.item()) if hasattr(lval, "item") else float(lval),
+             "verifier_latency_ms": latency_summary.get("mean", 0.0),
+             "verifier_latency_ms_mean": latency_summary.get("mean", 0.0),
+             "verifier_latency_ms_p50": latency_summary.get("p50", 0.0),
+             "verifier_latency_ms_p90": latency_summary.get("p90", 0.0),
+             "verifier_latency_ms_p99": latency_summary.get("p99", 0.0),
+             "verifier_latency_ms_max": latency_summary.get("max", 0.0),
+             "weight_adapter": rows[0].weight_adapter,
+         }
+         metrics_rows.append(metrics)
+
+     return metrics_rows
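The advantage computation above centers each rollout's reward on its task group's mean, optionally dividing by the group's standard deviation; a self-contained numeric illustration with invented rewards (no model or MLX required):

rewards = [1.0, 0.0, 0.5, 1.0]                # verifier rewards for one task's rollouts
mean_r = sum(rewards) / len(rewards)          # baseline: group mean
std_r = (sum((r - mean_r) ** 2 for r in rewards) / len(rewards)) ** 0.5
advs = [r - mean_r for r in rewards]          # centered advantages
if std_r > 1e-6:                              # same guard as the normalize_advantage path
    advs = [a / std_r for a in advs]
print([round(a, 3) for a in advs])            # positive = better than the group average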
mlxsmith/rlm/weights.py ADDED
@@ -0,0 +1,263 @@
+ """Weight pointer system for tracking adapter weights across RLM iterations.
+
+ Extends to support IPC for multi-process orchestration with atomic updates
+ and hot-reload capabilities.
+ """
+
+ from __future__ import annotations
+
+ import json
+ import multiprocessing as mp
+ import time
+ from dataclasses import dataclass
+ from pathlib import Path
+ from typing import Optional, Callable
+
+ from ..util import ensure_dir, now_ts
+
+
+ @dataclass
+ class WeightPointer:
+     base_model: str
+     adapter_path: Optional[str]
+     iteration: int
+     updated_at: str
+     name: Optional[str] = None
+
+
+ @dataclass
+ class WeightPointerIPC:
+     """Extended WeightPointer with IPC support for multi-process orchestration.
+
+     Includes versioning and atomic update mechanisms for hot-reloading.
+     """
+     base_model: str
+     adapter_path: Optional[str]
+     iteration: int
+     updated_at: str
+     version: int = 0  # Monotonic version for ordering updates
+     checksum: Optional[str] = None  # Optional checksum for integrity
+     name: Optional[str] = None
+
+     def to_dict(self) -> dict:
+         return {
+             "base_model": self.base_model,
+             "adapter_path": self.adapter_path,
+             "iteration": self.iteration,
+             "updated_at": self.updated_at,
+             "version": self.version,
+             "checksum": self.checksum,
+             "name": self.name,
+         }
+
+     @classmethod
+     def from_dict(cls, data: dict) -> "WeightPointerIPC":
+         return cls(
+             base_model=data["base_model"],
+             adapter_path=data.get("adapter_path"),
+             iteration=data.get("iteration", 0),
+             updated_at=data.get("updated_at", now_ts()),
+             version=data.get("version", 0),
+             checksum=data.get("checksum"),
+             name=data.get("name"),
+         )
+
+
+ class WeightPointerStore:
+     """Atomic weight pointer store for IPC between processes.
+
+     Uses file-based atomic updates with versioning to ensure
+     inference workers always see consistent state.
+     """
+
+     def __init__(self, weights_dir: Path):
+         self._weights_dir = Path(weights_dir)
+         self._lock = mp.Lock()  # only guards processes that inherit this store; unrelated readers rely on the atomic replace below
+
+     def get_path(self, name: str) -> Path:
+         """Get the storage path for a named pointer."""
+         return self._weights_dir / f"{name}.json"
+
+     def get_atomic_path(self, name: str) -> Path:
+         """Get the temporary path for atomic writes."""
+         return self._weights_dir / f".{name}.tmp"
+
+     def load(self, name: str, base_model: str) -> WeightPointerIPC:
+         """Load a weight pointer from storage."""
+         path = self.get_path(name)
+
+         with self._lock:
+             if not path.exists():
+                 return WeightPointerIPC(
+                     base_model=base_model,
+                     adapter_path=None,
+                     iteration=0,
+                     updated_at=now_ts(),
+                     version=0,
+                     name=name,
+                 )
+
+             try:
+                 data = json.loads(path.read_text(encoding="utf-8"))
+                 return WeightPointerIPC.from_dict(data)
+             except Exception:
+                 return WeightPointerIPC(
+                     base_model=base_model,
+                     adapter_path=None,
+                     iteration=0,
+                     updated_at=now_ts(),
+                     version=0,
+                     name=name,
+                 )
+
+     def save(self, pointer: WeightPointerIPC) -> None:
+         """Atomically save a weight pointer."""
+         path = self.get_path(pointer.name or "default")
+         tmp_path = self.get_atomic_path(pointer.name or "default")
+
+         ensure_dir(self._weights_dir)
+
+         with self._lock:
+             # Write to temp file
+             tmp_path.write_text(
+                 json.dumps(pointer.to_dict(), indent=2),
+                 encoding="utf-8",
+             )
+             # Atomic replace (rename would fail on Windows if the target exists)
+             tmp_path.replace(path)
+
+     def update(
+         self,
+         name: str,
+         adapter_path: Optional[str] = None,
+         iteration: Optional[int] = None,
+         checksum: Optional[str] = None,
+     ) -> WeightPointerIPC:
+         """Update a weight pointer atomically."""
+         current = self.load(name, "")  # keeps the existing base_model; empty only if no pointer has been saved yet
+
+         new_pointer = WeightPointerIPC(
+             base_model=current.base_model,
+             adapter_path=adapter_path if adapter_path is not None else current.adapter_path,
+             iteration=iteration if iteration is not None else current.iteration,
+             updated_at=now_ts(),
+             version=current.version + 1,
+             checksum=checksum,
+             name=name,
+         )
+
+         self.save(new_pointer)
+         return new_pointer
+
+     def watch(
+         self,
+         name: str,
+         base_model: str,
+         callback: Callable[[WeightPointerIPC], None],
+         poll_interval: float = 1.0,
+     ) -> "WeightWatcher":
+         """Create a watcher that monitors for pointer changes."""
+         return WeightWatcher(self, name, base_model, callback, poll_interval)
+
+
+ class WeightWatcher:
+     """Watches a weight pointer for changes and triggers callbacks.
+
+     Used by inference workers to hot-reload weights when updates
+     are published by the trainer.
+     """
+
+     def __init__(
+         self,
+         store: WeightPointerStore,
+         name: str,
+         base_model: str,
+         callback: Callable[[WeightPointerIPC], None],
+         poll_interval: float = 1.0,
+     ):
+         self._store = store
+         self._name = name
+         self._base_model = base_model
+         self._callback = callback
+         self._poll_interval = poll_interval
+         self._last_version = -1
+         self._stop_event = mp.Event()  # shared across processes; a plain bool attribute set in the parent would never reach the child
+         self._process: Optional[mp.Process] = None
+
+     def start(self) -> None:
+         """Start watching in a background process."""
+         self._stop_event.clear()
+         self._process = mp.Process(target=self._watch_loop)
+         self._process.start()
+
+     def stop(self) -> None:
+         """Stop the watcher."""
+         self._stop_event.set()
+         if self._process:
+             self._process.join(timeout=5.0)
+             if self._process.is_alive():
+                 self._process.terminate()
+             self._process = None
+
+     def _watch_loop(self) -> None:
+         """Internal watch loop running in a separate process."""
+         while not self._stop_event.is_set():
+             try:
+                 pointer = self._store.load(self._name, self._base_model)
+                 if pointer.version > self._last_version:
+                     self._last_version = pointer.version
+                     self._callback(pointer)
+             except Exception:
+                 pass  # Continue watching despite errors
+
+             time.sleep(self._poll_interval)
+
+
+ def load_pointer(path: Path, *, base_model: str, name: Optional[str] = None) -> WeightPointer:
+     """Load a weight pointer from disk (backward compatible)."""
+     if not path.exists():
+         return WeightPointer(
+             base_model=base_model,
+             adapter_path=None,
+             iteration=0,
+             updated_at=now_ts(),
+             name=name,
+         )
+     data = json.loads(path.read_text(encoding="utf-8"))
+     return WeightPointer(
+         base_model=data.get("base_model") or base_model,
+         adapter_path=data.get("adapter_path"),
+         iteration=int(data.get("iteration", 0)),
+         updated_at=data.get("updated_at") or now_ts(),
+         name=data.get("name") or name,
+     )
+
+
+ def save_pointer(path: Path, pointer: WeightPointer) -> None:
+     """Save a weight pointer to disk (backward compatible)."""
+     ensure_dir(path.parent)
+     path.write_text(
+         json.dumps(
+             {
+                 "base_model": pointer.base_model,
+                 "adapter_path": pointer.adapter_path,
+                 "iteration": pointer.iteration,
+                 "updated_at": pointer.updated_at,
+                 "name": pointer.name,
+             },
+             indent=2,
+         ),
+         encoding="utf-8",
+     )
+
+
+ def load_pointer_ipc(path: Path, base_model: str, name: str) -> WeightPointerIPC:
+     """Load an IPC-enabled weight pointer."""
+     store = WeightPointerStore(path.parent)
+     return store.load(name, base_model)
+
+
+ def save_pointer_ipc(path: Path, pointer: WeightPointerIPC) -> None:
+     """Save an IPC-enabled weight pointer."""
+     store = WeightPointerStore(path.parent)
+     store.save(pointer)
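A sketch of the publish/reload flow the docstrings describe, under assumed paths and model name (both invented here); the trainer bumps the version through update(), and a worker reloads only when the version advances:

from pathlib import Path

from mlxsmith.rlm.weights import WeightPointerStore

store = WeightPointerStore(Path("runs/rft_0001/weights"))  # hypothetical directory

# Trainer side: publish a new adapter; version increments monotonically.
store.update("policy", adapter_path="runs/rft_0001/adapter", iteration=3)

# Inference side: poll and hot-reload only on a version change.
last_seen = -1
pointer = store.load("policy", base_model="example/base-model")
if pointer.version > last_seen:
    last_seen = pointer.version
    print("reload adapter from", pointer.adapter_path)

In practice the polling half would live in a WeightWatcher callback rather than inline as above.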
mlxsmith/runs.py ADDED
@@ -0,0 +1,44 @@
+ from __future__ import annotations
+
+ from dataclasses import dataclass
+ from pathlib import Path
+
+ import yaml
+
+ from .util import ensure_dir
+
+ @dataclass
+ class RunPaths:
+     run_dir: Path
+     logs_dir: Path
+     checkpoints_dir: Path
+     adapter_dir: Path
+     artifacts_dir: Path
+     metrics_path: Path
+     config_snapshot_path: Path
+
+ def new_run(root: Path, kind: str) -> RunPaths:
+     runs_root = ensure_dir(root / "runs")
+     # monotonically increasing id by counting existing runs of same kind
+     existing = sorted([p for p in runs_root.glob(f"{kind}_*") if p.is_dir()])
+     next_idx = len(existing) + 1
+     run_name = f"{kind}_{next_idx:04d}"
+     run_dir = ensure_dir(runs_root / run_name)
+     logs_dir = ensure_dir(run_dir / "logs")
+     checkpoints_dir = ensure_dir(run_dir / "checkpoints")
+     adapter_dir = ensure_dir(run_dir / "adapter")
+     artifacts_dir = ensure_dir(run_dir / "artifacts")
+     metrics_path = run_dir / "metrics.jsonl"
+     config_snapshot_path = run_dir / "config.snapshot.yaml"
+     return RunPaths(
+         run_dir=run_dir,
+         logs_dir=logs_dir,
+         checkpoints_dir=checkpoints_dir,
+         adapter_dir=adapter_dir,
+         artifacts_dir=artifacts_dir,
+         metrics_path=metrics_path,
+         config_snapshot_path=config_snapshot_path,
+     )
+
+ def snapshot_config(cfg_dict: dict, path: Path):
+     path.write_text(yaml.safe_dump(cfg_dict, sort_keys=False), encoding="utf-8")
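Finally, a small usage sketch for the run-directory helpers; the kind string and config contents are made up:

from pathlib import Path

from mlxsmith.runs import new_run, snapshot_config

paths = new_run(Path("."), kind="sft")   # creates ./runs/sft_0001/ and its subdirectories
snapshot_config({"model": "example", "lr": 1e-4}, paths.config_snapshot_path)
print(paths.metrics_path)                # ./runs/sft_0001/metrics.jsonl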