PyPI - blitz-cli - Versions diffs - 0.1.0__tar.gz - Mend

blitz-cli 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

blitz_cli-0.1.0/.github/workflows/publish.yml +30 -0
blitz_cli-0.1.0/.gitignore +7 -0
blitz_cli-0.1.0/PKG-INFO +34 -0
blitz_cli-0.1.0/README.md +27 -0
blitz_cli-0.1.0/blitz_cli/__init__.py +11 -0
blitz_cli-0.1.0/blitz_cli/_client.py +138 -0
blitz_cli-0.1.0/blitz_cli/_scaffold.py +111 -0
blitz_cli-0.1.0/blitz_cli/cli.py +128 -0
blitz_cli-0.1.0/blitz_cli/templates/Dockerfile.tmpl +16 -0
blitz_cli-0.1.0/blitz_cli/templates/Makefile.tmpl +33 -0
blitz_cli-0.1.0/blitz_cli/templates/README.md.tmpl +36 -0
blitz_cli-0.1.0/blitz_cli/templates/__init__.py +3 -0
blitz_cli-0.1.0/blitz_cli/templates/dockerignore.tmpl +4 -0
blitz_cli-0.1.0/blitz_cli/templates/eval.py.tmpl +84 -0
blitz_cli-0.1.0/blitz_cli/templates/requirements.txt.tmpl +11 -0
blitz_cli-0.1.0/blitz_cli/templates/train.py.tmpl +89 -0
blitz_cli-0.1.0/pyproject.toml +25 -0
blitz_cli-0.1.0/tests/test_scaffold.py +80 -0

blitz_cli-0.1.0/.github/workflows/publish.yml ADDED Viewed

@@ -0,0 +1,30 @@
+name: Publish to PyPI
+on:
+  push:
+    tags:
+      - "v*"   # e.g. v0.1.0
+permissions:
+  contents: read
+  id-token: write  # PyPI Trusted Publishing (OIDC)
+jobs:
+  publish:
+    runs-on: ubuntu-latest
+    environment: pypi
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+      - name: Install build tools
+        run: pip install build
+      - name: Build sdist and wheel
+        run: python -m build
+      - name: Publish to PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1

blitz_cli-0.1.0/.gitignore ADDED Viewed

@@ -0,0 +1,7 @@
+__pycache__/
+*.pyc
+*.egg-info/
+build/
+dist/
+.venv/
+.pytest_cache/

blitz_cli-0.1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,34 @@
+Metadata-Version: 2.4
+Name: blitz-cli
+Version: 0.1.0
+Summary: Developer CLI for Blitz: pull a workflow's training data + base-model recommendation and scaffold a runnable QLoRA training container.
+Requires-Python: >=3.9
+Description-Content-Type: text/markdown
+# blitz-cli
+Developer CLI for [Blitz](https://github.com/sparepartslabs/blitz-sdk-py). Pull a
+workflow's captured traces as a fine-tuning dataset + the recommended open base
+model, and scaffold a self-contained, runnable QLoRA training container — then
+close the loop by grading the trained student against the held-out eval set.
+```bash
+pip install blitz-cli
+export BLITZ_API_KEY=blz_...   # a read-scoped project key (mint one in the dashboard)
+blitz scaffold -p proj_abc -w mechanic-assistant -o ./train
+cd train
+make build && make train && make eval
+```
+`blitz scaffold` writes `./train` with `data/dataset.jsonl`, `data/evalset.jsonl`,
+`config.json` (derived from the base-model recommendation), and a `Dockerfile` +
+`train.py` + `eval.py` + `Makefile`. Bring your own NVIDIA GPU.
+This package is intentionally dependency-free (stdlib + the Blitz HTTP API). The
+heavy ML stack (torch / transformers / trl / peft) is pinned only in the
+*generated* project's `requirements.txt`, installed inside the training image.
+The complementary tracing SDK (`pip install blitz-sdk`, `import blitz`) lives in
+[`../blitz-sdk`](../blitz-sdk).

blitz_cli-0.1.0/README.md ADDED Viewed

@@ -0,0 +1,27 @@
+# blitz-cli
+Developer CLI for [Blitz](https://github.com/sparepartslabs/blitz-sdk-py). Pull a
+workflow's captured traces as a fine-tuning dataset + the recommended open base
+model, and scaffold a self-contained, runnable QLoRA training container — then
+close the loop by grading the trained student against the held-out eval set.
+```bash
+pip install blitz-cli
+export BLITZ_API_KEY=blz_...   # a read-scoped project key (mint one in the dashboard)
+blitz scaffold -p proj_abc -w mechanic-assistant -o ./train
+cd train
+make build && make train && make eval
+```
+`blitz scaffold` writes `./train` with `data/dataset.jsonl`, `data/evalset.jsonl`,
+`config.json` (derived from the base-model recommendation), and a `Dockerfile` +
+`train.py` + `eval.py` + `Makefile`. Bring your own NVIDIA GPU.
+This package is intentionally dependency-free (stdlib + the Blitz HTTP API). The
+heavy ML stack (torch / transformers / trl / peft) is pinned only in the
+*generated* project's `requirements.txt`, installed inside the training image.
+The complementary tracing SDK (`pip install blitz-sdk`, `import blitz`) lives in
+[`../blitz-sdk`](../blitz-sdk).

blitz_cli-0.1.0/blitz_cli/__init__.py ADDED Viewed

@@ -0,0 +1,11 @@
+"""blitz-cli — pull Blitz training data and scaffold a QLoRA training container.
+Run ``blitz scaffold -p <project> -w <workflow> -o ./train`` to download a
+workflow's SFT dataset, held-out eval set, and base-model recommendation, then
+emit a self-contained, runnable training project (Dockerfile + train.py +
+eval.py). Authenticates with a read-scoped Blitz API key (``BLITZ_API_KEY``).
+"""
+__all__ = ["__version__"]
+__version__ = "0.1.0"

blitz_cli-0.1.0/blitz_cli/_client.py ADDED Viewed

@@ -0,0 +1,138 @@
+"""Minimal HTTP client for the Blitz read API (stdlib only).
+Talks to the same backend the SDK pushes traces to, but to the owner-scoped
+export endpoints, authenticated with a read-scoped API key in the ``x-api-key``
+header. NDJSON endpoints are streamed line-by-line so a large dataset never has
+to be buffered in memory.
+"""
+from __future__ import annotations
+import json
+import urllib.error
+import urllib.parse
+import urllib.request
+from typing import Iterator, Optional
+class BlitzAPIError(RuntimeError):
+    """A non-2xx response from the Blitz API, with a developer-friendly hint."""
+    def __init__(self, status: int, message: str) -> None:
+        self.status = status
+        super().__init__(message)
+def _hint(status: int, detail: str) -> str:
+    if status in (401, 403):
+        return (
+            f"{detail} (HTTP {status}). Check BLITZ_API_KEY — it must be a "
+            "read-scoped key for this project (create one in the dashboard)."
+        )
+    if status == 404:
+        return (
+            f"{detail} (HTTP 404). Project or workflow not found, or no usable "
+            "data captured for it yet."
+        )
+    return f"{detail} (HTTP {status})."
+class BlitzClient:
+    def __init__(
+        self, *, endpoint: str, api_key: str, project: str, timeout: float = 60.0
+    ) -> None:
+        self._base = endpoint.rstrip("/")
+        self._headers = {"x-api-key": api_key}
+        self._project = project
+        self._timeout = timeout
+    # -- low level ----------------------------------------------------------
+    def _url(self, path: str, query: Optional[dict] = None) -> str:
+        url = self._base + path
+        params = {k: v for k, v in (query or {}).items() if v is not None}
+        if params:
+            # bools must serialize as true/false to match FastAPI query parsing
+            params = {
+                k: (str(v).lower() if isinstance(v, bool) else v)
+                for k, v in params.items()
+            }
+            url += "?" + urllib.parse.urlencode(params)
+        return url
+    def _open(self, req: urllib.request.Request):
+        try:
+            return urllib.request.urlopen(req, timeout=self._timeout)
+        except urllib.error.HTTPError as exc:
+            detail = exc.reason or "request failed"
+            try:
+                body = json.loads(exc.read().decode("utf-8"))
+                detail = body.get("detail") or body.get("message") or detail
+            except Exception:  # noqa: BLE001
+                pass
+            raise BlitzAPIError(exc.code, _hint(exc.code, str(detail))) from None
+    def _get_json(self, path: str, query: Optional[dict] = None) -> dict:
+        req = urllib.request.Request(self._url(path, query), headers=self._headers)
+        with self._open(req) as resp:
+            return json.loads(resp.read().decode("utf-8"))
+    def _get_ndjson(self, path: str, query: Optional[dict] = None) -> Iterator[dict]:
+        req = urllib.request.Request(self._url(path, query), headers=self._headers)
+        with self._open(req) as resp:
+            for raw in resp:
+                line = raw.decode("utf-8").strip()
+                if line:
+                    yield json.loads(line)
+    def _post_json(self, path: str, body: dict) -> dict:
+        data = json.dumps(body).encode("utf-8")
+        req = urllib.request.Request(
+            self._url(path),
+            data=data,
+            method="POST",
+            headers={**self._headers, "content-type": "application/json"},
+        )
+        with self._open(req) as resp:
+            return json.loads(resp.read().decode("utf-8"))
+    # -- public API (one method per endpoint the CLI / eval loop needs) -----
+    def _p(self, suffix: str) -> str:
+        return f"/blitz/projects/{self._project}{suffix}"
+    def recommended_base(self, workflow: str) -> dict:
+        return self._get_json(self._p("/recommended-base"), {"workflow": workflow})
+    def recommended_tools(self, workflow: str) -> dict:
+        return self._get_json(self._p("/recommended-tools"), {"workflow": workflow})
+    def eval_summary(self, workflow: str) -> dict:
+        return self._get_json(self._p("/eval-set/summary"), {"workflow": workflow})
+    def download_dataset(
+        self, workflow: Optional[str] = None, include_synthetic: bool = True
+    ) -> Iterator[dict]:
+        return self._get_ndjson(
+            self._p("/dataset"),
+            {"workflow": workflow, "include_synthetic": include_synthetic},
+        )
+    def download_eval_set(self, workflow: Optional[str] = None) -> Iterator[dict]:
+        return self._get_ndjson(self._p("/eval-dataset"), {"workflow": workflow})
+    def submit_eval(
+        self, workflow: str, predictions: list, grader: str = "auto"
+    ) -> dict:
+        return self._post_json(
+            self._p("/eval"),
+            {
+                "workflow": workflow,
+                "candidate": "supplied",
+                "grader": grader,
+                "predictions": predictions,
+            },
+        )
+    def get_eval_run(self, run_id: str) -> dict:
+        return self._get_json(self._p(f"/eval/runs/{run_id}"))

blitz_cli-0.1.0/blitz_cli/_scaffold.py ADDED Viewed

@@ -0,0 +1,111 @@
+"""Render a runnable QLoRA training project from a base-model recommendation.
+Templates live in ``blitz_cli/templates`` and ship as package data. ``train.py``,
+``eval.py`` and ``requirements.txt`` are copied verbatim (they read ``config.json``
+at runtime, so they need no substitution and stay valid Python regardless of the
+recommendation); ``Dockerfile``/``Makefile``/``README`` get ``$var`` substitution
+via string.Template (avoids the brace-escaping pain of str.format on code).
+"""
+from __future__ import annotations
+import json
+from importlib import resources
+from pathlib import Path
+from string import Template
+from typing import Optional
+def _seq_len(rec: dict) -> int:
+    """The clamped training sequence length. Prefer the value the backend now
+    returns; fall back to the same clamp from the signals for older backends."""
+    if rec.get("seq_len"):
+        return int(rec["seq_len"])
+    signals = rec.get("signals") or {}
+    raw = (signals.get("p95_input") or 0) + (signals.get("max_output") or 0)
+    return max(1024, min(8192, raw or 2048))
+def build_config(rec: dict, workflow: str) -> dict:
+    """Derive a QLoRA training config from the /recommended-base response.
+    Hyperparameters scale with model size to keep a single 24GB-class card viable
+    (effective batch ~16-32); seq_len comes straight from the recommendation so the
+    trainer's VRAM accounting matches what the recommender assumed.
+    """
+    model = rec["recommendation"]
+    params_b = float(model.get("params_b") or 7)
+    if params_b <= 3:
+        lora_r, batch, accum, lr = 16, 4, 4, 2e-4
+    elif params_b <= 9:
+        lora_r, batch, accum, lr = 16, 2, 8, 2e-4
+    else:  # 14B and up
+        lora_r, batch, accum, lr = 32, 1, 16, 1e-4
+    return {
+        "workflow": workflow,
+        "base_model_hf": model["hf"],
+        "params_b": params_b,
+        "license": model.get("license", ""),
+        "seq_len": _seq_len(rec),
+        "lora_r": lora_r,
+        "lora_alpha": lora_r * 2,
+        "lora_dropout": 0.05,
+        "epochs": 3,
+        "lr": lr,
+        "per_device_batch_size": batch,
+        "grad_accum": accum,
+    }
+def _template(name: str) -> str:
+    return resources.files("blitz_cli.templates").joinpath(name).read_text(
+        encoding="utf-8"
+    )
+def _write(path: Path, content: str) -> None:
+    path.write_text(content, encoding="utf-8")
+def render(
+    out_dir: Path, rec: dict, project: str, workflow: str, endpoint: str
+) -> dict:
+    """Write the training project into out_dir and return the derived config."""
+    out_dir.mkdir(parents=True, exist_ok=True)
+    config = build_config(rec, workflow)
+    _write(out_dir / "config.json", json.dumps(config, indent=2) + "\n")
+    _write(out_dir / "recommendation.json", json.dumps(rec, indent=2) + "\n")
+    # Verbatim (read config.json at runtime) — keep as real source files.
+    for name in ("train.py", "eval.py", "requirements.txt", "dockerignore"):
+        dest = ".dockerignore" if name == "dockerignore" else name
+        _write(out_dir / dest, _template(name + ".tmpl"))
+    subs = {
+        "project": project,
+        "workflow": workflow,
+        "endpoint": endpoint,
+        "model_name": rec["recommendation"]["name"],
+        "base_model_hf": config["base_model_hf"],
+        "seq_len": str(config["seq_len"]),
+        "license": config["license"],
+    }
+    for name in ("Dockerfile", "Makefile", "README.md"):
+        _write(out_dir / name, Template(_template(name + ".tmpl")).safe_substitute(subs))
+    return config
+def gated_license_warning(rec: dict) -> Optional[str]:
+    """A warning string if the recommended model needs HF license acceptance."""
+    lic = (rec.get("recommendation", {}).get("license") or "").lower()
+    if lic.startswith("llama") or lic.startswith("gemma"):
+        name = rec["recommendation"]["name"]
+        return (
+            f"{name} is a gated model ({lic}). Accept its license on Hugging Face "
+            "and pass HF_TOKEN into the container (see README) before training."
+        )
+    return None

blitz_cli-0.1.0/blitz_cli/cli.py ADDED Viewed

@@ -0,0 +1,128 @@
+"""`blitz` command — pull a workflow's data and scaffold a training container."""
+from __future__ import annotations
+import argparse
+import json
+import os
+import sys
+from pathlib import Path
+from typing import Optional
+from blitz_cli import _scaffold
+from blitz_cli._client import BlitzAPIError, BlitzClient
+_DEFAULT_ENDPOINT = "https://api.sparepartslabs.com"
+def _eprint(*args: object) -> None:
+    print(*args, file=sys.stderr)
+def _cmd_scaffold(args: argparse.Namespace) -> int:
+    api_key = args.api_key or os.environ.get("BLITZ_API_KEY")
+    if not api_key:
+        _eprint("error: no API key. Pass --api-key or set BLITZ_API_KEY (a read-scoped key).")
+        return 2
+    client = BlitzClient(endpoint=args.endpoint, api_key=api_key, project=args.project)
+    out = Path(args.out)
+    data_dir = out / "data"
+    try:
+        rec = client.recommended_base(args.workflow)
+    except BlitzAPIError as exc:
+        _eprint(f"error: {exc}")
+        return 1
+    model = rec["recommendation"]
+    print(f"Recommended base: {model['name']} ({model['hf']}) — {model.get('why', '')}")
+    warning = _scaffold.gated_license_warning(rec)
+    if warning:
+        _eprint(f"warning: {warning}")
+    data_dir.mkdir(parents=True, exist_ok=True)
+    try:
+        n_train = _stream_to_file(
+            client.download_dataset(args.workflow, args.include_synthetic),
+            data_dir / "dataset.jsonl",
+        )
+        n_eval = _stream_to_file(
+            client.download_eval_set(args.workflow), data_dir / "evalset.jsonl"
+        )
+    except BlitzAPIError as exc:
+        _eprint(f"error: {exc}")
+        return 1
+    print(f"Pulled {n_train} training examples → {data_dir / 'dataset.jsonl'}")
+    print(f"Pulled {n_eval} eval examples → {data_dir / 'evalset.jsonl'}")
+    if n_train == 0:
+        _eprint("error: no training examples for this workflow — nothing to train on.")
+        return 1
+    if n_eval == 0:
+        _eprint("warning: held-out eval set is empty — `make eval` will be skipped.")
+    config = _scaffold.render(out, rec, args.project, args.workflow, args.endpoint)
+    print(f"Scaffolded training project → {out}/")
+    print(
+        "\nNext:\n"
+        f"  cd {out}\n"
+        "  export BLITZ_API_KEY=<your read key>   # used by `make eval` to post results\n"
+        "  make build\n"
+        "  make train\n"
+        "  make eval\n"
+        f"\n(base={config['base_model_hf']} seq_len={config['seq_len']} "
+        f"lora_r={config['lora_r']} epochs={config['epochs']})"
+    )
+    return 0
+def _stream_to_file(rows, path: Path) -> int:
+    count = 0
+    with path.open("w", encoding="utf-8") as fh:
+        for row in rows:
+            fh.write(json.dumps(row, ensure_ascii=False) + "\n")
+            count += 1
+    return count
+def build_parser() -> argparse.ArgumentParser:
+    parser = argparse.ArgumentParser(
+        prog="blitz", description="Blitz training CLI: pull data + scaffold a trainer."
+    )
+    sub = parser.add_subparsers(dest="cmd", required=True)
+    sc = sub.add_parser(
+        "scaffold",
+        help="Pull a workflow's dataset + recommendation and emit a QLoRA training project.",
+    )
+    sc.add_argument("-p", "--project", required=True, help="Blitz project id")
+    sc.add_argument("-w", "--workflow", required=True, help="Workflow (root span) name")
+    sc.add_argument("-o", "--out", default="./train", help="Output dir (default ./train)")
+    sc.add_argument(
+        "--endpoint",
+        default=os.environ.get("BLITZ_ENDPOINT", _DEFAULT_ENDPOINT),
+        help="Blitz API base URL (env BLITZ_ENDPOINT)",
+    )
+    sc.add_argument(
+        "--api-key", default=None, help="Read-scoped API key (env BLITZ_API_KEY)"
+    )
+    sc.add_argument(
+        "--no-synthetic",
+        dest="include_synthetic",
+        action="store_false",
+        default=True,
+        help="Export real training data only (exclude synthetic augmentation)",
+    )
+    sc.set_defaults(func=_cmd_scaffold)
+    return parser
+def main(argv: Optional[list] = None) -> int:
+    parser = build_parser()
+    args = parser.parse_args(argv)
+    return args.func(args)
+if __name__ == "__main__":
+    raise SystemExit(main())

blitz_cli-0.1.0/blitz_cli/templates/Dockerfile.tmpl ADDED Viewed

@@ -0,0 +1,16 @@
+# Trainer image for distilling "$workflow" into $model_name.
+# Base ships torch 2.5.1 + CUDA 12.1 so requirements.txt installs only the
+# QLoRA stack on top (no torch reinstall).
+FROM pytorch/pytorch:2.5.1-cuda12.1-cudnn9-runtime
+WORKDIR /workspace
+ENV PYTHONUNBUFFERED=1 \
+    HF_HOME=/workspace/.hf
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+COPY . .
+# Default to training; `make eval` overrides the command.
+CMD ["python", "train.py"]

blitz_cli-0.1.0/blitz_cli/templates/Makefile.tmpl ADDED Viewed

@@ -0,0 +1,33 @@
+# Train / eval the distilled student for the "$workflow" workflow.
+# Requires an NVIDIA GPU + the NVIDIA Container Toolkit (docker --gpus all).
+#
+#   export BLITZ_API_KEY=<your read key>   # used by `make eval` to post results
+#   make build && make train && make eval
+PROJECT ?= $project
+WORKFLOW ?= $workflow
+BLITZ_ENDPOINT ?= $endpoint
+IMAGE ?= blitz-train-$project
+# Mounts the project dir so adapters + the HF cache persist on the host.
+RUN = docker run --rm --gpus all \
+	-v $(CURDIR):/workspace -v $(CURDIR)/.hf:/workspace/.hf \
+	-e BLITZ_API_KEY \
+	-e BLITZ_ENDPOINT=$(BLITZ_ENDPOINT) \
+	-e BLITZ_PROJECT=$(PROJECT) \
+	-e HF_TOKEN \
+	$(IMAGE)
+.PHONY: build train eval shell
+build:
+	docker build -t $(IMAGE) .
+train:
+	$(RUN) python train.py
+eval:
+	$(RUN) python eval.py
+shell:
+	$(RUN) bash

blitz_cli-0.1.0/blitz_cli/templates/README.md.tmpl ADDED Viewed

@@ -0,0 +1,36 @@
+# Distilling `$workflow` → $model_name
+Generated by `blitz scaffold`. This trains a QLoRA adapter on your captured
+`$workflow` traces and grades it against the held-out eval set in Blitz.
+## What's here
+- `data/dataset.jsonl` — SFT training examples (`{"messages": [...]}` per line).
+- `data/evalset.jsonl` — held-out eval inputs (`{example_id, input, reference}`).
+- `recommendation.json` — the base-model recommendation this was built from.
+- `config.json` — training config (base model, seq_len, LoRA + schedule). Edit to taste.
+- `train.py` / `eval.py` — QLoRA SFT and the close-the-loop eval submission.
+- `Dockerfile` / `requirements.txt` / `Makefile` — the CUDA training image.
+## Requirements
+- An NVIDIA GPU + the NVIDIA Container Toolkit (so `docker run --gpus all` works).
+- Base model: `$base_model_hf` (seq_len `$seq_len`).
+## Run
+```bash
+export BLITZ_API_KEY=<your read key>   # the same read-scoped key used to scaffold
+make build
+make train      # writes ./adapter
+make eval       # generates over the eval set, posts predictions to Blitz, prints the run id
+```
+## Gated models
+`$base_model_hf` is licensed `$license`. Llama/Gemma models are gated: accept the
+license on Hugging Face, then `export HF_TOKEN=<hf token>` before `make train`
+(the Makefile forwards `HF_TOKEN` into the container).
+## Notes
+- Trains on the full prompt+completion sequence. To train only on the assistant
+  turn (prompt masking), set `assistant_only_loss=True` on the `SFTConfig` in
+  `train.py` (needs a chat template that emits generation tags).
+- `config.json` hyperparameters are sized for a single 24GB-class card; raise
+  `per_device_batch_size` / `seq_len` if you have more VRAM.

blitz_cli-0.1.0/blitz_cli/templates/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+# Marker so `importlib.resources.files("blitz_cli.templates")` resolves reliably
+# on Python 3.9. The scaffold templates here are read as text (.tmpl), never
+# imported — their torch/transformers references are not CLI dependencies.

blitz_cli-0.1.0/blitz_cli/templates/dockerignore.tmpl ADDED Viewed

@@ -0,0 +1,4 @@
+.hf/
+adapter/
+__pycache__/
+*.pyc

blitz_cli-0.1.0/blitz_cli/templates/eval.py.tmpl ADDED Viewed

@@ -0,0 +1,84 @@
+"""Close the loop: run the trained student over the held-out eval set and submit
+its predictions to Blitz for grading (student-vs-teacher scores in the dashboard).
+Loads ./adapter on top of the 4-bit base, generates an answer for each
+data/evalset.jsonl input, and POSTs them to /eval (candidate=supplied). Uses only
+stdlib urllib for the POST so the container's only heavy deps stay torch +
+transformers. Run inside the container: `make eval`.
+"""
+import json
+import os
+import urllib.request
+import torch
+from peft import PeftModel
+from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
+with open("config.json") as fh:
+    cfg = json.load(fh)
+endpoint = os.environ["BLITZ_ENDPOINT"].rstrip("/")
+project = os.environ["BLITZ_PROJECT"]
+api_key = os.environ["BLITZ_API_KEY"]
+workflow = cfg["workflow"]
+examples = []
+with open("data/evalset.jsonl") as fh:
+    for line in fh:
+        line = line.strip()
+        if line:
+            examples.append(json.loads(line))
+if not examples:
+    print("Eval set is empty — nothing to score. Skipping.")
+    raise SystemExit(0)
+bnb = BitsAndBytesConfig(
+    load_in_4bit=True,
+    bnb_4bit_quant_type="nf4",
+    bnb_4bit_use_double_quant=True,
+    bnb_4bit_compute_dtype=torch.bfloat16,
+)
+tokenizer = AutoTokenizer.from_pretrained(cfg["base_model_hf"])
+base = AutoModelForCausalLM.from_pretrained(
+    cfg["base_model_hf"],
+    quantization_config=bnb,
+    device_map="auto",
+    torch_dtype=torch.bfloat16,
+)
+model = PeftModel.from_pretrained(base, "./adapter").eval()
+predictions = []
+for i, ex in enumerate(examples, 1):
+    messages = [{"role": "user", "content": ex["input"]}]
+    input_ids = tokenizer.apply_chat_template(
+        messages, add_generation_prompt=True, return_tensors="pt"
+    ).to(model.device)
+    with torch.no_grad():
+        out = model.generate(
+            input_ids,
+            max_new_tokens=cfg["seq_len"],
+            do_sample=False,
+            pad_token_id=tokenizer.eos_token_id,
+        )
+    text = tokenizer.decode(out[0][input_ids.shape[1]:], skip_special_tokens=True)
+    predictions.append({"example_id": ex["example_id"], "output": text.strip()})
+    if i % 10 == 0:
+        print(f"  generated {i}/{len(examples)}")
+body = json.dumps(
+    {"workflow": workflow, "candidate": "supplied", "grader": "auto", "predictions": predictions}
+).encode("utf-8")
+req = urllib.request.Request(
+    f"{endpoint}/blitz/projects/{project}/eval",
+    data=body,
+    method="POST",
+    headers={"content-type": "application/json", "x-api-key": api_key},
+)
+with urllib.request.urlopen(req) as resp:
+    run = json.loads(resp.read().decode("utf-8"))
+run_id = run.get("id", "?")
+print(f"Submitted {len(predictions)} predictions. Eval run: {run_id}")
+print(f"Poll: GET {endpoint}/blitz/projects/{project}/eval/runs/{run_id}")

blitz_cli-0.1.0/blitz_cli/templates/requirements.txt.tmpl ADDED Viewed

@@ -0,0 +1,11 @@
+# QLoRA SFT stack, pinned to a mutually-compatible set (CUDA 12.1, early 2026).
+# torch is intentionally omitted — the Dockerfile's pytorch/cuda base image ships
+# the matching CUDA build, and reinstalling risks pulling a CPU-only wheel.
+# If you bump trl/transformers, re-verify SFTConfig(max_length=...) + processing_class=.
+transformers==4.48.0
+trl==0.13.0
+peft==0.14.0
+accelerate==1.2.1
+bitsandbytes==0.45.0
+datasets==3.2.0
+sentencepiece==0.2.0

blitz_cli-0.1.0/blitz_cli/templates/train.py.tmpl ADDED Viewed

@@ -0,0 +1,89 @@
+"""QLoRA SFT — fine-tune the recommended base model on a Blitz workflow's traces.
+Reads config.json (written by `blitz scaffold`) and data/dataset.jsonl (one chat
+example per line, {"messages": [...]}), trains a 4-bit QLoRA adapter, and saves it
+to ./adapter. Run inside the provided container: `make train`.
+"""
+import json
+import torch
+from datasets import load_dataset
+from peft import LoraConfig, prepare_model_for_kbit_training
+from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
+from trl import SFTConfig, SFTTrainer
+# All hyperparameters below come from config.json in the working directory.
+with open("config.json") as fh:
+    cfg = json.load(fh)
+print(f"Base model: {cfg['base_model_hf']}  seq_len={cfg['seq_len']}")
+# 4-bit NF4 quantization with double quantization; compute dtype is bfloat16.
+bnb = BitsAndBytesConfig(
+    load_in_4bit=True,
+    bnb_4bit_quant_type="nf4",
+    bnb_4bit_use_double_quant=True,
+    bnb_4bit_compute_dtype=torch.bfloat16,
+)
+tokenizer = AutoTokenizer.from_pretrained(cfg["base_model_hf"])
+# Fall back to EOS for padding when the tokenizer defines no pad token.
+if tokenizer.pad_token is None:
+    tokenizer.pad_token = tokenizer.eos_token
+model = AutoModelForCausalLM.from_pretrained(
+    cfg["base_model_hf"],
+    quantization_config=bnb,
+    device_map="auto",
+    torch_dtype=torch.bfloat16,
+)
+model = prepare_model_for_kbit_training(model)
+# Turn off the generation cache on the model config before training.
+model.config.use_cache = False
+# LoRA adapter settings; target_modules="all-linear" is passed through to peft.
+peft_config = LoraConfig(
+    r=cfg["lora_r"],
+    lora_alpha=cfg["lora_alpha"],
+    lora_dropout=cfg["lora_dropout"],
+    bias="none",
+    task_type="CAUSAL_LM",
+    target_modules="all-linear",
+)
+dataset = load_dataset("json", data_files="data/dataset.jsonl", split="train")
+def to_text(example):
+    # dataset.jsonl carries {"messages": [{role, content}, ...]} ending in the
+    # assistant turn — render it with the model's own chat template.
+    return {"text": tokenizer.apply_chat_template(example["messages"], tokenize=False)}
+# Replace every original column with the single rendered "text" column.
+dataset = dataset.map(to_text, remove_columns=dataset.column_names)
+# NOTE(review): confirm the trl release pinned in requirements.txt accepts
+# `max_length` here — some trl releases name this option `max_seq_length`.
+sft_config = SFTConfig(
+    output_dir="./adapter",
+    num_train_epochs=cfg["epochs"],
+    per_device_train_batch_size=cfg["per_device_batch_size"],
+    gradient_accumulation_steps=cfg["grad_accum"],
+    learning_rate=cfg["lr"],
+    max_length=cfg["seq_len"],
+    packing=True,
+    bf16=True,
+    logging_steps=10,
+    save_strategy="epoch",
+    lr_scheduler_type="cosine",
+    warmup_ratio=0.03,
+    gradient_checkpointing=True,
+    dataset_text_field="text",
+    report_to="none",
+)
+trainer = SFTTrainer(
+    model=model,
+    args=sft_config,
+    train_dataset=dataset,
+    peft_config=peft_config,
+    processing_class=tokenizer,
+)
+trainer.train()
+# Save the trained adapter and the tokenizer side by side in ./adapter.
+trainer.save_model("./adapter")
+tokenizer.save_pretrained("./adapter")
+print("Saved LoRA adapter -> ./adapter. Next: `make eval` to score it on the held-out set.")

blitz_cli-0.1.0/pyproject.toml ADDED Viewed

@@ -0,0 +1,25 @@
+[project]
+name = "blitz-cli"
+version = "0.1.0"
+description = "Developer CLI for Blitz: pull a workflow's training data + base-model recommendation and scaffold a runnable QLoRA training container."
+readme = "README.md"
+requires-python = ">=3.9"
+# Intentionally stdlib-only: the CLI talks to the Blitz HTTP API over urllib and
+# renders templates with string.Template. The heavy ML deps (torch/transformers/
+# trl/peft) live ONLY in the generated training project's requirements.txt — they
+# are never installed by this package.
+dependencies = []
+[project.scripts]
+blitz = "blitz_cli.cli:main"
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+[tool.hatch.build.targets.wheel]
+# blitz_cli (incl. the templates/ subpackage) ships in full. The scaffold
+# templates carry a .tmpl suffix so they're read as package data via
+# importlib.resources, never imported — their torch/transformers references are
+# not dependencies of this CLI.
+packages = ["blitz_cli"]

blitz_cli-0.1.0/tests/test_scaffold.py ADDED Viewed

@@ -0,0 +1,80 @@
+"""Unit tests for blitz-cli config derivation, rendering, and the read client.
+No network: the client tests exercise URL/query building only.
+"""
+import json
+from pathlib import Path
+from blitz_cli import _scaffold
+from blitz_cli._client import BlitzClient
+def _rec(params_b=7.0, hf="Qwen/Qwen2.5-7B-Instruct", license="apache-2.0", seq_len=2048):
+    return {
+        "workflow": "wf",
+        "seq_len": seq_len,
+        "signals": {"p95_input": 800, "max_output": 400},
+        "recommendation": {
+            "id": "qwen2.5-7b",
+            "name": "Qwen2.5 7B Instruct",
+            "params_b": params_b,
+            "hf": hf,
+            "license": license,
+            "why": "mid-tier teacher",
+        },
+        "alternatives": [],
+    }
+def test_build_config_scales_with_params_b():
+    small = _scaffold.build_config(_rec(params_b=3.0), "wf")
+    mid = _scaffold.build_config(_rec(params_b=7.0), "wf")
+    big = _scaffold.build_config(_rec(params_b=14.0), "wf")
+    assert small["per_device_batch_size"] == 4 and small["lora_r"] == 16
+    assert mid["per_device_batch_size"] == 2 and mid["grad_accum"] == 8
+    assert big["lora_r"] == 32 and big["per_device_batch_size"] == 1
+    assert big["lr"] == 1e-4 and mid["lr"] == 2e-4
+    assert mid["lora_alpha"] == mid["lora_r"] * 2
+def test_seq_len_prefers_recommendation_then_falls_back():
+    assert _scaffold.build_config(_rec(seq_len=4096), "wf")["seq_len"] == 4096
+    # No seq_len → clamp(p95_input + max_output, 1024, 8192) = 1200
+    rec = _rec(seq_len=None)
+    assert _scaffold.build_config(rec, "wf")["seq_len"] == 1200
+def test_gated_license_warning():
+    assert _scaffold.gated_license_warning(_rec(license="apache-2.0")) is None
+    assert "gated" in _scaffold.gated_license_warning(_rec(license="llama-3.1")).lower()
+    assert _scaffold.gated_license_warning(_rec(license="gemma")) is not None
+def test_render_writes_a_runnable_project(tmp_path):
+    out = tmp_path / "train"
+    cfg = _scaffold.render(out, _rec(), "proj_abc", "wf", "http://localhost:8000")
+    for f in ("config.json", "recommendation.json", "train.py", "eval.py",
+              "requirements.txt", "Dockerfile", "Makefile", "README.md", ".dockerignore"):
+        assert (out / f).exists(), f
+    saved = json.loads((out / "config.json").read_text())
+    assert saved["base_model_hf"] == "Qwen/Qwen2.5-7B-Instruct"
+    assert saved == cfg
+    # Template substitution happened (no stray $placeholders left in Makefile),
+    # while make's own $(...) refs survive.
+    mk = (out / "Makefile").read_text()
+    assert "proj_abc" in mk and "$project" not in mk
+    assert "$(CURDIR)" in mk and "$(IMAGE)" in mk
+    # Dockerfile keeps the workflow name; train.py is shipped verbatim Python.
+    assert "wf" in (out / "Dockerfile").read_text()
+    assert "SFTTrainer" in (out / "train.py").read_text()
+def test_client_builds_urls_with_query_and_bools():
+    c = BlitzClient(endpoint="http://x/", api_key="k", project="proj_1")
+    assert c._url("/a") == "http://x/a"
+    url = c._url("/a", {"workflow": "wf", "include_synthetic": False, "skip": None})
+    assert "workflow=wf" in url and "include_synthetic=false" in url and "skip" not in url