PyPI - freesolo-flash-dev - Versions diffs - 0.2.25__py3-none-any.whl - Mend

freesolo-flash-dev 0.2.25__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (111) hide show

flash/__init__.py +29 -0
flash/_channel.py +23 -0
flash/_fileio.py +35 -0
flash/_logging.py +49 -0
flash/_update_check.py +266 -0
flash/catalog.py +253 -0
flash/cli/__init__.py +1 -0
flash/cli/main/__init__.py +227 -0
flash/cli/main/__main__.py +6 -0
flash/cli/main/commands.py +636 -0
flash/cli/main/envpush.py +317 -0
flash/cli/main/render.py +599 -0
flash/cli/main/training_doc.py +455 -0
flash/client/__init__.py +14 -0
flash/client/config.py +70 -0
flash/client/http.py +372 -0
flash/client/runtime_secrets.py +69 -0
flash/client/specs.py +20 -0
flash/cost/__init__.py +16 -0
flash/cost/analytical.py +175 -0
flash/cost/facts.py +114 -0
flash/cost/spec.py +113 -0
flash/cost/types.py +158 -0
flash/engine/__init__.py +6 -0
flash/engine/accounting.py +36 -0
flash/engine/chalk_kernels.py +116 -0
flash/engine/multiturn_rollout.py +780 -0
flash/engine/recipe.py +86 -0
flash/engine/vram.py +603 -0
flash/engine/worker/__init__.py +2916 -0
flash/engine/worker/__main__.py +4 -0
flash/engine/worker/kernel_warmup.py +400 -0
flash/engine/worker/lora.py +796 -0
flash/engine/worker/packing.py +366 -0
flash/engine/worker/perf.py +1048 -0
flash/envs/__init__.py +10 -0
flash/envs/adapter/__init__.py +883 -0
flash/envs/adapter/rubric.py +222 -0
flash/envs/base.py +52 -0
flash/envs/registry.py +62 -0
flash/mcp/__init__.py +1 -0
flash/mcp/server.py +85 -0
flash/providers/__init__.py +59 -0
flash/providers/_auth.py +24 -0
flash/providers/_http.py +230 -0
flash/providers/_instance.py +416 -0
flash/providers/_instance_bootstrap.py +517 -0
flash/providers/_poll.py +311 -0
flash/providers/allocator.py +193 -0
flash/providers/base.py +431 -0
flash/providers/hyperstack/__init__.py +127 -0
flash/providers/hyperstack/api.py +522 -0
flash/providers/hyperstack/auth.py +17 -0
flash/providers/hyperstack/gpus.py +29 -0
flash/providers/hyperstack/jobs/__init__.py +632 -0
flash/providers/hyperstack/jobs/builders.py +122 -0
flash/providers/hyperstack/preflight.py +23 -0
flash/providers/hyperstack/pricing.py +26 -0
flash/providers/hyperstack/train.py +25 -0
flash/providers/lambdalabs/__init__.py +139 -0
flash/providers/lambdalabs/api.py +261 -0
flash/providers/lambdalabs/auth.py +18 -0
flash/providers/lambdalabs/gpus.py +29 -0
flash/providers/lambdalabs/jobs/__init__.py +724 -0
flash/providers/lambdalabs/jobs/builders.py +118 -0
flash/providers/lambdalabs/preflight.py +27 -0
flash/providers/lambdalabs/pricing.py +51 -0
flash/providers/lambdalabs/train.py +27 -0
flash/providers/preflight.py +55 -0
flash/providers/realized.py +80 -0
flash/providers/runpod/__init__.py +130 -0
flash/providers/runpod/api.py +186 -0
flash/providers/runpod/auth.py +37 -0
flash/providers/runpod/cost.py +57 -0
flash/providers/runpod/gpus.py +46 -0
flash/providers/runpod/jobs.py +956 -0
flash/providers/runpod/keys.py +139 -0
flash/providers/runpod/preflight.py +30 -0
flash/providers/runpod/preload.py +915 -0
flash/providers/runpod/pricing.py +18 -0
flash/providers/runpod/slots.py +79 -0
flash/providers/runpod/train/__init__.py +150 -0
flash/providers/runpod/train/deps.py +395 -0
flash/providers/runpod/train/endpoints.py +820 -0
flash/py.typed +0 -0
flash/runner/__init__.py +686 -0
flash/runner/checkpoints.py +82 -0
flash/runner/deploy.py +422 -0
flash/runner/lifecycle.py +672 -0
flash/schema/__init__.py +375 -0
flash/schema/fields.py +331 -0
flash/serve/__init__.py +1 -0
flash/serve/deploy.py +326 -0
flash/serve/pricing.py +60 -0
flash/server/__init__.py +1 -0
flash/server/__main__.py +20 -0
flash/server/app.py +961 -0
flash/server/auth.py +263 -0
flash/server/billing.py +124 -0
flash/server/checkpoints.py +110 -0
flash/server/db.py +160 -0
flash/server/environment_registry.py +102 -0
flash/server/envs.py +360 -0
flash/server/reconcile.py +163 -0
flash/server/run_registry.py +150 -0
flash/spec.py +333 -0
freesolo_flash_dev-0.2.25.dist-info/METADATA +192 -0
freesolo_flash_dev-0.2.25.dist-info/RECORD +111 -0
freesolo_flash_dev-0.2.25.dist-info/WHEEL +4 -0
freesolo_flash_dev-0.2.25.dist-info/entry_points.txt +3 -0
freesolo_flash_dev-0.2.25.dist-info/licenses/LICENSE +201 -0

flash/cli/main/commands.py ADDED Viewed

@@ -0,0 +1,636 @@
+"""CLI command handlers for the managed Flash service.
+Every run-lifecycle command is a thin HTTP call to the Flash control plane —
+users authenticate with their freesolo API key (`flash login` verifies it against
+the freesolo backend), never with provider credentials. Config parsing/validation
+and `--dry-run` stay fully local.
+"""
+from __future__ import annotations
+import json
+import os
+import sys
+import time
+from pathlib import Path
+from flash import __version__
+from flash._logging import get_logger
+from flash.catalog import public_model_rows
+from flash.client import (
+    ApiClient,
+    ClientError,
+    client_from_config,
+    save_credentials,
+    verify_freesolo_key,
+)
+from flash.client.config import load_credentials
+from flash.client.runtime_secrets import runtime_secrets_from_local_env
+from flash.client.specs import spec_payload
+from flash.cost.spec import runconfig_from_spec
+from flash.runner import TERMINAL_STATES, new_run_id
+from flash.schema import ConfigError, spec_from_file
+from . import render
+from .training_doc import TRAINING_MD
+# Module logger for the CLI command handlers.
+logger = get_logger("flash.cli.main")
+# Exceptions that represent expected user/config errors: report them as a clean one-line
+# message instead of a Python traceback (use --debug to see the full trace).
+_USER_ERRORS = (
+    ConfigError,
+    ClientError,
+    FileNotFoundError,
+    ValueError,
+)
+# Run states after which nothing more will happen (polling can stop).
+_CLI_DONE_STATES = TERMINAL_STATES | {"deployed"}
+# Final states for which following a run exits 0; any other final state exits 1.
+_OK_STATES = {"done", "dry_run", "deployed"}
+# Characters the log-follow spinner cycles through, one per redraw.
+_SPINNER_FRAMES = "|/-\\"
+# Target number of seconds between spinner redraws while waiting for the next log poll.
+_SPINNER_TICK_SECONDS = 0.1
+class _LogFollowSpinner:
+    """One-line stderr spinner shown while a run's logs are being followed.
+    Drawing only happens when stderr is a TTY; otherwise `render` and `clear` do nothing.
+    """
+    def __init__(self, run_id: str):
+        self._run_id = run_id
+        self._enabled = sys.stderr.isatty()
+        self._active = False
+        self._frame = 0
+        self._last_len = 0
+    @property
+    def enabled(self) -> bool:
+        """Whether the spinner draws at all (stderr was a TTY at construction time)."""
+        return self._enabled
+    def render(self, state: str) -> None:
+        """Draw the next frame, labelled with the run's current `state`, over the previous one."""
+        if self._enabled:
+            glyph = _SPINNER_FRAMES[self._frame % len(_SPINNER_FRAMES)]
+            self._frame += 1
+            text = f"{glyph} following logs for {self._run_id} ({state})"
+            # Pad to the previous width so a shorter message fully covers a longer one.
+            sys.stderr.write("\r" + text.ljust(self._last_len))
+            sys.stderr.flush()
+            self._last_len = len(text)
+            self._active = True
+    def clear(self) -> None:
+        """Blank the spinner line if one is currently drawn, leaving the cursor at column 0."""
+        if self._enabled and self._active:
+            blank = " " * self._last_len
+            sys.stderr.write(f"\r{blank}\r")
+            sys.stderr.flush()
+            self._active = False
+def _sleep_with_spinner(interval: float, spinner: _LogFollowSpinner, state: str) -> None:
+    """Wait `interval` seconds, animating `spinner` for the duration.
+    A non-positive interval returns at once. With a disabled spinner this is a single plain
+    sleep; otherwise the wait is cut into equal slices of roughly `_SPINNER_TICK_SECONDS`
+    and one frame is drawn before each slice.
+    """
+    if interval <= 0:
+        return
+    if spinner.enabled:
+        slices = max(1, int(interval / _SPINNER_TICK_SECONDS))
+        pause = interval / slices
+        for _ in range(slices):
+            spinner.render(state)
+            time.sleep(pause)
+    else:
+        time.sleep(interval)
+def cmd_version(args) -> int:
+    """Print the installed flash version (themed when styling is on) and return 0."""
+    text = render.version(__version__) if render.styled() else f"flash {__version__}"
+    print(text)
+    return 0
+def cmd_login(args) -> int:
+    """Verify a freesolo API key, store it, and print the identity it belongs to.
+    The key comes from `--api-key` or, failing that, FREESOLO_API_KEY. Returns 1 when no key
+    is given or verification raises ClientError (re-raised instead under `--debug`);
+    returns 0 once the key is saved.
+    """
+    # Login is handled by the freesolo backend (not the flash control plane): the key the user
+    # created at freesolo.co/sign-in is verified against freesolo before it is stored, and
+    # that same key then authenticates flash's control plane.
+    key_from_env = os.environ.get("FREESOLO_API_KEY")
+    api_key = args.api_key or key_from_env
+    try:
+        if not api_key:
+            raise ClientError(
+                "no API key provided: pass `--api-key <key>` or set FREESOLO_API_KEY. "
+                "Create or copy a key at https://freesolo.co/sign-in."
+            )
+        verify_freesolo_key(api_key, base_url=getattr(args, "freesolo_url", None))
+    except ClientError as exc:
+        # No key, a rejected key, or an unreachable backend: report it plainly and send the
+        # user back to `flash login`. With `--debug` the error propagates so the top-level
+        # handler can show the full traceback.
+        if getattr(args, "debug", False):
+            raise
+        print(render.login_failed(str(exc)), file=sys.stderr)
+        return 1
+    api_url = args.api_url or load_credentials()[0]
+    # save_credentials clears the stored url when it's the default, so logging into the
+    # default plane also drops a stale custom url left by an earlier custom-URL login.
+    save_credentials(api_key, api_url=api_url)
+    env_shadows_saved_key = bool(
+        args.api_key and key_from_env and key_from_env != args.api_key
+    )
+    if env_shadows_saved_key:
+        print(
+            "warning: FREESOLO_API_KEY is set and will override this saved login for future "
+            "commands; unset FREESOLO_API_KEY to use the saved key.",
+            file=sys.stderr,
+        )
+    # Show the identity right away (the same one `flash whoami` prints) and never echo the
+    # key. The lookup is best-effort: the key is already verified and stored, so a
+    # control-plane hiccup must not turn a successful login into a failure.
+    identity = _identity_or_none(api_key, api_url)
+    print(render.login_ok(identity))
+    return 0
+# The identity card shown after login is nonessential, so its lookup gets a short timeout:
+# a control-plane hiccup must not make a successful login look like it is hanging.
+_IDENTITY_LOOKUP_TIMEOUT_S = 5.0
+def _identity_or_none(api_key: str, api_url: str) -> dict | None:
+    """Return the identity for `api_key` at `api_url`, or None if the lookup fails.
+    ClientError, OSError and ValueError are swallowed on purpose; the lookup is best-effort.
+    """
+    # Build the client from the key/url that were just verified and stored rather than
+    # `client_from_config()`: an ambient FREESOLO_API_KEY would otherwise win over the file
+    # and render the wrong identity.
+    try:
+        probe = ApiClient(api_url, api_key, timeout=_IDENTITY_LOOKUP_TIMEOUT_S)
+        identity = probe.me()
+    except (ClientError, OSError, ValueError):
+        return None
+    return identity
+def cmd_whoami(args) -> int:
+    """Print the identity behind the configured credentials and return 0."""
+    identity = client_from_config().me()
+    print(render.whoami(identity))
+    return 0
+# Source text of the starter `environment.py` that `cmd_env_setup` writes when the file is
+# missing. It is a runtime template: every character below ends up in the user's project.
+_STARTER_ENV_PY = '''\
+"""Starter Freesolo environment.
+Edit datasets/train.jsonl and the reward code, then upload with
+`flash env push --name my-env .`.
+A managed run should use the returned [environment] id from
+`flash env push --name my-env .`.
+This starter keeps a tiny smoke-test dataset in datasets/train.jsonl. Replace it
+with your real training rows before a real run.
+"""
+from __future__ import annotations
+import json
+from pathlib import Path
+from freesolo.datasets.types import TaskExample
+from freesolo.environments import EnvironmentSingleTurn, RewardResult
+DEFAULT_DATASET_PATH = Path(__file__).parent / "datasets" / "train.jsonl"
+def load_jsonl(path: str | Path):
+    rows = []
+    with Path(path).open() as f:
+        for line in f:
+            line = line.strip()
+            if line:
+                rows.append(json.loads(line))
+    return rows
+def exact_match_reward(example: TaskExample, response_text: str) -> RewardResult:
+    expected = str(example.output or "").strip()
+    score = 1.0 if expected and expected in response_text else 0.0
+    return RewardResult(score=score, threshold=1.0)
+class StarterEnv(EnvironmentSingleTurn):
+    dataset = load_jsonl(DEFAULT_DATASET_PATH)
+    def build_prompt_messages(self, example: TaskExample, prompt_text: str):
+        return [{"role": "user", "content": example.input}]
+    def score_response(self, example: TaskExample, response_text: str) -> RewardResult:
+        return exact_match_reward(example, response_text)
+def load_environment(dataset_path: str | None = None, **kwargs) -> StarterEnv:
+    env = StarterEnv()
+    if dataset_path:
+        env.dataset = load_jsonl(dataset_path)
+    return env
+'''
+# Two-row smoke-test dataset that `cmd_env_setup` writes to datasets/train.jsonl when the
+# file is missing; each line is one JSON object with "input" and "output" keys.
+_STARTER_DATASET_JSONL = """\
+{"input":"What is 2 + 2?","output":"4"}
+{"input":"What is 3 + 5?","output":"8"}
+"""
+def _write_if_missing(path: Path, text: str) -> None:
+    """Create `path` holding `text` (always UTF-8) unless it already exists; never overwrites."""
+    if not path.exists():
+        # Explicit UTF-8 so the result does not depend on the locale's default encoding
+        # (TRAINING_MD has non-ASCII characters that a non-UTF-8 locale cannot encode).
+        path.write_text(text, encoding="utf-8")
+def cmd_env_setup(args) -> int:
+    """Scaffold a starter project in the current directory and report what was ensured.
+    Creates `configs/` and `datasets/`, then writes the starter dataset, environment module,
+    RL and SFT configs, and TRAINING.md. A file that already exists is left untouched, so
+    re-running the command does not clobber user edits. Always returns 0.
+    """
+    Path("configs").mkdir(exist_ok=True)
+    Path("datasets").mkdir(exist_ok=True)
+    _write_if_missing(Path("datasets/train.jsonl"), _STARTER_DATASET_JSONL)
+    _write_if_missing(Path("environment.py"), _STARTER_ENV_PY)
+    # Shared `[environment]` section placed in both starter configs.
+    env_comment = (
+        "# Environment: upload this project folder with\n"
+        "# `flash env push --name my-env .`, then paste the returned id below.\n"
+        "# If the environment reads secrets with os.environ, list only the env var names here.\n"
+        "# Values are read from your shell or .env at submit time and are not stored in the spec.\n"
+        "[environment]\n"
+        'id = ""\n\n'
+        '# secrets = ["SERPAPI_API_KEY"]\n\n'
+    )
+    # Trailing note shared by both starter configs.
+    managed_note = (
+        "# GPU and the HF artifact repo are managed automatically by the platform: the GPU is\n"
+        "# the cheapest fitting class across providers, and each run gets its own artifact repo.\n"
+    )
+    _write_if_missing(
+        Path("configs/rl.toml"),
+        'model = "Qwen/Qwen3.5-4B"\n'
+        'algorithm = "grpo"\n\n'
+        f"{env_comment}"
+        "[train]\n"
+        "steps = 150\n"
+        "lora_rank = 32\n"
+        "seeds = [0]\n"
+        f"{managed_note}",
+    )
+    _write_if_missing(
+        Path("configs/sft.toml"),
+        'model = "Qwen/Qwen3.5-4B"\n'
+        'algorithm = "sft"\n\n'
+        f"{env_comment}"
+        "[train]\n"
+        "epochs = 1\n"
+        "lora_rank = 32\n"
+        "seeds = [0]\n"
+        f"{managed_note}",
+    )
+    # TRAINING.md is the playbook for the AI agent driving these runs: how to design the
+    # reward, what to read, and how to decide a run actually improved (not just finished).
+    _write_if_missing(Path("TRAINING.md"), TRAINING_MD)
+    scaffolded = [
+        "environment.py",
+        "datasets/train.jsonl",
+        "configs/rl.toml",
+        "configs/sft.toml",
+        "TRAINING.md",
+    ]
+    if render.styled():
+        print(render.env_setup(scaffolded))
+        return 0
+    print(f"ensured {', '.join(scaffolded)}")
+    return 0
+def cmd_models(args) -> int:
+    """Print the public model catalog: a themed table when styled, else one model id per line."""
+    catalog = public_model_rows()
+    if render.styled():
+        print(render.models_table(catalog))
+    else:
+        for entry in catalog:
+            print(entry["id"])
+    return 0
+def cmd_gpus(args) -> int:
+    """Print each RunPod GPU class with its VRAM and $/hr, ordered by ascending hourly price."""
+    from flash.providers.base import GPU_INFO
+    from flash.providers.runpod.pricing import static_rates as runpod_static_rates
+    rates = runpod_static_rates()
+    # Only classes that carry an `enum_member` are listed.
+    listed = [info for info in GPU_INFO.values() if info.enum_member]
+    listed.sort(key=lambda g: g.hourly_usd)
+    tip = (
+        "Tip: GPU class selection is fully automatic — the submit-time allocator always picks the\n"
+        "cheapest validated RunPod class that fits the model, so you don't pin a GPU type."
+    )
+    if render.styled():
+        table_rows = [(info.name, info.vram_gb, rates.get(info.name)) for info in listed]
+        print(render.gpus_table(table_rows, tip))
+        return 0
+    def rate_cell(rate: float | None) -> str:
+        # A missing (or zero) rate is shown as a dash; the cell is right-aligned to 11 columns.
+        shown = f"{rate:.2f}" if rate else "-"
+        return f"{shown:>11}"
+    print(f"{'gpu':<16}{'vram':>6}{'runpod$/hr':>11}")
+    for info in listed:
+        print(f"{info.name:<16}{info.vram_gb:>5}G{rate_cell(rates.get(info.name))}")
+    print(f"\n{tip}")
+    return 0
+def cmd_env_list(args) -> int:
+    """List installed environments plus local environment sources that could be published.
+    Local sources are `.` (when ./environment.py exists) and entries under ./environments.
+    Returns 0.
+    """
+    from flash.envs.registry import list_installed_environments
+    installed = list_installed_environments()
+    def publishable(entry: Path) -> bool:
+        # Dunder entries (e.g. __pycache__, __init__.py) are never environment sources.
+        if entry.name.startswith("__"):
+            return False
+        if entry.is_dir():
+            # A folder counts when it holds environment.py or a module named after the
+            # folder (dashes mapped to underscores).
+            named_module = entry / f"{entry.name.replace('-', '_')}.py"
+            return (entry / "environment.py").is_file() or named_module.is_file()
+        # Single-file modules remain supported for small smoke tests.
+        return entry.suffix == ".py"
+    sources: list[str] = []
+    if Path("environment.py").is_file():
+        sources.append(".")
+    env_root = Path("environments")
+    if env_root.is_dir():
+        sources.extend(f"environments/{e.name}" for e in env_root.iterdir() if publishable(e))
+    # Pick the rendering first so the themed panel and the plain lines never both print.
+    if render.styled():
+        print(render.env_list(list(installed), sorted(sources)))
+        return 0
+    if installed:
+        print("installed environments:")
+        for env_id in installed:
+            print(f"  {env_id}")
+    if sources:
+        print("local env sources (publish with `flash env push --name <name> <path>`):")
+        for source in sorted(sources):
+            print(f"  {source}")
+    return 0
+def _cmd_train_cost(args) -> int:
+    """Handle `flash train --cost`: print the pre-flight USD estimate and exit without submitting.
+    The estimate is catalog-only and deterministic. For an uncapped SFT run it tries to count
+    the environment's train split and falls back to a default example count (with a warning)
+    when the environment cannot be imported here."""
+    from flash.cost import estimate_cost
+    spec = spec_from_file(
+        args.config,
+        run_id=None,
+        overrides=args.overrides,
+        extra_configs=args.extra_configs,
+    )
+    run_config = runconfig_from_spec(spec)
+    estimate = estimate_cost(run_config)
+    if render.styled():
+        report = render.cost_panel(estimate)
+    else:
+        report = estimate.breakdown()
+    print(report)
+    return 0
+def cmd_train(args) -> int:
+    """Validate a training config and, unless `--cost` or `--dry-run`, submit it as a run.
+    `--cost` delegates to `_cmd_train_cost`. `--dry-run` prints the parsed spec and returns 0
+    without contacting the service. Otherwise the run is created; with `--background` the
+    creation status is printed and 0 returned, else logs are followed and the exit code comes
+    from `_follow_run`.
+    """
+    if getattr(args, "cost", False):
+        return _cmd_train_cost(args)
+    dry_run = args.dry_run
+    spec = spec_from_file(
+        args.config,
+        run_id=new_run_id() if dry_run else None,
+        overrides=args.overrides,
+        extra_configs=args.extra_configs,
+    )
+    styled = render.styled
+    if dry_run:
+        # Fully local: validate the id-based config without credentials, a server, or a GPU.
+        preview = {"run_id": spec.run_id, "state": "dry_run", "spec": spec.to_dict()}
+        if styled():
+            print(
+                render.object_panel("train", preview, "dry run — validated locally, not submitted")
+            )
+        else:
+            print(json.dumps(preview, indent=2))
+        return 0
+    client = client_from_config()
+    body = spec_payload(spec)
+    secrets = runtime_secrets_from_local_env(args.config, keys=spec.environment.secrets)
+    status = client.create_run(body, runtime_secrets=secrets)
+    run_id = status["run_id"]
+    logger.info(
+        "submitted run %s: model=%s algorithm=%s gpu=%s seeds=%s",
+        run_id,
+        spec.model,
+        spec.algorithm,
+        spec.gpu.type,
+        list(spec.train.seeds),
+    )
+    if args.background:
+        if styled():
+            print(render.object_panel("train", status, "submitted (running in background)"))
+        else:
+            print(json.dumps(status, indent=2))
+        return 0
+    # The submission notice goes to stderr; the followed logs themselves go to stdout.
+    if styled():
+        notice = render.submitted(run_id)
+    else:
+        notice = (
+            f"run {run_id} submitted; following logs "
+            f"(Ctrl-C detaches, `flash status {run_id} --follow` resumes)"
+        )
+    print(notice, file=sys.stderr)
+    return _follow_run(client, run_id)
+def _poll_logs(client: ApiClient, run_id: str, interval: float) -> str:
+    """Print a run's logs page by page until its state is in `_CLI_DONE_STATES`; return that state.
+    Each request resumes from the offset the previous page reported, and `interval` seconds
+    pass (with a spinner on a TTY) between polls.
+    """
+    spinner = _LogFollowSpinner(run_id)
+    cursor = 0
+    try:
+        while True:
+            page = client.get_logs(run_id, offset=cursor)
+            text = page["logs"]
+            if text:
+                # Wipe the spinner line first so log text never lands on top of it.
+                spinner.clear()
+                print(text, end="", flush=True)
+            cursor = page["offset"]
+            state = page["state"]
+            if state in _CLI_DONE_STATES:
+                return state
+            _sleep_with_spinner(interval, spinner, state)
+    finally:
+        # Runs on normal return and on any exception, so the spinner line is always wiped.
+        spinner.clear()
+def _follow_run(client: ApiClient, run_id: str) -> int:
+    """Follow a run's logs to its final state, print the final status, and return the exit code.
+    The exit code is 0 when the final state is in `_OK_STATES` and 1 otherwise.
+    """
+    final_state = _poll_logs(client, run_id, interval=2.0)
+    status = client.get_run(run_id)
+    summary = render.run_status(status) if render.styled() else json.dumps(status, indent=2)
+    print(summary)
+    if final_state in _OK_STATES:
+        return 0
+    return 1
+def cmd_status(args) -> int:
+    """Print a run's status; with `--follow` stream logs to completion, with `--logs` dump them first.
+    `--follow` returns `_follow_run`'s exit code; every other path returns 0.
+    """
+    client = client_from_config()
+    run_id = args.run_id
+    if getattr(args, "follow", False):
+        return _follow_run(client, run_id)
+    if getattr(args, "logs", False):
+        orchestrator_log = client.get_logs(run_id).get("logs", "")
+        printed_any = False
+        if orchestrator_log:
+            # Guarantee the log ends with exactly the newline it has, or one added newline.
+            print(orchestrator_log, end="" if orchestrator_log.endswith("\n") else "\n")
+            printed_any = True
+        # Always append the real train-subprocess output (the orchestrator log can't carry it);
+        # the server fetches console_/error_<phase>.txt from HF with the operator token.
+        worker_output = client.get_worker_output(run_id) or {}
+        for name, text in worker_output.items():
+            if not text:
+                continue
+            # A blank line separates sections, but never precedes the first thing printed (an
+            # empty orchestrator log would otherwise leave a leading blank line).
+            if printed_any:
+                print()
+            print(f"----- {name} -----")
+            print(text, end="" if text.endswith("\n") else "\n")
+            printed_any = True
+    status = client.get_run(run_id)
+    summary = render.run_status(status) if render.styled() else json.dumps(status, indent=2)
+    print(summary)
+    return 0
+def cmd_runs(args) -> int:
+    """List the caller's runs and return 0.
+    With styling on, rendering is delegated to `render.runs_table` (or `render.empty` when
+    there are none). Otherwise a fixed-width table is printed, most recently updated first.
+    Null `updated_at` / `cost_usd` values are treated as 0, the same way null `spec` /
+    `remote` values are treated as empty.
+    """
+    runs = client_from_config().list_runs()
+    if not runs:
+        if render.styled():
+            print(render.empty("runs", "0 runs", "no runs yet — submit one with `flash train`"))
+        else:
+            print("no runs yet")
+        return 0
+    if render.styled():
+        print(render.runs_table(runs))
+        return 0
+    print(f"{'RUN_ID':<32}  {'STATE':<11}  {'ALGO':<5}  {'COST($)':>8}  {'GPU':<22}  MODEL")
+    # `or 0`: a key that is present but null would otherwise make the sort compare None with
+    # numbers and raise TypeError.
+    for r in sorted(runs, key=lambda r: r.get("updated_at") or 0, reverse=True):
+        spec = r.get("spec") or {}
+        model = spec.get("model", "")
+        algorithm = str(spec.get("algorithm") or "-").upper()
+        remote = r.get("remote") or {}
+        spec_gpu = spec.get("gpu") or {}
+        # the remote handle knows what actually ran; the spec is the parse-time pick
+        provider = remote.get("provider") or ("runpod" if remote else spec_gpu.get("provider", ""))
+        gpu = remote.get("gpu") or spec_gpu.get("type", "")
+        where = f"{gpu}@{provider}" if provider else gpu
+        # `or 0.0`: a null cost cannot take the `.4f` format spec.
+        cost = r.get("cost_usd") or 0.0
+        print(
+            f"{r['run_id']:<32}  {r['state']:<11}  {algorithm:<5}  "
+            f"{cost:>8.4f}  {where:<22}  {model}"
+        )
+    return 0
+def cmd_cancel(args) -> int:
+    """Ask the service to cancel `args.run_id`, print the run id with its new state, return 0."""
+    response = client_from_config().cancel_run(args.run_id)
+    summary = {"run_id": args.run_id, "state": response["state"]}
+    if render.styled():
+        text = render.object_panel("cancel", summary)
+    else:
+        text = json.dumps(summary, indent=2)
+    print(text)
+    return 0
+def cmd_checkpoints(args) -> int:
+    """Print a run's deployable checkpoints, one per line on stdout; hints go to stderr.
+    Returns 0 whether or not any checkpoint exists.
+    """
+    found = client_from_config().checkpoints(args.run_id)
+    if found:
+        for ckpt in found:
+            print(f"step {ckpt['step']:>6}  {ckpt['repo_id']}:{ckpt['subfolder']}")
+        print(
+            f"\ndeploy one with `flash deploy {args.run_id} --step <STEP>`.",
+            file=sys.stderr,
+        )
+    else:
+        print(
+            f"no deployable checkpoints for {args.run_id} yet "
+            "(RL streams one per save interval; SFT-only runs have none).",
+            file=sys.stderr,
+        )
+    return 0
+def cmd_deploy(args) -> int:
+    """Deploy a run (optionally a specific `--step`), print the result, and return 0.
+    The billing note is written to stderr so stdout carries only the deployment payload.
+    """
+    client = client_from_config()
+    deployment = client.deploy(
+        args.run_id,
+        dry_run=args.dry_run,
+        step=getattr(args, "step", None),
+    )
+    if render.styled():
+        text = render.object_panel("deploy", deployment)
+    else:
+        text = json.dumps(deployment, indent=2)
+    print(text)
+    print(
+        "note: serving is billed per token only; use "
+        f"`flash undeploy {args.run_id}` to deregister the adapter.",
+        file=sys.stderr,
+    )
+    return 0
+def cmd_undeploy(args) -> int:
+    """Undeploy `args.run_id`, print the service's response, and return 0."""
+    outcome = client_from_config().undeploy(args.run_id)
+    text = render.object_panel("undeploy", outcome) if render.styled() else json.dumps(outcome, indent=2)
+    print(text)
+    return 0
+def cmd_deployments(args) -> int:
+    """List active deployments and return 0.
+    With styling on, rendering is delegated to `render.deployments_table` (or `render.empty`
+    when there are none). Otherwise a fixed-width table is printed; a missing or null GPU is
+    shown as `?` and a missing or null endpoint name as an empty cell.
+    """
+    rows = client_from_config().deployments()
+    if not rows:
+        if render.styled():
+            print(render.empty("deployments", "0 active", "no active deployments"))
+        else:
+            print("no active deployments")
+        return 0
+    if render.styled():
+        print(render.deployments_table(rows))
+        return 0
+    print(f"{'RUN_ID':<32}  {'GPU':<9}  ENDPOINT")
+    for r in rows:
+        d = r.get("deployment") or {}
+        # `or` rather than a .get default: a key that is present but null would make the
+        # `<9` format spec raise TypeError, and would print a literal "None" endpoint.
+        gpu = d.get("gpu") or "?"
+        endpoint = d.get("endpoint_name") or ""
+        print(f"{r['run_id']:<32}  {gpu:<9}  {endpoint}")
+    return 0
+def cmd_chat(args) -> int:
+    """Send one user message to a deployed run and print the reply; returns 0.
+    The reply is streamed chunk by chunk when the client has `chat_stream`; otherwise a single
+    `chat` call is made and the first choice's message content is printed.
+    """
+    client = client_from_config()
+    conversation = [{"role": "user", "content": args.message}]
+    # A faint speaker label on a TTY; the reply text itself stays plain so a piped transcript
+    # is byte-for-byte the model's words.
+    if render.styled():
+        print(render.chat_label())
+    streamer = getattr(client, "chat_stream", None)
+    if streamer is None:
+        reply = client.chat(
+            args.run_id,
+            messages=conversation,
+            temperature=args.temperature,
+            max_tokens=args.max_tokens,
+        )
+        print(reply["choices"][0]["message"]["content"])
+        return 0
+    emitted = False
+    for piece in streamer(
+        args.run_id,
+        messages=conversation,
+        temperature=args.temperature,
+        max_tokens=args.max_tokens,
+    ):
+        print(piece, end="", flush=True)
+        emitted = True
+    # Finish the line only if something was streamed, so an empty reply prints nothing.
+    if emitted:
+        print()
+    return 0