freesolo-flash 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. flash/__init__.py +11 -0
  2. flash/_fileio.py +35 -0
  3. flash/_logging.py +49 -0
  4. flash/catalog.py +245 -0
  5. flash/cli/__init__.py +1 -0
  6. flash/cli/main/__init__.py +220 -0
  7. flash/cli/main/__main__.py +6 -0
  8. flash/cli/main/commands.py +430 -0
  9. flash/cli/main/envpush.py +333 -0
  10. flash/client/__init__.py +14 -0
  11. flash/client/config.py +46 -0
  12. flash/client/http.py +202 -0
  13. flash/client/specs.py +23 -0
  14. flash/engine/__init__.py +7 -0
  15. flash/engine/accounting.py +37 -0
  16. flash/engine/chalk_kernels.py +150 -0
  17. flash/engine/multiturn_rollout.py +273 -0
  18. flash/engine/recipe.py +86 -0
  19. flash/engine/vram.py +382 -0
  20. flash/engine/worker/__init__.py +1960 -0
  21. flash/engine/worker/__main__.py +4 -0
  22. flash/engine/worker/lora.py +137 -0
  23. flash/engine/worker/perf.py +467 -0
  24. flash/envs/__init__.py +10 -0
  25. flash/envs/adapter/__init__.py +384 -0
  26. flash/envs/adapter/rubric.py +222 -0
  27. flash/envs/base.py +49 -0
  28. flash/envs/registry.py +76 -0
  29. flash/mcp/__init__.py +1 -0
  30. flash/mcp/server.py +83 -0
  31. flash/providers/__init__.py +59 -0
  32. flash/providers/_auth.py +24 -0
  33. flash/providers/_http.py +100 -0
  34. flash/providers/_poll.py +87 -0
  35. flash/providers/allocator.py +173 -0
  36. flash/providers/base.py +496 -0
  37. flash/providers/preflight.py +80 -0
  38. flash/providers/runpod/__init__.py +108 -0
  39. flash/providers/runpod/api.py +109 -0
  40. flash/providers/runpod/auth.py +24 -0
  41. flash/providers/runpod/gpus.py +46 -0
  42. flash/providers/runpod/jobs.py +519 -0
  43. flash/providers/runpod/preflight.py +30 -0
  44. flash/providers/runpod/pricing.py +108 -0
  45. flash/providers/runpod/train/__init__.py +141 -0
  46. flash/providers/runpod/train/deps.py +371 -0
  47. flash/providers/runpod/train/endpoints.py +501 -0
  48. flash/providers/vast/__init__.py +120 -0
  49. flash/providers/vast/_bootstrap.py +288 -0
  50. flash/providers/vast/api.py +215 -0
  51. flash/providers/vast/auth.py +19 -0
  52. flash/providers/vast/gpus.py +21 -0
  53. flash/providers/vast/jobs/__init__.py +555 -0
  54. flash/providers/vast/jobs/builders.py +205 -0
  55. flash/providers/vast/preflight.py +27 -0
  56. flash/providers/vast/pricing.py +51 -0
  57. flash/providers/vast/train.py +27 -0
  58. flash/py.typed +0 -0
  59. flash/runner/__init__.py +290 -0
  60. flash/runner/deploy.py +349 -0
  61. flash/runner/lifecycle.py +437 -0
  62. flash/schema/__init__.py +285 -0
  63. flash/schema/fields.py +210 -0
  64. flash/serve/__init__.py +1 -0
  65. flash/serve/deploy.py +195 -0
  66. flash/server/__init__.py +1 -0
  67. flash/server/__main__.py +20 -0
  68. flash/server/app.py +424 -0
  69. flash/server/auth.py +132 -0
  70. flash/server/db.py +152 -0
  71. flash/server/envs.py +449 -0
  72. flash/spec.py +291 -0
  73. freesolo_flash-0.2.0.dist-info/METADATA +99 -0
  74. freesolo_flash-0.2.0.dist-info/RECORD +77 -0
  75. freesolo_flash-0.2.0.dist-info/WHEEL +4 -0
  76. freesolo_flash-0.2.0.dist-info/entry_points.txt +4 -0
  77. freesolo_flash-0.2.0.dist-info/licenses/LICENSE +201 -0
flash/__init__.py ADDED
@@ -0,0 +1,11 @@
1
+ """Flash — managed LoRA post-training: log in with your freesolo key, train.
2
+
3
+ A focused developer experience (TOML run specs, pluggable environments,
4
+ CLI/API/MCP entry points, adapter deployment). Users authenticate with their
5
+ freesolo API key (`flash login`); the control plane runs each job on a managed
6
+ GPU (RunPod or Vast.ai) behind the scenes.
7
+ """
8
+
9
+ __all__ = ["__version__"]
10
+
11
+ __version__ = "0.2.0"
flash/_fileio.py ADDED
@@ -0,0 +1,35 @@
1
+ """Small shared file-IO helpers for credential/manifest JSON under ``~/.flash``."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import contextlib
6
+ import json
7
+ import os
8
+ from pathlib import Path
9
+
10
+
11
def read_json_or_empty(path: Path) -> dict:
    """Parse a JSON object file, returning ``{}`` if it's missing or unusable.

    Args:
        path: Location of the JSON file to read.

    Returns:
        The decoded JSON object. An empty dict is returned when the file cannot
        be read, does not contain valid JSON, or its top-level value is not a
        JSON object (e.g. a list, string, number or ``null``) — so callers can
        always treat the result as a mapping.
    """
    try:
        parsed = json.loads(path.read_text())
    except (OSError, ValueError):
        # OSError: missing/unreadable file. ValueError: malformed JSON
        # (json.JSONDecodeError) or undecodable bytes (UnicodeDecodeError).
        return {}
    # A syntactically valid file may still hold a non-object value; honour the
    # declared ``dict`` return type instead of leaking it to callers.
    return parsed if isinstance(parsed, dict) else {}
17
+
18
+
19
def secure_json_write(path: Path, data: dict) -> None:
    """Write ``data`` as JSON with private permissions (the file may hold a secret).

    Creates the parent dir (0700) and opens the file 0600 from the start — never
    write_text + chmod, which leaves it umask-readable in between. ``O_NOFOLLOW``
    (where available) refuses to follow a symlink planted at ``path`` so the write
    can't be redirected to clobber an arbitrary file.

    The mode passed to ``os.open`` only takes effect when the file is created, so
    for a pre-existing file the permissions are tightened on the open descriptor
    (``os.fchmod``, where available) *before* any content is written.

    Args:
        path: Destination file; missing parent directories are created.
        data: JSON-serialisable mapping, written indented with sorted keys.

    Raises:
        OSError: If the directory or file cannot be created/opened (including a
            symlink at ``path`` when ``O_NOFOLLOW`` is supported).
        TypeError: If ``data`` contains values ``json`` cannot serialise.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    # Best effort: the directory may be owned by someone else or on a filesystem
    # without POSIX modes.
    with contextlib.suppress(OSError):
        os.chmod(path.parent, 0o700)
    flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC | getattr(os, "O_NOFOLLOW", 0)
    fd = os.open(path, flags, 0o600)
    with os.fdopen(fd, "w") as f:
        # Lock down an already-existing file before the secret hits the disk.
        fchmod = getattr(os, "fchmod", None)
        if fchmod is not None:
            with contextlib.suppress(OSError):
                fchmod(fd, 0o600)
        json.dump(data, f, indent=2, sort_keys=True)
    # Fallback for platforms without fchmod; harmless where it already ran.
    with contextlib.suppress(OSError):
        os.chmod(path, 0o600)
flash/_logging.py ADDED
@@ -0,0 +1,49 @@
1
+ """Package logging helpers.
2
+
3
+ Library code logs through the ``flash`` logger and never configures handlers on import (it
4
+ attaches a :class:`logging.NullHandler`), so importing Flash stays silent for downstream
5
+ applications. The CLI calls :func:`configure_logging` to attach a console handler whose
6
+ level is controlled by ``-v/--verbose``.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import logging
12
+
13
_ROOT_NAME = "flash"

# Install exactly one NullHandler on the package logger: this keeps the library
# silent on import (no "No handlers could be found" warning) until the host
# application opts in to output, and re-running this code does not stack a second one.
_root = logging.getLogger(_ROOT_NAME)
if all(not isinstance(existing, logging.NullHandler) for existing in _root.handlers):
    _root.addHandler(logging.NullHandler())
20
+
21
+
22
def get_logger(name: str | None = None) -> logging.Logger:
    """Return a logger inside the ``flash`` namespace.

    An empty/``None`` name or the namespace root itself yields the root ``flash``
    logger; a name already prefixed with ``flash.`` is used unchanged; any other
    name is nested under ``flash.``.
    """
    if not name or name == _ROOT_NAME:
        qualified = _ROOT_NAME
    elif name.startswith(_ROOT_NAME + "."):
        qualified = name
    else:
        qualified = f"{_ROOT_NAME}.{name}"
    return logging.getLogger(qualified)
29
+
30
+
31
def configure_logging(verbosity: int = 0, level: int | None = None) -> None:
    """Install a console handler on the ``flash`` logger at the requested level.

    ``verbosity`` is the count of ``-v`` flags: 0 -> WARNING, 1 -> INFO, anything
    else -> DEBUG. Passing ``level`` explicitly bypasses that mapping. Calling
    this again replaces the handler installed by a previous call.
    """
    if level is None:
        if verbosity == 0:
            level = logging.WARNING
        elif verbosity == 1:
            level = logging.INFO
        else:
            level = logging.DEBUG

    package_logger = logging.getLogger(_ROOT_NAME)
    package_logger.setLevel(level)

    # Drop console handlers from earlier calls (tagged with ``_flash_console``)
    # so handlers never accumulate; iterate over a copy while removing.
    for existing in list(package_logger.handlers):
        if getattr(existing, "_flash_console", False):
            package_logger.removeHandler(existing)

    console = logging.StreamHandler()  # stderr
    console.setLevel(level)
    console.setFormatter(logging.Formatter("%(levelname)s %(name)s: %(message)s"))
    console._flash_console = True  # type: ignore[attr-defined]
    package_logger.addHandler(console)
flash/catalog.py ADDED
@@ -0,0 +1,245 @@
1
+ """Curated model catalog for one-consumer-GPU LoRA jobs."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import math
6
+ from dataclasses import asdict, dataclass
7
+ from typing import Any
8
+
9
ALGORITHMS = ("sft", "grpo")


def normalize_algorithm(value: str) -> str:
    """Return the lowercased algorithm name, defaulting a falsy value to ``grpo``.

    Raises:
        ValueError: If the lowercased name is not one of ``ALGORITHMS``.
    """
    canonical = (value or "grpo").lower()
    if canonical in ALGORITHMS:
        return canonical
    raise ValueError(f"unsupported algorithm: {canonical}; known: {', '.join(ALGORITHMS)}")
18
+
19
+
20
# The default GPU class a run lands on when none is pinned (also the open-model-policy
# sizing reference and the spec/from_dict fallback). The managed GPU class set (KNOWN)
# lives in providers.base; per-provider classes and pricing live under
# providers/{runpod,vast}. Defined above ModelInfo so it can back the recommended_gpu
# field default.
DEFAULT_GPU = "RTX 5090"

# Output vocab (== config.vocab_size, the lm_head / logits width — the PADDED model vocab,
# NOT the raw tokenizer token count). Sizes the GRPO fp32-logits VRAM term (engine.vram) and
# the per-device completion cap (engine.worker.rl_per_device_comps). This is the open-model
# fallback; curated per-model values live on each ModelInfo below and are read via
# vocab_size_for(). Over-estimating is the memory-SAFE direction (smaller cap, larger VRAM
# estimate), so the fallback is the largest catalog vocab.
_DEFAULT_VOCAB_SIZE = 248_320


@dataclass(frozen=True)
class ModelInfo:
    """Immutable description of one trainable base model.

    ``id``, ``display_name``, ``params``, ``algos`` and ``min_vram_gb`` are
    required; every other field has a default. ``to_dict`` returns the plain-dict
    form produced by ``dataclasses.asdict``.
    """

    # Model identifier (the catalog below uses Hugging Face repo ids).
    id: str
    # Human-readable name.
    display_name: str
    # Free-text parameter-count description, e.g. "4.7B (text-only fine-tune)".
    params: str
    # Algorithms the model supports (members of ALGORITHMS).
    algos: tuple[str, ...]
    # Minimum GPU memory in GB.
    min_vram_gb: int
    quant: str = "bf16"
    recommended_gpu: str = DEFAULT_GPU
    # GRPO needs more VRAM than SFT (a colocated vLLM rollout engine holds a second copy of
    # the weights + KV cache). 0 => GRPO uses ``min_vram_gb`` like SFT; set it when the GRPO
    # tier needs a bigger card than SFT (the colocate 2nd weight copy + KV pool). Consumed by
    # engine.vram.model_required_vram_gb.
    grpo_min_vram_gb: int = 0
    notes: str = ""
    # Worker container disk this model needs (GB). 0 = the platform default (64 GB)
    # suffices. The runner raises gpu.disk_gb to at least this, so big-checkpoint
    # models whose weights alone exceed 64 GB work out of the box.
    min_disk_gb: int = 0
    # Thinking/reasoning capability of the checkpoint's chat template:
    #   "none"    no <think> support (or a non-thinking variant) — `thinking = true` is
    #             rejected for these models
    #   "hybrid"  template honors enable_thinking (Qwen3-style hybrid reasoning)
    #   "always"  the model always emits reasoning; enable_thinking can't turn it off,
    #             so `thinking = true` is required
    #   "unknown" open-model-policy entries (capability not verified)
    thinking: str = "none"
    # Output vocab = config.vocab_size (lm_head / logits width, the padded model vocab — not
    # the raw tokenizer count). Drives the GRPO fp32-logits memory term and the per-device
    # completion cap. Curated per model below; defaults to the open-model fallback.
    vocab_size: int = _DEFAULT_VOCAB_SIZE

    def to_dict(self) -> dict[str, Any]:
        """Return every field as a plain dict (via ``dataclasses.asdict``)."""
        return asdict(self)
70
+
71
+
72
# The default model Flash trains when a config omits one. A current-gen dense 4B
# (text-only fine-tune) on the modern worker stack — the safe out-of-the-box choice for
# the average developer. It is thinking-"hybrid"; the thinking flag now defaults ON.
DEFAULT_MODEL = "Qwen/Qwen3.5-4B"

# Curated catalog, keyed by model id; every key equals its entry's ``id`` field.
MODELS: dict[str, ModelInfo] = {
    "openbmb/MiniCPM5-1B": ModelInfo(
        id="openbmb/MiniCPM5-1B",
        display_name="MiniCPM5 1B",
        params="1.2B dense (Llama arch)",
        vocab_size=130_560,
        algos=("sft", "grpo"),
        min_vram_gb=12,
        recommended_gpu="RTX 4090",
        thinking="hybrid",
        notes="On-device class SLM (131k ctx); standard Llama architecture.",
    ),
    # ---- Qwen3.5 dense family: validated on the modern worker stack ----
    # (trl 1.x / vllm 0.19 / transformers 5.x). Trained + served TEXT-ONLY: the
    # checkpoints are natively multimodal, so LoRA excludes the vision tower and vLLM
    # loads language_model_only (see flash.engine.worker). Each entry passed a real
    # train+eval smoke on its recommended GPU (bench/results/phase1/).
    "Qwen/Qwen3.5-0.8B": ModelInfo(
        id="Qwen/Qwen3.5-0.8B",
        display_name="Qwen3.5 0.8B",
        params="0.9B (text-only fine-tune)",
        vocab_size=248_320,
        algos=("sft", "grpo"),
        min_vram_gb=12,
        recommended_gpu="RTX 4090",
        thinking="hybrid",
        notes="Smallest Qwen3.5; cheap smoke/dev runs with the modern arch.",
    ),
    "Qwen/Qwen3.5-2B": ModelInfo(
        id="Qwen/Qwen3.5-2B",
        display_name="Qwen3.5 2B",
        params="2.3B (text-only fine-tune)",
        vocab_size=248_320,
        algos=("sft", "grpo"),
        min_vram_gb=16,
        recommended_gpu="RTX 4090",
        thinking="hybrid",
    ),
    "Qwen/Qwen3.5-4B": ModelInfo(
        id="Qwen/Qwen3.5-4B",
        display_name="Qwen3.5 4B",
        params="4.7B (text-only fine-tune)",
        vocab_size=248_320,
        algos=("sft", "grpo"),
        min_vram_gb=32,
        recommended_gpu="RTX 5090",
        thinking="hybrid",
        notes="Current-gen 4B. GRPO uses the sleep-mode memory recipe (hybrid arch needs "
        "extra engine state-cache); fused DeltaNet kernels ship in the default stack.",
    ),
    "Qwen/Qwen3.5-9B": ModelInfo(
        id="Qwen/Qwen3.5-9B",
        display_name="Qwen3.5 9B",
        params="9.7B (text-only fine-tune)",
        vocab_size=248_320,
        algos=("sft", "grpo"),
        min_vram_gb=16,
        # MEMORY-OPTIMIZED: 4-bit NF4 frozen base + bf16 LoRA adapter (QLoRA). The base
        # drops from ~19 GB bf16 to ~5.3 GB, so colocated GRPO holds two 4-bit copies
        # (trainer + bnb-quantized vLLM rollout) instead of two bf16 copies -> it fits a
        # ~24-32 GB card instead of an 80 GB A100. NF4 is near-lossless for adapter training
        # (QLoRA paper + follow-ups), a small quality trade for a ~3x cheaper GPU. No GRPO
        # floor: the matrix sizes the (much smaller) 4-bit footprint directly.
        grpo_min_vram_gb=0,
        quant="4bit-qlora",
        recommended_gpu="RTX 5090",
        thinking="hybrid",
        notes="QLoRA (4-bit NF4 base + bf16 LoRA). GRPO's colocated vLLM rollout loads the "
        "base 4-bit via bitsandbytes too, so both copies are 4-bit -> fits ~24-32 GB "
        "instead of 80 GB bf16. ~near-lossless vs bf16 LoRA.",
    ),
}
149
+
150
+
151
def list_models() -> list[ModelInfo]:
    """Return all catalog entries ordered by minimum VRAM, then by model id."""
    ordered = list(MODELS.values())
    ordered.sort(key=lambda info: (info.min_vram_gb, info.id))
    return ordered
153
+
154
+
155
def get_model(model_id: str) -> ModelInfo:
    """Look up a curated catalog entry by id.

    Raises:
        ValueError: If ``model_id`` is not in the catalog; the message lists the
            known ids and mentions the open-model policy. The original
            ``KeyError`` is chained as the cause.
    """
    try:
        entry = MODELS[model_id]
    except KeyError as exc:
        allowed = ", ".join(MODELS)
        message = (
            f"unsupported model {model_id!r}; choose one of: {allowed} — or set "
            f'model_policy = "allow" in the config to run any HF model that fits the GPU '
            f"(open-model policy)"
        )
        raise ValueError(message) from exc
    return entry
165
+
166
+
167
def vocab_size_for(model_id: str) -> int:
    """Return the output vocab size (config.vocab_size, the lm_head / logits width) for
    ``model_id``.

    Curated catalog entries supply their own value; any id not in the catalog
    (open-model-policy models) gets ``_DEFAULT_VOCAB_SIZE``. The figure is the
    padded model vocab, not the raw tokenizer token count, and is what sizes the
    GRPO fp32-logits VRAM term and the per-device completion cap.
    """
    entry = MODELS.get(model_id)
    if entry is None:
        return _DEFAULT_VOCAB_SIZE
    return entry.vocab_size
174
+
175
+
176
def resolve_model(
    model_id: str,
    algorithm: str,
    policy: str = "catalog",
    gpu: str | None = None,
) -> ModelInfo:
    """Resolve ``model_id`` according to the model policy.

    * Curated catalog ids are always accepted (after checking the algorithm is
      supported), whatever the policy.
    * With ``policy="allow"`` an unlisted id is handed to the open-model path,
      which runs a coarse VRAM-fit estimate for ``gpu``.
    * With any other policy (the default is ``"catalog"``) an unlisted id is
      rejected with ``get_model``'s error, which carries the open-model hint.

    Raises:
        ValueError: Unknown algorithm, unsupported model/algorithm combination,
            or an unlisted model that is not permitted or does not fit.
    """
    canonical = normalize_algorithm(algorithm)
    if model_id in MODELS:
        return validate_model_for_algorithm(model_id, canonical)
    if policy == "allow":
        return _resolve_open_model(model_id, canonical, gpu)
    # Not curated and not allowed: get_model raises the descriptive error.
    return get_model(model_id)
195
+
196
+
197
def _resolve_open_model(model_id: str, algo: str, gpu: str | None) -> ModelInfo:
    """Build a synthetic ``ModelInfo`` for an unlisted model under the "allow" policy.

    Runs ``flash.engine.vram.check_fit`` against ``gpu`` (or ``DEFAULT_GPU``),
    raises when the verdict is ``too_big``, prints a warning for ``tight`` or
    ``unknown`` verdicts, and otherwise derives VRAM/disk floors from the
    estimate. The import is local so the engine.vram dependency stays out of the
    curated-catalog path.

    Raises:
        ValueError: If the estimate says the model does not fit the GPU.
    """
    from flash.engine.vram import check_fit

    fit = check_fit(model_id, algo, gpu or DEFAULT_GPU)
    if fit.verdict == "too_big":
        raise ValueError(
            f"{model_id} does not fit the requested GPU: {fit.describe()}. "
            f"Pick a smaller model or a larger supported GPU."
        )
    if fit.verdict in ("tight", "unknown"):
        print(f"warning: open-model policy: {fit.describe()}")

    if fit.params_b:
        size_label = f"{fit.params_b:.1f}B"
        # Disk floor: a bf16 checkpoint is ~2 GB per billion params, plus
        # worker-stack headroom, so a model that passes the VRAM check does not
        # later overflow the 64 GB container default while fetching weights.
        disk_floor = int(fit.params_b * 2) + 64
    else:
        size_label = "unknown size"
        # Unknown size: keep the platform default (users can raise gpu.disk_gb).
        disk_floor = 0

    if fit.est_gb:
        vram_floor = math.ceil(fit.est_gb)
    else:
        vram_floor = 24

    return ModelInfo(
        id=model_id,
        display_name=model_id,
        params=size_label,
        algos=ALGORITHMS,
        min_vram_gb=vram_floor,
        min_disk_gb=disk_floor,
        recommended_gpu=gpu or DEFAULT_GPU,
        thinking="unknown",
        notes="unlisted model accepted via the open-model policy (not curated/validated)",
    )
230
+
231
+
232
def validate_model_for_algorithm(model_id: str, algorithm: str) -> ModelInfo:
    """Return the catalog entry for ``model_id`` if it supports ``algorithm``.

    Raises:
        ValueError: Unknown model, unknown algorithm, or an algorithm the model
            does not advertise in ``algos``.
    """
    entry = get_model(model_id)
    canonical = normalize_algorithm(algorithm)
    # Entries advertise two capability classes: "grpo" (needs the colocated
    # rollout engine) and "sft" (trainer-only); anything not grpo maps to sft.
    needed = "sft" if canonical != "grpo" else "grpo"
    if needed in entry.algos:
        return entry
    allowed = ", ".join(entry.algos)
    raise ValueError(f"{model_id} supports {allowed}, not {canonical}")
242
+
243
+
244
def public_model_rows() -> list[dict[str, Any]]:
    """Return the catalog as plain dicts, in ``list_models`` order."""
    entries = list_models()
    return [entry.to_dict() for entry in entries]
flash/cli/__init__.py ADDED
@@ -0,0 +1 @@
1
+ """CLI package."""
flash/cli/main/__init__.py ADDED
@@ -0,0 +1,220 @@
1
+ """CLI for the managed Flash service.
2
+
3
+ Every run-lifecycle command is a thin HTTP call to the Flash control plane —
4
+ users authenticate with their freesolo API key (`flash login` verifies it against
5
+ the freesolo backend), never with provider credentials. Config parsing/validation
6
+ and `--dry-run` stay fully local.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import argparse
12
+ import sys
13
+
14
+ from flash import __version__
15
+ from flash._logging import configure_logging, get_logger
16
+
17
+ # Command handlers + the patched client surface live in submodules; re-export them so
18
+ # `flash.cli.main` stays the single public import surface (and so monkeypatching
19
+ # `flash.cli.main.commands` reaches the bare globals the handlers read).
20
+ from flash.cli.main.commands import ( # noqa: F401
21
+ _CLI_DONE_STATES,
22
+ _OK_STATES,
23
+ _STARTER_ENV_PY,
24
+ _USER_ERRORS,
25
+ _follow_run,
26
+ _poll_logs,
27
+ client_from_config,
28
+ cmd_attach,
29
+ cmd_cancel,
30
+ cmd_chat,
31
+ cmd_cost,
32
+ cmd_deploy,
33
+ cmd_deployments,
34
+ cmd_env_init,
35
+ cmd_env_list,
36
+ cmd_gpus,
37
+ cmd_lab_setup,
38
+ cmd_login,
39
+ cmd_logs,
40
+ cmd_models,
41
+ cmd_ps,
42
+ cmd_status,
43
+ cmd_train,
44
+ cmd_undeploy,
45
+ cmd_version,
46
+ cmd_whoami,
47
+ verify_freesolo_key,
48
+ )
49
+ from flash.cli.main.envpush import cmd_env_install, cmd_env_push
50
+
51
+ logger = get_logger("flash.cli.main")
52
+
53
+
54
def main(argv: list[str] | None = None) -> int:
    """Parse the ``flash`` command line and dispatch to the selected handler.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read ``sys.argv``.

    Returns:
        The handler's return value, ``1`` after printing a user-facing error
        (any exception in ``_USER_ERRORS``), or ``130`` when interrupted with
        Ctrl-C.

    Raises:
        Any ``_USER_ERRORS`` exception, re-raised unchanged when ``--debug`` is
        given so the full traceback is shown.
    """
    parser = argparse.ArgumentParser(prog="flash", description="Managed LoRA post-training")
    parser.add_argument("-V", "--version", action="version", version=f"flash {__version__}")
    parser.add_argument(
        "--debug",
        action="store_true",
        help="show full tracebacks on error",
    )
    parser.add_argument(
        "-v",
        "--verbose",
        action="count",
        default=0,
        help="increase log verbosity (-v for info, -vv for debug)",
    )
    sub = parser.add_subparsers(dest="cmd", required=True)

    version = sub.add_parser("version", help="print the Flash version")
    version.set_defaults(func=cmd_version)

    login = sub.add_parser("login", help="log in with your freesolo API key (verified by freesolo)")
    login.add_argument(
        "--api-key",
        help="your freesolo API key (default: FREESOLO_API_KEY); created in the dashboard",
    )
    login.add_argument(
        "--freesolo-url",
        dest="freesolo_url",
        help="freesolo backend base URL (default: FREESOLO_BASE_URL or https://api.freesolo.co)",
    )
    login.add_argument(
        "--api-url", help="flash control-plane URL for training calls (default: FLASH_API_URL)"
    )
    login.set_defaults(func=cmd_login)

    whoami = sub.add_parser("whoami", help="show the identity behind your stored key")
    whoami.set_defaults(func=cmd_whoami)

    lab = sub.add_parser("lab", help="local authoring scaffolds")
    lab_sub = lab.add_subparsers(dest="lab_cmd", required=True)
    setup = lab_sub.add_parser("setup", help="scaffold environments/ + configs/ in the cwd")
    setup.set_defaults(func=cmd_lab_setup)

    models = sub.add_parser("models", help="list supported base models")
    models.set_defaults(func=cmd_models)

    gpus = sub.add_parser("gpus", help="list managed GPU classes with live $/hr")
    gpus.set_defaults(func=cmd_gpus)

    env = sub.add_parser("env", help="manage verifiers environments")
    env_sub = env.add_subparsers(dest="env_cmd", required=True)
    init = env_sub.add_parser("init", help="scaffold a new local verifiers environment")
    init.add_argument("name")
    init.set_defaults(func=cmd_env_init)

    env_list = env_sub.add_parser("list", help="list installed + local environments")
    env_list.set_defaults(func=cmd_env_list)

    env_install = env_sub.add_parser("install", help="install a published Prime Hub environment")
    env_install.add_argument("env_id", help='the env id to install (a Hub slug, "owner/name")')
    env_install.set_defaults(func=cmd_env_install)

    env_push = env_sub.add_parser(
        "push", help="publish a local verifiers env to the Prime Hub (private); prints its env id"
    )
    env_push.add_argument("path", nargs="?", default=".")
    env_push.set_defaults(func=cmd_env_push)

    train = sub.add_parser("train", help="submit a managed training run from a TOML config")
    train.add_argument("config")
    train.add_argument(
        "--config",
        dest="extra_configs",
        action="append",
        default=[],
        help="additional TOML to deep-merge (config composition); repeatable",
    )
    train.add_argument(
        "--set",
        dest="overrides",
        action="append",
        default=[],
        metavar="key=value",
        help="override a config value; repeatable",
    )
    train.add_argument("--dry-run", action="store_true")
    train.add_argument(
        "--background",
        action="store_true",
        help="submit and return immediately instead of following logs",
    )
    train.set_defaults(func=cmd_train)

    status = sub.add_parser("status", help="show a run's full status JSON")
    status.add_argument("run_id")
    status.set_defaults(func=cmd_status)

    attach = sub.add_parser(
        "attach", help="follow a running job's logs to completion (resumable any time)"
    )
    attach.add_argument("run_id")
    attach.set_defaults(func=cmd_attach)

    ps = sub.add_parser("ps", help="list runs and their state/cost")
    ps.set_defaults(func=cmd_ps)

    cost = sub.add_parser("cost", help="show a run's accrued cost (USD)")
    cost.add_argument("run_id")
    cost.set_defaults(func=cmd_cost)

    cancel = sub.add_parser("cancel", help="cancel a run (best-effort)")
    cancel.add_argument("run_id")
    cancel.set_defaults(func=cmd_cancel)

    # Give every subcommand a help string so none appears blank in `flash --help`.
    logs = sub.add_parser("logs", help="show a run's logs")
    logs.add_argument("run_id")
    logs.add_argument("-f", "--follow", action="store_true", help="stream new log lines")
    logs.set_defaults(func=cmd_logs)

    deploy = sub.add_parser("deploy", help="deploy a run's adapter to a serving endpoint")
    deploy.add_argument("run_id")
    deploy.add_argument(
        "--mode",
        choices=["dev", "always-on"],
        default="dev",
        help="dev: scale-to-zero, cold start after idle, $0 when unused (default). "
        "always-on: one warm worker 24/7, no cold starts, continuous billing.",
    )
    deploy.add_argument(
        "--idle-timeout",
        type=int,
        default=300,
        help="dev mode: seconds of inactivity before the worker scales to zero (default 300)",
    )
    deploy.add_argument("--dry-run", action="store_true")
    deploy.set_defaults(func=cmd_deploy)

    undeploy = sub.add_parser("undeploy", help="tear down a run's serving endpoint")
    undeploy.add_argument("run_id")
    undeploy.set_defaults(func=cmd_undeploy)

    deployments = sub.add_parser("deployments", help="list active serving deployments")
    deployments.set_defaults(func=cmd_deployments)

    chat = sub.add_parser("chat", help="chat with a deployed adapter")
    chat.add_argument("run_id")
    chat.add_argument("-m", "--message", required=True)
    chat.add_argument("--max-tokens", type=int, default=512)
    chat.add_argument("--temperature", type=float, default=0.0)
    chat.set_defaults(func=cmd_chat)

    # The control plane is operator-only and run as a separate one-off service via the
    # `flash-server` console script (flash.server.__main__:main), not a `flash` subcommand.

    args = parser.parse_args(argv)
    configure_logging(verbosity=getattr(args, "verbose", 0))
    debug = getattr(args, "debug", False)
    try:
        return args.func(args)
    except _USER_ERRORS as exc:
        # With --debug, let the exception propagate so the traceback is printed.
        if debug:
            raise
        print(f"error: {exc}", file=sys.stderr)
        return 1
    except KeyboardInterrupt:
        print("aborted", file=sys.stderr)
        # 130 = 128 + SIGINT, the conventional shell exit status for Ctrl-C.
        return 130
flash/cli/main/__main__.py ADDED
@@ -0,0 +1,6 @@
1
"""Run the Flash CLI when this module is executed as a script."""
import sys

from flash.cli.main import main

if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    sys.exit(main())