freesolo-flash-dev 0.2.25__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111) hide show
  1. flash/__init__.py +29 -0
  2. flash/_channel.py +23 -0
  3. flash/_fileio.py +35 -0
  4. flash/_logging.py +49 -0
  5. flash/_update_check.py +266 -0
  6. flash/catalog.py +253 -0
  7. flash/cli/__init__.py +1 -0
  8. flash/cli/main/__init__.py +227 -0
  9. flash/cli/main/__main__.py +6 -0
  10. flash/cli/main/commands.py +636 -0
  11. flash/cli/main/envpush.py +317 -0
  12. flash/cli/main/render.py +599 -0
  13. flash/cli/main/training_doc.py +455 -0
  14. flash/client/__init__.py +14 -0
  15. flash/client/config.py +70 -0
  16. flash/client/http.py +372 -0
  17. flash/client/runtime_secrets.py +69 -0
  18. flash/client/specs.py +20 -0
  19. flash/cost/__init__.py +16 -0
  20. flash/cost/analytical.py +175 -0
  21. flash/cost/facts.py +114 -0
  22. flash/cost/spec.py +113 -0
  23. flash/cost/types.py +158 -0
  24. flash/engine/__init__.py +6 -0
  25. flash/engine/accounting.py +36 -0
  26. flash/engine/chalk_kernels.py +116 -0
  27. flash/engine/multiturn_rollout.py +780 -0
  28. flash/engine/recipe.py +86 -0
  29. flash/engine/vram.py +603 -0
  30. flash/engine/worker/__init__.py +2916 -0
  31. flash/engine/worker/__main__.py +4 -0
  32. flash/engine/worker/kernel_warmup.py +400 -0
  33. flash/engine/worker/lora.py +796 -0
  34. flash/engine/worker/packing.py +366 -0
  35. flash/engine/worker/perf.py +1048 -0
  36. flash/envs/__init__.py +10 -0
  37. flash/envs/adapter/__init__.py +883 -0
  38. flash/envs/adapter/rubric.py +222 -0
  39. flash/envs/base.py +52 -0
  40. flash/envs/registry.py +62 -0
  41. flash/mcp/__init__.py +1 -0
  42. flash/mcp/server.py +85 -0
  43. flash/providers/__init__.py +59 -0
  44. flash/providers/_auth.py +24 -0
  45. flash/providers/_http.py +230 -0
  46. flash/providers/_instance.py +416 -0
  47. flash/providers/_instance_bootstrap.py +517 -0
  48. flash/providers/_poll.py +311 -0
  49. flash/providers/allocator.py +193 -0
  50. flash/providers/base.py +431 -0
  51. flash/providers/hyperstack/__init__.py +127 -0
  52. flash/providers/hyperstack/api.py +522 -0
  53. flash/providers/hyperstack/auth.py +17 -0
  54. flash/providers/hyperstack/gpus.py +29 -0
  55. flash/providers/hyperstack/jobs/__init__.py +632 -0
  56. flash/providers/hyperstack/jobs/builders.py +122 -0
  57. flash/providers/hyperstack/preflight.py +23 -0
  58. flash/providers/hyperstack/pricing.py +26 -0
  59. flash/providers/hyperstack/train.py +25 -0
  60. flash/providers/lambdalabs/__init__.py +139 -0
  61. flash/providers/lambdalabs/api.py +261 -0
  62. flash/providers/lambdalabs/auth.py +18 -0
  63. flash/providers/lambdalabs/gpus.py +29 -0
  64. flash/providers/lambdalabs/jobs/__init__.py +724 -0
  65. flash/providers/lambdalabs/jobs/builders.py +118 -0
  66. flash/providers/lambdalabs/preflight.py +27 -0
  67. flash/providers/lambdalabs/pricing.py +51 -0
  68. flash/providers/lambdalabs/train.py +27 -0
  69. flash/providers/preflight.py +55 -0
  70. flash/providers/realized.py +80 -0
  71. flash/providers/runpod/__init__.py +130 -0
  72. flash/providers/runpod/api.py +186 -0
  73. flash/providers/runpod/auth.py +37 -0
  74. flash/providers/runpod/cost.py +57 -0
  75. flash/providers/runpod/gpus.py +46 -0
  76. flash/providers/runpod/jobs.py +956 -0
  77. flash/providers/runpod/keys.py +139 -0
  78. flash/providers/runpod/preflight.py +30 -0
  79. flash/providers/runpod/preload.py +915 -0
  80. flash/providers/runpod/pricing.py +18 -0
  81. flash/providers/runpod/slots.py +79 -0
  82. flash/providers/runpod/train/__init__.py +150 -0
  83. flash/providers/runpod/train/deps.py +395 -0
  84. flash/providers/runpod/train/endpoints.py +820 -0
  85. flash/py.typed +0 -0
  86. flash/runner/__init__.py +686 -0
  87. flash/runner/checkpoints.py +82 -0
  88. flash/runner/deploy.py +422 -0
  89. flash/runner/lifecycle.py +672 -0
  90. flash/schema/__init__.py +375 -0
  91. flash/schema/fields.py +331 -0
  92. flash/serve/__init__.py +1 -0
  93. flash/serve/deploy.py +326 -0
  94. flash/serve/pricing.py +60 -0
  95. flash/server/__init__.py +1 -0
  96. flash/server/__main__.py +20 -0
  97. flash/server/app.py +961 -0
  98. flash/server/auth.py +263 -0
  99. flash/server/billing.py +124 -0
  100. flash/server/checkpoints.py +110 -0
  101. flash/server/db.py +160 -0
  102. flash/server/environment_registry.py +102 -0
  103. flash/server/envs.py +360 -0
  104. flash/server/reconcile.py +163 -0
  105. flash/server/run_registry.py +150 -0
  106. flash/spec.py +333 -0
  107. freesolo_flash_dev-0.2.25.dist-info/METADATA +192 -0
  108. freesolo_flash_dev-0.2.25.dist-info/RECORD +111 -0
  109. freesolo_flash_dev-0.2.25.dist-info/WHEEL +4 -0
  110. freesolo_flash_dev-0.2.25.dist-info/entry_points.txt +3 -0
  111. freesolo_flash_dev-0.2.25.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,150 @@
1
+ """Best-effort reporting of managed Flash runs/checkpoints to the Freesolo backend."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import contextlib
6
+ import json
7
+ import logging
8
+ import os
9
+ import urllib.error
10
+ import urllib.request
11
+ from datetime import UTC, datetime
12
+ from typing import Any
13
+
14
+ from .auth import INTERNAL_KEY_ENV, freesolo_base_url
15
+
16
# Module logger; failed report attempts are logged here as warnings by _post.
_LOG = logging.getLogger("flash.server.runs")
# Timeout (seconds) passed to urllib.request.urlopen for each report request.
_TIMEOUT_S = 10.0
# Backend path record_training_run posts to (appended to freesolo_base_url()).
_RUN_PATH = "/api/flash/runs/internal"
# Backend path record_training_checkpoint posts to (appended to freesolo_base_url()).
_CHECKPOINT_PATH = "/api/flash/runs/checkpoints/internal"
20
+
21
+
22
+ def _iso_from_epoch(value: float | int | None) -> str | None:
23
+ if value is None:
24
+ return None
25
+ try:
26
+ return datetime.fromtimestamp(float(value), tz=UTC).isoformat()
27
+ except (TypeError, ValueError, OSError):
28
+ return None
29
+
30
+
31
def _post(path: str, body: dict[str, Any]) -> bool:
    """POST ``body`` as JSON to the Freesolo backend at ``path`` and report success.

    Returns ``True`` only for a 2xx response. Returns ``False`` without sending anything
    when the internal-key environment variable is unset or empty, and after logging a
    warning when the request fails with an HTTP or network error.
    """
    token = os.environ.get(INTERNAL_KEY_ENV)
    if not token:
        return False

    url = f"{freesolo_base_url()}{path}"
    payload = json.dumps(body).encode("utf-8")
    request_headers = {
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/json",
    }
    request = urllib.request.Request(
        url, data=payload, method="POST", headers=request_headers
    )

    try:
        with urllib.request.urlopen(request, timeout=_TIMEOUT_S) as response:
            return 200 <= response.status < 300
    except urllib.error.HTTPError as http_err:
        # Include at most 500 characters of the error body; reading it is itself best-effort.
        snippet = ""
        with contextlib.suppress(Exception):
            snippet = http_err.read().decode("utf-8", "replace")[:500]
        _LOG.warning("failed to report %s: HTTP %s %s", path, http_err.code, snippet)
    except (urllib.error.URLError, OSError) as net_err:
        _LOG.warning("failed to report %s: %s", path, net_err)
    return False
55
+
56
+
57
+ def _context_from_status(status: Any) -> dict[str, Any]:
58
+ platform = getattr(status, "platform_context", None)
59
+ if isinstance(platform, dict):
60
+ return platform
61
+ billing = getattr(status, "billing_context", None)
62
+ if isinstance(billing, dict):
63
+ return billing
64
+ return {}
65
+
66
+
67
+ def _spec_from_status(status: Any) -> dict[str, Any]:
68
+ spec = getattr(status, "spec", None)
69
+ return spec if isinstance(spec, dict) else {}
70
+
71
+
72
+ def _managed_environment_slug(spec: dict[str, Any]) -> str | None:
73
+ env = spec.get("environment") if isinstance(spec.get("environment"), dict) else {}
74
+ env_id = env.get("id")
75
+ if not isinstance(env_id, str) or not env_id.strip():
76
+ return None
77
+ try:
78
+ from flash.envs.adapter import is_managed_environment_slug
79
+
80
+ return env_id.strip() if is_managed_environment_slug(env_id.strip()) else None
81
+ except Exception:
82
+ return None
83
+
84
+
85
def record_training_run(*, status: Any, key: dict[str, Any] | None = None) -> bool:
    """Report a run's current status to the Freesolo backend.

    The context comes from ``_context_from_status(status)`` overlaid with ``key`` when
    given. If the merged context has no non-blank ``org_id`` nothing is sent and ``False``
    is returned; otherwise the payload is posted to ``_RUN_PATH`` and the result of
    ``_post`` is returned.
    """
    merged_context = dict(_context_from_status(status))
    if key:
        merged_context.update(key)
    org_id = str(merged_context.get("org_id") or "").strip()
    if org_id == "":
        return False

    spec = _spec_from_status(status)
    gpu_table = spec.get("gpu")
    if not isinstance(gpu_table, dict):
        gpu_table = {}

    def _str_field(table: dict[str, Any], name: str) -> str | None:
        # Only string values are forwarded; anything else is reported as null.
        candidate = table.get(name)
        return candidate if isinstance(candidate, str) else None

    payload = {
        "orgId": org_id,
        "runId": status.run_id,
        "status": status.state,
        "userId": merged_context.get("user_id"),
        "apiKeyId": merged_context.get("api_key_id"),
        "environmentSlug": _managed_environment_slug(spec),
        "model": _str_field(spec, "model"),
        "algorithm": _str_field(spec, "algorithm"),
        "phase": _str_field(spec, "phase"),
        "gpuType": _str_field(gpu_table, "type"),
        "costUsd": status.cost_usd,
        "realizedCostUsd": status.realized_cost_usd,
        "adapterRef": status.to_dict().get("adapter_ref"),
        "artifactsDir": status.artifacts_dir,
        "error": status.error,
        "spec": spec,
        "deployment": status.deployment,
        "lastHeartbeat": status.last_heartbeat,
        "gpuStatus": status.gpu_status,
        "createdAt": _iso_from_epoch(status.created_at),
        "updatedAt": _iso_from_epoch(status.updated_at),
        "metadata": {"source": "flash.control_plane"},
    }
    return _post(_RUN_PATH, payload)
118
+
119
+
120
def record_training_checkpoint(
    *,
    spec: Any,
    seed: int,
    metrics: dict[str, Any],
    artifact_path: str,
) -> bool:
    """Report one seed's checkpoint for ``spec.run_id`` to the Freesolo backend.

    Returns ``False`` without posting when the run status or adapter ref cannot be
    obtained (any exception, including a failed import of ``flash.runner``) or when the
    status context carries no non-blank ``org_id``. Otherwise returns the result of
    posting to ``_CHECKPOINT_PATH``.
    """
    try:
        # Imported lazily inside the guard so an import failure is treated as "cannot report".
        from flash.runner import adapter_ref, get_status

        run_status = get_status(spec.run_id)
        checkpoint_ref = adapter_ref(spec, seed=seed)
    except Exception:
        return False

    org_id = str(_context_from_status(run_status).get("org_id") or "").strip()
    if org_id == "":
        return False

    payload = {
        "orgId": org_id,
        "runId": spec.run_id,
        "checkpointId": f"seed{seed}",
        "seed": seed,
        "phase": getattr(spec, "phase", None),
        "adapterRef": checkpoint_ref,
        "artifactPath": artifact_path,
        "metrics": metrics,
        "metadata": {"source": "flash.control_plane"},
        "updatedAt": _iso_from_epoch(getattr(run_status, "updated_at", None)),
    }
    return _post(_CHECKPOINT_PATH, payload)
flash/spec.py ADDED
@@ -0,0 +1,333 @@
1
+ """Structured job specification shared by CLI/API/runner and GPU workers."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import os
7
+ from dataclasses import asdict, dataclass, field
8
+ from typing import Any
9
+
10
+ from .catalog import DEFAULT_GPU, DEFAULT_MODEL, normalize_algorithm
11
+
12
# Strings that coerce_bool treats as False (compared after strip() + lower()).
_FALSE_STRINGS = {"", "0", "false", "no", "off", "none"}
13
+
14
+
15
+ def _str_tuple(value: Any) -> tuple[str, ...]:
16
+ """Normalize a string-or-list knob (e.g. stop_sequences) to a tuple of strings.
17
+
18
+ A bare string is ONE element — never iterated into characters ("</s>" must not become
19
+ ('<','/','s','>')). None and empty strings -> () (no stop configured); empty entries
20
+ in a list are dropped."""
21
+ if value is None:
22
+ return ()
23
+ if isinstance(value, str):
24
+ return (value,) if value else ()
25
+ return tuple(s for s in (str(x) for x in value) if s)
26
+
27
+
28
def coerce_bool(value: Any) -> bool:
    """Parse a bool from loosely-typed input (JSON bodies, env values, persisted dicts).

    Plain ``bool(...)`` is truthy for every non-empty string, so "false"/"0"/"no" would
    come out True. Strings are therefore stripped, lower-cased and checked against
    ``_FALSE_STRINGS``; every other value (including real bools) goes through ``bool``.
    """
    if not isinstance(value, str):
        return bool(value)
    normalized = value.strip().lower()
    return normalized not in _FALSE_STRINGS
38
+
39
+
40
+ def _coerce_str_map(value: Any) -> dict[str, str]:
41
+ """Coerce a loosely-typed spec field into a ``dict[str, str]``.
42
+
43
+ A malformed persisted spec (or programmatic caller) can set a mapping field to a non-dict;
44
+ `.items()` on that would crash `from_dict` with AttributeError. Treat a non-dict as empty,
45
+ mirroring how the other nested fields tolerate missing/garbage input.
46
+ """
47
+ if not isinstance(value, dict):
48
+ return {}
49
+ return {str(k): str(v) for k, v in value.items()}
50
+
51
+
52
def _coerce_wandb(value: Any) -> WandbSpec:
    """Build a ``WandbSpec`` from a loosely-typed ``wandb`` spec field.

    A malformed or older persisted spec may carry a non-dict here (e.g. a bare string);
    that is treated as empty and yields the default ``WandbSpec()``. For a dict, the
    ``project`` and ``run_name`` leaves are string-coerced and trimmed, with ``None`` or a
    blank result becoming ``None``.
    """
    if not isinstance(value, dict):
        return WandbSpec()

    labels: dict[str, str | None] = {}
    for name in ("project", "run_name"):
        raw = value.get(name)
        text = "" if raw is None else str(raw).strip()
        labels[name] = text or None
    return WandbSpec(project=labels["project"], run_name=labels["run_name"])
70
+
71
+
72
+ def _volume_gb(value: Any, default: int = 100) -> int:
73
+ """Parse the platform-managed weight-cache volume size, defaulting on anything not a positive int.
74
+
75
+ Tolerant by design (the field is platform-set, and stale/hand-edited specs must still load): a
76
+ missing / null / empty / non-numeric value, or a non-positive size (incl. the string "0" or a
77
+ negative), all fall back to ``default`` rather than crashing or round-tripping a nonsensical size.
78
+ """
79
+ if isinstance(value, bool):
80
+ # bool is an int subclass (int(True) == 1), so a stray boolean would become a 1 GB volume;
81
+ # treat it as invalid and default (mirrors the bool rejection in _opt_int).
82
+ return default
83
+ try:
84
+ gb = int(value)
85
+ except (TypeError, ValueError):
86
+ return default
87
+ return gb if gb > 0 else default
88
+
89
+
90
+ def _opt_int(value: Any) -> int | None:
91
+ """Parse an optional int from a loosely-typed spec source; None stays None.
92
+
93
+ Rejects JSON booleans: ``bool`` is an ``int`` subclass in Python, so ``int(True)`` would
94
+ silently coerce a stray boolean train knob to 1 (and ``False`` to 0). Mirrors the
95
+ bool rejection in schema._train_int — a bool is a type error, not a number.
96
+ """
97
+ if value is None:
98
+ return None
99
+ if isinstance(value, bool):
100
+ raise TypeError(f"expected a number, got bool {value!r}")
101
+ return int(value)
102
+
103
+
104
+ def _opt_float(value: Any) -> float | None:
105
+ """Parse an optional float from a loosely-typed spec source; None stays None.
106
+
107
+ Rejects JSON booleans (``bool`` is an ``int`` subclass) so a stray boolean train knob is
108
+ not silently coerced to 0.0/1.0; mirrors the bool rejection in schema._train_float.
109
+ """
110
+ if value is None:
111
+ return None
112
+ if isinstance(value, bool):
113
+ raise TypeError(f"expected a number, got bool {value!r}")
114
+ return float(value)
115
+
116
+
117
@dataclass(frozen=True)
class EnvironmentSpec:
    """The ``environment`` section of a ``JobSpec``: which Freesolo environment a run uses.

    Frozen dataclass; ``JobSpec.from_dict`` builds it from the ``environment`` table.
    """

    # Freesolo environment id. No default:
    # a run must name an environment explicitly (validated in schema / the worker).
    id: str = ""
    # Environment parameters; JobSpec.from_dict copies them into a plain dict.
    params: dict[str, Any] = field(default_factory=dict)
    # Pip requirements the GPU worker needs for this environment.
    # Filled in client-side from the local install manifest so the managed control
    # plane never depends on client-local state; empty means "derive on the server".
    pip: tuple[str, ...] = ()
    # Secret env var names the environment requires on the worker. Values are never stored in the
    # spec; the client reads matching local env/.env values and sends them out-of-band via
    # runtime_secrets.
    secrets: tuple[str, ...] = ()
    # Optional pinned commit SHA for the environment's GitHub ref, resolved ONCE in the control
    # plane (runner._assign_resolved_env_sha, called from submit_job after the spec is finalized) so
    # every worker boots from an immutable sha instead of each one re-resolving the symbolic ref
    # (e.g. "main") against the GitHub commits API — which trips GitHub's secondary rate limit on a
    # cold spawn wave. Empty (the default, and whenever the control-plane resolve fails) preserves
    # today's behavior: the worker resolves the ref itself. The adapter only trusts a real 40-char
    # sha (see adapter._resolve_ref_sha), so a stale/garbage value falls back to live resolution.
    resolved_sha: str = ""
139
+
140
+
141
@dataclass(frozen=True)
class TrainSpec:
    """The ``train`` section of a ``JobSpec``: training length, LoRA shape and recipe knobs.

    Frozen dataclass; ``JobSpec.from_dict`` builds it from the ``train`` table. Optional
    knobs default to ``None`` / ``()``, which the comments below describe as "use the
    recipe default".
    """

    # Training length, in optimizer steps and/or epochs. Both are optional (None by default).
    steps: int | None = None
    epochs: int | None = None
    # LoRA adapter rank and alpha.
    lora_rank: int = 32
    lora_alpha: int = 64
    # Seeds to train; checkpoints and adapter refs are identified per seed ("seed<N>").
    seeds: tuple[int, ...] = (0,)
    # Artifact-store adapter ref output by `flash status`:
    # ``<hf_repo>:<phase>/<run_id>/seed<N>``.
    init_from_adapter: str = ""
    # Per-run HuggingFace artifact repo ("owner/name") for this run's adapter/checkpoint/
    # code storage AND serving. PLATFORM-MANAGED, not a user field: the control plane assigns
    # it server-side in runner.submit_job (a per-run private dataset under the operator's
    # namespace, written by the operator HF_TOKEN). A user-supplied value is ignored by
    # schema.spec_from_dict; this field carries the control-plane-assigned repo to the worker.
    hf_repo: str = ""
    # Optimizer/batching knobs (SFT + GRPO). None -> the worker's tuned recipe default.
    # batch_size is the GLOBAL/effective batch (SFT: grad-accum is sized to hit it; GRPO:
    # prompts per optimizer step). max_length is the SFT max sequence length. save_every
    # is the checkpoint interval in optimizer steps.
    learning_rate: float | None = None
    batch_size: int | None = None
    max_length: int | None = None
    save_every: int | None = None
    # SFT caps (None/0 -> no cap). max_steps caps optimizer steps (cheap pre-flight smoke);
    # max_examples truncates the SFT dataset.
    max_steps: int | None = None
    max_examples: int | None = None
    # GRPO recipe knobs (datums parity), shipped by the SDK in [train] (NOT in
    # [environment.params], which is forwarded verbatim to the Freesolo env loader).
    # None/() -> recipe default. group_size = completions per prompt; temperature = rollout
    # sampling temp; max_tokens = completion budget; kl_penalty_coef = KL beta;
    # advantage_clip = centered-advantage clamp; thinking_length_penalty_coef =
    # per-<think>-token reward deduction; stop_sequences = rollout stop strings.
    group_size: int | None = None
    temperature: float | None = None
    max_tokens: int | None = None
    kl_penalty_coef: float | None = None
    advantage_clip: float | None = None
    thinking_length_penalty_coef: float | None = None
    stop_sequences: tuple[str, ...] = ()
182
+
183
+
184
@dataclass(frozen=True)
class GpuSpec:
    """The ``gpu`` section of a ``JobSpec``: GPU class, disk, wall-clock and retry budget.

    Frozen dataclass; ``JobSpec.from_dict`` builds it from the ``gpu`` table.
    """

    # The parse-time provisional GPU class (cheapest VALIDATED class that fits the model). GPU
    # pinning is gone: the submit-time allocator always re-picks the cheapest fitting validated
    # active RunPod class, so a config's gpu.type does NOT pin — ``type`` is just the offline
    # sizing/display default and the carrier the runner overwrites with the actually-allocated
    # class.
    type: str = DEFAULT_GPU
    # Disk size in GB.
    disk_gb: int = 60
    # Wall-clock limit for the run in seconds (default: 24 hours).
    max_wall_seconds: int = 24 * 3600
    # Auto-resubmit budget for infra-shaped failures (worker loss / stall / timeout);
    # each retry resumes from the latest streamed checkpoint.
    max_retries: int = 2
    # Persistent RunPod network-volume weight cache (platform-managed, NOT user config). When set,
    # the RunPod provider attaches a same-named volume in EVERY datacenter in the cache fleet and
    # allows the endpoint across all of them (no single-DC pin), and the worker points HF_HOME at
    # the mount so a model download is a one-time cost per region instead of per run. Assigned by
    # the runner (``_assign_weight_cache_volume``); a single fixed datacenter is intentionally NOT
    # a field — the DC SET is deploy-time platform policy (see jobs.weight_cache_datacenters), so a
    # run can never be region-pinned. ``None`` = no volume (cold download, cross-region).
    network_volume: str | None = None
    # Size of the weight-cache volume in GB; parsed tolerantly by _volume_gb in JobSpec.from_dict.
    network_volume_gb: int = 100
206
+
207
+
208
@dataclass(frozen=True)
class WandbSpec:
    """Optional Weights & Biases naming for a run (the ``wandb`` section of a ``JobSpec``)."""

    # Optional W&B naming, defined in the [wandb] config table (first-class spec config, NOT
    # env vars). project/run_name are non-secret labels; the actual WANDB_API_KEY stays an
    # env-var secret. None -> the worker's defaults ("flash" project,
    # "flash-<phase>-<run_id>-seedN" run name).
    project: str | None = None
    run_name: str | None = None
216
+
217
+
218
@dataclass(frozen=True)
class JobSpec:
    """Structured description of one training job, round-tripped as a dict / JSON.

    ``to_dict`` / ``to_json`` serialize the spec; ``from_dict`` / ``from_json`` rebuild it
    from loosely-typed input, coercing each field to its declared type.
    """

    model: str = DEFAULT_MODEL
    algorithm: str = "grpo"
    environment: EnvironmentSpec = field(default_factory=EnvironmentSpec)
    train: TrainSpec = field(default_factory=TrainSpec)
    gpu: GpuSpec = field(default_factory=GpuSpec)
    run_id: str = "local"
    # Per-run worker-environment overrides merged into the GPU worker's env (highest precedence
    # over the control-plane os.environ allowlist). The escape hatch for A/B kernel experiments
    # that must differ PER RUN, not globally: e.g. an optimizer or LoRA-init override on just the
    # experiment run while others keep the global default. Forwarded verbatim (string values);
    # never set secrets here.
    worker_env: dict[str, str] = field(default_factory=dict)
    # "catalog" (curated models only) or "allow" (any HF model that fits the GPU).
    model_policy: str = "catalog"
    # Thinking/reasoning mode (thinking-capable models only). One flag per run, consumed
    # identically by SFT rendering, RL rollouts, and serving (decoding parity). OFF by default
    # (operator preference: training defaults to no-reasoning; set thinking = true to enable).
    thinking: bool = False
    # Optional W&B run naming from the [wandb] config table. Carried as typed spec config
    # (round-tripped in the job-spec JSON the worker reads), not as environment variables.
    wandb: WandbSpec = field(default_factory=WandbSpec)

    @property
    def phase(self) -> str:
        """Phase label for the run: "rl" for the "grpo" algorithm, else the algorithm name."""
        return "rl" if self.algorithm == "grpo" else self.algorithm

    def to_dict(self) -> dict[str, Any]:
        """Return the spec (including nested specs) as a plain dict via ``dataclasses.asdict``."""
        return asdict(self)

    def to_json(self) -> str:
        """Return the spec as a JSON string with sorted keys."""
        return json.dumps(self.to_dict(), sort_keys=True)

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> JobSpec:
        """Build a ``JobSpec`` from a loosely-typed dict (e.g. parsed job-spec JSON).

        Missing keys fall back to the field defaults. The nested ``environment`` / ``train``
        / ``gpu`` tables are treated as empty when missing, null, or not a dict, matching
        the tolerant handling of ``worker_env`` and ``wandb``.

        Raises:
            ValueError: if the environment table carries a local ``path``.
            TypeError: if a numeric train knob is a boolean (see ``_opt_int`` /
                ``_opt_float``).
        """

        def _table(key: str) -> dict[str, Any]:
            # A malformed spec can carry a non-dict (e.g. a bare string) for a nested table;
            # calling .get() on it would raise AttributeError, so treat it as empty.
            raw = data.get(key)
            return raw if isinstance(raw, dict) else {}

        env = _table("environment")
        # Defense-in-depth: a stale/older payload may still carry a local `path`. The worker only
        # runs published Freesolo environment ids, so reject it here rather than silently
        # dropping it.
        if env.get("path"):
            raise ValueError(
                "local environment paths are no longer supported; the worker only runs "
                "published Freesolo environment ids"
            )
        train = _table("train")
        gpu = _table("gpu")
        return cls(
            model=data.get("model", cls.model),
            algorithm=normalize_algorithm(data.get("algorithm", cls.algorithm)),
            environment=EnvironmentSpec(
                id=env.get("id", ""),
                params=dict(env.get("params") or {}),
                pip=tuple(str(p) for p in env.get("pip") or ()),
                secrets=_str_tuple(env.get("secrets")),
                resolved_sha=str(env.get("resolved_sha") or ""),
            ),
            train=TrainSpec(
                steps=_opt_int(train.get("steps")),
                epochs=_opt_int(train.get("epochs")),
                lora_rank=int(train.get("lora_rank", 32)),
                lora_alpha=int(train.get("lora_alpha", 64)),
                seeds=tuple(int(s) for s in train.get("seeds", (0,))),
                init_from_adapter=str(train.get("init_from_adapter") or ""),
                hf_repo=str(train.get("hf_repo") or ""),
                learning_rate=_opt_float(train.get("learning_rate")),
                batch_size=_opt_int(train.get("batch_size")),
                max_length=_opt_int(train.get("max_length")),
                save_every=_opt_int(train.get("save_every")),
                max_steps=_opt_int(train.get("max_steps")),
                max_examples=_opt_int(train.get("max_examples")),
                group_size=_opt_int(train.get("group_size")),
                temperature=_opt_float(train.get("temperature")),
                max_tokens=_opt_int(train.get("max_tokens")),
                kl_penalty_coef=_opt_float(train.get("kl_penalty_coef")),
                advantage_clip=_opt_float(train.get("advantage_clip")),
                thinking_length_penalty_coef=_opt_float(train.get("thinking_length_penalty_coef")),
                stop_sequences=_str_tuple(train.get("stop_sequences")),
            ),
            gpu=GpuSpec(
                type=gpu.get("type", DEFAULT_GPU),
                disk_gb=int(gpu.get("disk_gb", 60)),
                max_wall_seconds=int(gpu.get("max_wall_seconds", 24 * 3600)),
                max_retries=int(gpu.get("max_retries", 2)),
                # network_volume/network_volume_gb round-trip so the runner-assigned weight cache
                # survives the to_dict()->from_dict() hops in _with_model_disk / _spec_with_gpu /
                # _assign_managed_hf_repo before deploy. A legacy ``datacenter`` key (from the
                # reverted single-DC pin) is intentionally ignored — the DC set is deploy-time
                # policy now, so stale specs carrying it are tolerated, never region-pinned.
                network_volume=gpu.get("network_volume"),
                # Tolerant: null / "" / "0" / 0 / negative / non-numeric / missing -> the default.
                # Platform-managed, so a stale or hand-edited spec must still load with a sane size.
                network_volume_gb=_volume_gb(gpu.get("network_volume_gb")),
            ),
            run_id=data.get("run_id", "local"),
            worker_env=_coerce_str_map(data.get("worker_env")),
            model_policy=data.get("model_policy", "catalog"),
            thinking=coerce_bool(data.get("thinking", False)),
            wandb=_coerce_wandb(data.get("wandb")),
        )

    @classmethod
    def from_json(cls, raw: str) -> JobSpec:
        """Parse ``raw`` as JSON and build a ``JobSpec`` from the result via ``from_dict``."""
        return cls.from_dict(json.loads(raw))
322
+
323
+
324
def load_job_spec_from_env() -> JobSpec | None:
    """Load the job spec from FLASH_JOB_SPEC_JSON or FLASH_JOB_SPEC_PATH, if present.

    ``FLASH_JOB_SPEC_JSON`` (inline JSON) takes precedence. Otherwise, when
    ``FLASH_JOB_SPEC_PATH`` names an existing path, that file is read as UTF-8 JSON.

    Returns:
        The parsed ``JobSpec``, or ``None`` when neither variable yields a spec.
    """
    raw = os.environ.get("FLASH_JOB_SPEC_JSON")
    if raw:
        return JobSpec.from_json(raw)
    path = os.environ.get("FLASH_JOB_SPEC_PATH")
    if path and os.path.exists(path):
        # Decode explicitly as UTF-8: the platform default encoding would make the same
        # spec file parse differently (or fail) depending on the worker's locale.
        with open(path, encoding="utf-8") as f:
            return JobSpec.from_json(f.read())
    return None
@@ -0,0 +1,192 @@
1
+ Metadata-Version: 2.4
2
+ Name: freesolo-flash-dev
3
+ Version: 0.2.25
4
+ Summary: Flash — managed LoRA post-training (SFT/GRPO) for Freesolo environments, driven by the `flash` CLI
5
+ Project-URL: Homepage, https://github.com/freesolo-co/flash
6
+ Project-URL: Repository, https://github.com/freesolo-co/flash
7
+ Author: Freesolo
8
+ License-Expression: Apache-2.0
9
+ License-File: LICENSE
10
+ Keywords: fine-tuning,freesolo,grpo,llm,lora,rl,sft
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: License :: OSI Approved :: Apache Software License
13
+ Classifier: Operating System :: OS Independent
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Requires-Python: <3.13,>=3.11
18
+ Provides-Extra: dev
19
+ Requires-Dist: datasets>=2.19; extra == 'dev'
20
+ Requires-Dist: fastapi; extra == 'dev'
21
+ Requires-Dist: freesolo>=0.2.49; extra == 'dev'
22
+ Requires-Dist: httpx>=0.27; extra == 'dev'
23
+ Requires-Dist: huggingface-hub>=0.34; extra == 'dev'
24
+ Requires-Dist: mypy>=1.13.0; extra == 'dev'
25
+ Requires-Dist: pytest>=9.0.3; extra == 'dev'
26
+ Requires-Dist: ruff>=0.6; extra == 'dev'
27
+ Requires-Dist: runpod-flash; extra == 'dev'
28
+ Requires-Dist: uvicorn; extra == 'dev'
29
+ Provides-Extra: gpu
30
+ Requires-Dist: accelerate>=1.4; extra == 'gpu'
31
+ Requires-Dist: bitsandbytes>=0.49; extra == 'gpu'
32
+ Requires-Dist: datasets>=2.19; extra == 'gpu'
33
+ Requires-Dist: freesolo>=0.2.49; extra == 'gpu'
34
+ Requires-Dist: huggingface-hub>=0.34; extra == 'gpu'
35
+ Requires-Dist: peft>=0.19; extra == 'gpu'
36
+ Requires-Dist: torch==2.10.0; extra == 'gpu'
37
+ Requires-Dist: transformers<5.11,>=5.6; extra == 'gpu'
38
+ Requires-Dist: trl<1.7,>=1.6; extra == 'gpu'
39
+ Requires-Dist: vllm==0.19.1; extra == 'gpu'
40
+ Provides-Extra: server
41
+ Requires-Dist: datasets>=2.19; extra == 'server'
42
+ Requires-Dist: fastapi; extra == 'server'
43
+ Requires-Dist: freesolo>=0.2.49; extra == 'server'
44
+ Requires-Dist: httpx>=0.27; extra == 'server'
45
+ Requires-Dist: huggingface-hub>=0.34; extra == 'server'
46
+ Requires-Dist: runpod-flash; extra == 'server'
47
+ Requires-Dist: uvicorn; extra == 'server'
48
+ Description-Content-Type: text/markdown
49
+
50
+ # Flash
51
+
52
+ Managed LoRA post-training service: SFT and GRPO on managed RunPod Flash GPUs.
53
+ The allocator picks the cheapest validated RunPod GPU class that fits the run.
54
+
55
+ ## Scope
56
+
57
+ - `flash train <cfg.toml>` / control-plane `POST /runs` — submit a training job;
58
+ one dedicated GPU per run, supervised server-side (stall watchdog, bounded
59
+ auto-retry resuming from the last streamed checkpoint, endpoint GC).
60
+ - `flash deploy`, `flash chat` — serving for trained adapters.
61
+ - **Freesolo SDK environments.** Every run names a Freesolo environment id.
62
+ Scaffold `environment.py` plus `datasets/train.jsonl`, upload `.` or another
63
+ folder with `flash env push --name <name> <folder>`, then reference the
64
+ returned id. The worker loads it through `freesolo.environments`. There are no
65
+ built-in task environments. Single-turn and bounded multi-turn environments are
66
+ supported.
67
+
68
+ ## Layout
69
+
70
+ - `flash/catalog.py` — curated model catalog (Qwen3 dense supported tier;
71
+ Qwen3.5/3.6 experimental tier) + `model_policy = "allow"` VRAM-fit check + each
72
+ model's `thinking` capability (opt-in reasoning mode `thinking = true`)
73
+ - `flash/schema/`, `flash/spec.py` — TOML → `JobSpec`
74
+ - `flash/runner/` — server-side run supervisor (durable job handle,
75
+ retries, cost guard, endpoint GC)
76
+ - `flash/providers/` — RunPod Flash provider code (pricing, gpus, durable
77
+ submit/poll, preflight) behind the `base.Provider` protocol, with an
78
+ `allocator.py` that picks the cheapest fitting class
79
+ - `flash/engine/` — the on-GPU worker (TRL + colocated vLLM rollouts) and the
80
+ shared recipe; SFT targets and RL rewards route through the active environment
81
+ (task-specific grading lives with its example, not in the engine)
82
+ - `flash/envs/` — environment machinery: registry and the adapter that loads
83
+ Freesolo SDK environments onto the worker's interface
84
+ - `flash env setup` — scaffold a starter local Freesolo env, `datasets/train.jsonl`,
85
+ and ready-to-run configs to start from
86
+ - `flash/serve/`, `flash/server/` — adapter serving and the FastAPI control
87
+ plane (run operator-side via the separate `flash-server` command)
88
+ - `flash/mcp/` — stdio MCP bridge for coding agents
89
+ - `Dockerfile` — the control-plane image (used by the repo docker-compose)
90
+ - `tests/` — pytest suite (CPU-only; offline-by-default, no GPU/network)
91
+
92
+ ## Local commands
93
+
94
+ ```bash
95
+ cd flash
96
+ uv sync --extra server
97
+ uv run pytest # CPU tests (offline-by-default, no GPU/network)
98
+ uv run ruff check . && uv run ruff format .
99
+ uv run flash --help
100
+ uv run flash-server # control plane (operator-side, run once)
101
+ ```
102
+
103
+ The control plane owns provider credentials: `RUNPOD_API_KEY` is always required,
104
+ plus the shared `HF_TOKEN`.
105
+ The artifact repo is platform-managed and per-run (each run gets its own
106
+ `Freesolo-Co/flashrun-<run_id>`, written by the operator `HF_TOKEN`); it is not a user
107
+ knob and not an operator-wide env var. Clients authenticate with their freesolo API key
108
+ (`flash login`).
109
+
110
+ ## Release channels
111
+
112
+ Two channels are published to PyPI from the *same source*, distinguished by one line in
113
+ `flash/_channel.py` (`CHANNEL`):
114
+
115
+ | Channel | PyPI package | CLI | Default plane | Published from |
116
+ | --- | --- | --- | --- | --- |
117
+ | prod | `freesolo-flash` | `flash` | `flash.freesolo.co` | push to `main` that bumps `[project].version` (`.github/workflows/publish.yml`) |
118
+ | dev | `freesolo-flash-dev` | `flash-dev` | `flash-dev.freesolo.co` | push to `dev` whose `[tool.flash-dev].version` isn't on PyPI yet (`.github/workflows/publish-dev.yml`) |
119
+
120
+ The two install side by side (distinct package + CLI names). The dev build is produced by
121
+ `scripts/build_dev_dist.py`, which renames the package/CLI and flips `CHANNEL` to `dev` before
122
+ `uv build`. To cut a dev release, bump `[tool.flash-dev].version` and merge to `dev`. Either CLI
123
+ still honours an explicit `FLASH_API_URL` / `flash login --api-url`; the channel only sets the
124
+ default.
125
+
126
+ ## Serving From an API
127
+
128
+ `flash chat` is a CLI wrapper around the Flash control-plane chat endpoint. To call a
129
+ deployed adapter from your own app, deploy the finished run once and then POST chat
130
+ requests with your freesolo API key:
131
+
132
+ ```bash
133
+ export FLASH_API_URL=https://flash.freesolo.co
134
+ export FREESOLO_API_KEY=fslo_...
135
+ export RUN_ID=flash-1782194170-ce1cfcff
136
+
137
+ curl -X POST "$FLASH_API_URL/v1/runs/$RUN_ID/deploy" \
138
+ -H "Authorization: Bearer $FREESOLO_API_KEY" \
139
+ -H "Content-Type: application/json" \
140
+ -d '{"dry_run": false}'
141
+
142
+ curl -X POST "$FLASH_API_URL/v1/runs/$RUN_ID/chat" \
143
+ -H "Authorization: Bearer $FREESOLO_API_KEY" \
144
+ -H "Content-Type: application/json" \
145
+ -d '{
146
+ "messages": [
147
+ {"role": "user", "content": "Write a two-sentence summary of the run."}
148
+ ],
149
+ "temperature": 0.0,
150
+ "max_tokens": 256
151
+ }'
152
+ ```
153
+
154
+ The response uses the OpenAI chat-completions shape:
155
+
156
+ ```json
157
+ {
158
+ "choices": [
159
+ {
160
+ "message": {
161
+ "role": "assistant",
162
+ "content": "..."
163
+ }
164
+ }
165
+ ]
166
+ }
167
+ ```
168
+
169
+ Use `choices[0].message.content` for the generated text. The run id is the adapter id
170
+ for serving. If the run is not deployed yet, `/v1/runs/<run_id>/chat` returns `409`
171
+ with a hint to deploy first.
172
+
173
+ Operators can also call the Modal serving app directly after the adapter is registered.
174
+ The default serving app is `https://clado-ai--freesolo-lora-serving.modal.run`, and
175
+ operators can point Flash at another serving app by setting `FREESOLO_SERVING_URL`.
176
+ Use that same base URL when calling the app directly; pass the run id as `model`:
177
+
178
+ ```bash
179
+ export FREESOLO_SERVING_URL=https://clado-ai--freesolo-lora-serving.modal.run
180
+
181
+ curl -X POST "$FREESOLO_SERVING_URL/v1/chat/completions" \
182
+ -H "Content-Type: application/json" \
183
+ -d '{
184
+ "model": "flash-1782194170-ce1cfcff",
185
+ "messages": [{"role": "user", "content": "Hello"}],
186
+ "temperature": 0.0,
187
+ "max_tokens": 256
188
+ }'
189
+ ```
190
+
191
+ Prefer the Flash control-plane endpoint for user apps because it enforces run ownership
192
+ and forwards per-run serving options such as thinking-mode parity.