freesolo-flash-dev 0.2.25__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flash/__init__.py +29 -0
- flash/_channel.py +23 -0
- flash/_fileio.py +35 -0
- flash/_logging.py +49 -0
- flash/_update_check.py +266 -0
- flash/catalog.py +253 -0
- flash/cli/__init__.py +1 -0
- flash/cli/main/__init__.py +227 -0
- flash/cli/main/__main__.py +6 -0
- flash/cli/main/commands.py +636 -0
- flash/cli/main/envpush.py +317 -0
- flash/cli/main/render.py +599 -0
- flash/cli/main/training_doc.py +455 -0
- flash/client/__init__.py +14 -0
- flash/client/config.py +70 -0
- flash/client/http.py +372 -0
- flash/client/runtime_secrets.py +69 -0
- flash/client/specs.py +20 -0
- flash/cost/__init__.py +16 -0
- flash/cost/analytical.py +175 -0
- flash/cost/facts.py +114 -0
- flash/cost/spec.py +113 -0
- flash/cost/types.py +158 -0
- flash/engine/__init__.py +6 -0
- flash/engine/accounting.py +36 -0
- flash/engine/chalk_kernels.py +116 -0
- flash/engine/multiturn_rollout.py +780 -0
- flash/engine/recipe.py +86 -0
- flash/engine/vram.py +603 -0
- flash/engine/worker/__init__.py +2916 -0
- flash/engine/worker/__main__.py +4 -0
- flash/engine/worker/kernel_warmup.py +400 -0
- flash/engine/worker/lora.py +796 -0
- flash/engine/worker/packing.py +366 -0
- flash/engine/worker/perf.py +1048 -0
- flash/envs/__init__.py +10 -0
- flash/envs/adapter/__init__.py +883 -0
- flash/envs/adapter/rubric.py +222 -0
- flash/envs/base.py +52 -0
- flash/envs/registry.py +62 -0
- flash/mcp/__init__.py +1 -0
- flash/mcp/server.py +85 -0
- flash/providers/__init__.py +59 -0
- flash/providers/_auth.py +24 -0
- flash/providers/_http.py +230 -0
- flash/providers/_instance.py +416 -0
- flash/providers/_instance_bootstrap.py +517 -0
- flash/providers/_poll.py +311 -0
- flash/providers/allocator.py +193 -0
- flash/providers/base.py +431 -0
- flash/providers/hyperstack/__init__.py +127 -0
- flash/providers/hyperstack/api.py +522 -0
- flash/providers/hyperstack/auth.py +17 -0
- flash/providers/hyperstack/gpus.py +29 -0
- flash/providers/hyperstack/jobs/__init__.py +632 -0
- flash/providers/hyperstack/jobs/builders.py +122 -0
- flash/providers/hyperstack/preflight.py +23 -0
- flash/providers/hyperstack/pricing.py +26 -0
- flash/providers/hyperstack/train.py +25 -0
- flash/providers/lambdalabs/__init__.py +139 -0
- flash/providers/lambdalabs/api.py +261 -0
- flash/providers/lambdalabs/auth.py +18 -0
- flash/providers/lambdalabs/gpus.py +29 -0
- flash/providers/lambdalabs/jobs/__init__.py +724 -0
- flash/providers/lambdalabs/jobs/builders.py +118 -0
- flash/providers/lambdalabs/preflight.py +27 -0
- flash/providers/lambdalabs/pricing.py +51 -0
- flash/providers/lambdalabs/train.py +27 -0
- flash/providers/preflight.py +55 -0
- flash/providers/realized.py +80 -0
- flash/providers/runpod/__init__.py +130 -0
- flash/providers/runpod/api.py +186 -0
- flash/providers/runpod/auth.py +37 -0
- flash/providers/runpod/cost.py +57 -0
- flash/providers/runpod/gpus.py +46 -0
- flash/providers/runpod/jobs.py +956 -0
- flash/providers/runpod/keys.py +139 -0
- flash/providers/runpod/preflight.py +30 -0
- flash/providers/runpod/preload.py +915 -0
- flash/providers/runpod/pricing.py +18 -0
- flash/providers/runpod/slots.py +79 -0
- flash/providers/runpod/train/__init__.py +150 -0
- flash/providers/runpod/train/deps.py +395 -0
- flash/providers/runpod/train/endpoints.py +820 -0
- flash/py.typed +0 -0
- flash/runner/__init__.py +686 -0
- flash/runner/checkpoints.py +82 -0
- flash/runner/deploy.py +422 -0
- flash/runner/lifecycle.py +672 -0
- flash/schema/__init__.py +375 -0
- flash/schema/fields.py +331 -0
- flash/serve/__init__.py +1 -0
- flash/serve/deploy.py +326 -0
- flash/serve/pricing.py +60 -0
- flash/server/__init__.py +1 -0
- flash/server/__main__.py +20 -0
- flash/server/app.py +961 -0
- flash/server/auth.py +263 -0
- flash/server/billing.py +124 -0
- flash/server/checkpoints.py +110 -0
- flash/server/db.py +160 -0
- flash/server/environment_registry.py +102 -0
- flash/server/envs.py +360 -0
- flash/server/reconcile.py +163 -0
- flash/server/run_registry.py +150 -0
- flash/spec.py +333 -0
- freesolo_flash_dev-0.2.25.dist-info/METADATA +192 -0
- freesolo_flash_dev-0.2.25.dist-info/RECORD +111 -0
- freesolo_flash_dev-0.2.25.dist-info/WHEEL +4 -0
- freesolo_flash_dev-0.2.25.dist-info/entry_points.txt +3 -0
- freesolo_flash_dev-0.2.25.dist-info/licenses/LICENSE +201 -0
flash/schema/__init__.py
ADDED
|
@@ -0,0 +1,375 @@
|
|
|
1
|
+
"""Parse Flash TOML configs into worker JobSpecs."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
import sys
|
|
7
|
+
import tomllib
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
from flash.catalog import normalize_algorithm, resolve_model
|
|
11
|
+
from flash.providers.base import (
|
|
12
|
+
UnsupportedGpuError,
|
|
13
|
+
canonical_gpu,
|
|
14
|
+
provisional_gpu,
|
|
15
|
+
)
|
|
16
|
+
from flash.schema.fields import (
|
|
17
|
+
ConfigError,
|
|
18
|
+
_coerce_scalar,
|
|
19
|
+
_environment_secrets,
|
|
20
|
+
_require_environment_ref,
|
|
21
|
+
_train_float,
|
|
22
|
+
_train_int,
|
|
23
|
+
_train_stops,
|
|
24
|
+
_wandb_spec,
|
|
25
|
+
_worker_env,
|
|
26
|
+
)
|
|
27
|
+
from flash.spec import EnvironmentSpec, GpuSpec, JobSpec, TrainSpec
|
|
28
|
+
|
|
29
|
+
# Building blocks for adapter refs. An owner/repo component and a run id each start with an ASCII
# alphanumeric and continue with alphanumerics, ".", "_" or "-"; a run id is at most 128
# characters long in total.
_OWNER_REPO_RE = r"[A-Za-z0-9][A-Za-z0-9._-]*"
_RUN_ID_RE = r"[A-Za-z0-9][A-Za-z0-9._-]{0,127}"
# Full adapter ref: <owner>/<repo>:<phase>/<run_id>/seed<N>, with phase "sft" or "rl".
# ``[0-9]`` rather than ``\d``: on a str pattern ``\d`` matches every Unicode decimal digit, so a
# ref such as ``.../seed٣`` would be accepted even though the documented form is an ASCII integer.
# ``\Z`` rather than ``$``: ``$`` also matches just before a trailing newline.
_ADAPTER_REF_RE = re.compile(
    rf"^(?P<repo>{_OWNER_REPO_RE}/{_OWNER_REPO_RE}):(?P<phase>sft|rl)/"
    rf"(?P<run_id>{_RUN_ID_RE})/seed(?P<seed>[0-9]+)\Z"
)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def load_toml(path: str) -> dict[str, Any]:
|
|
38
|
+
with open(path, "rb") as f:
|
|
39
|
+
return tomllib.load(f)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def spec_from_file(
    path: str,
    run_id: str | None = None,
    overrides: list[str] | None = None,
    extra_configs: list[str] | None = None,
) -> JobSpec:
    """Build a JobSpec from a TOML file plus optional layered configs and overrides.

    Precedence, lowest to highest: the base file at ``path``; each file in ``extra_configs`` in
    the order given (deep-merged over everything before it); then each ``key=value`` entry in
    ``overrides`` (the ``--set`` dotted overrides). The merged table is handed to
    ``spec_from_dict`` together with ``run_id``.
    """
    config = load_toml(path)

    # Layered config files: a later file wins over an earlier one, table by table.
    extra_paths = extra_configs if extra_configs else []
    for extra_path in extra_paths:
        _deep_merge(config, load_toml(extra_path))

    # Dotted ``--set`` overrides are applied last, so they take precedence over every file.
    assignments = overrides if overrides else []
    for assignment in assignments:
        _apply_override(config, assignment)

    return spec_from_dict(config, run_id=run_id)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _deep_merge(base: dict, extra: dict) -> dict:
    """Merge ``extra`` into ``base`` in place and return ``base``.

    When a key holds a dict on both sides the two dicts are merged recursively; in every other
    case the value from ``extra`` replaces (or adds) the entry in ``base``.
    """
    for key in extra:
        incoming = extra[key]
        current = base.get(key)
        both_tables = isinstance(incoming, dict) and isinstance(current, dict)
        if not both_tables:
            base[key] = incoming
            continue
        _deep_merge(current, incoming)
    return base
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def _apply_override(raw: dict, item: str) -> None:
    """Apply one ``--set key=value`` override to ``raw`` in place.

    ``key`` is a dotted path (``train.steps``); intermediate tables are created as needed. The
    value is split off at the first ``=``, stripped, and then stored as follows:

    * under ``wandb.*`` it is kept verbatim as a string;
    * ``[a, b, ...]`` becomes a list whose non-empty items are passed through ``_coerce_scalar``;
    * anything else is passed through ``_coerce_scalar``.

    Raises:
        ConfigError: if ``item`` has no ``=``, if any segment of the dotted key is empty
            (``=x``, ``train.=1``, ``a..b=1``), or if the path runs through an existing
            non-table value.
    """
    if "=" not in item:
        raise ConfigError(f"--set must be key=value, got {item!r}")
    key, value = item.split("=", 1)
    parts = [segment.strip() for segment in key.split(".")]
    # An empty segment would write a "" key. At the top level that is a scalar entry nothing
    # reads, so the override would vanish silently. Reject it before touching ``raw`` so a
    # failed override leaves the config unchanged.
    if not all(parts):
        raise ConfigError(f"--set key must be a non-empty dotted path, got {item!r}")
    node = raw
    for p in parts[:-1]:
        node = node.setdefault(p, {})
        if not isinstance(node, dict):
            raise ConfigError(f"--set path {key!r} traverses a non-table value")
    leaf = parts[-1]
    val = value.strip()
    # [wandb] leaves are string-valued labels (project / run name); a numeric- or bool-looking
    # value like `--set wandb.run_name=123` is still the string label the user intends. Preserve
    # it as a string instead of coercing it to int/float/bool (which _wandb_spec's string
    # validation would otherwise reject).
    if parts[0] == "wandb":
        node[leaf] = val
    elif val.startswith("[") and val.endswith("]"):
        # List values such as seeds=[0,1]: split on commas and coerce each non-empty item.
        inner = val[1:-1].strip()
        node[leaf] = [_coerce_scalar(x.strip()) for x in inner.split(",") if x.strip()]
    else:
        node[leaf] = _coerce_scalar(val)
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def _init_from_adapter_ref(train_raw: dict[str, Any]) -> str:
    """Return the validated ``train.init_from_adapter`` reference, or ``""`` when unset.

    A missing key, ``None``, and an empty or whitespace-only string all yield ``""``. Any other
    string is stripped and must match ``_ADAPTER_REF_RE``.

    Raises:
        ConfigError: if the value is not a string, or is a non-empty string that is not a full
            adapter ref.
    """
    candidate = train_raw.get("init_from_adapter")
    if candidate is None:
        return ""
    if not isinstance(candidate, str):
        raise ConfigError("train.init_from_adapter must be a string")
    trimmed = candidate.strip()
    if trimmed and not _ADAPTER_REF_RE.match(trimmed):
        raise ConfigError(
            "train.init_from_adapter must be the full adapter_ref emitted by `flash status` "
            "(<owner>/<repo>:<phase>/<run_id>/seed<N>)"
        )
    # Either a matching ref, or "" when the stripped value was empty.
    return trimmed
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
# The config keys this parser recognizes. An unrecognized key is either a typo or a knob placed
# in the wrong table, and it is rejected loudly instead of being ignored while the run trains
# (expensively) on defaults. The classic trap is a `[grpo]` table holding GRPO knobs that belong
# under `[train]`: it used to be dropped without a peep, so the run fell back to the default
# rollout (16x more completions) at 16x the cost.
#
# Several entries are platform-MANAGED rather than user knobs: `gpu`, `model_policy`, `run_id`
# and `train.hf_repo` are ignored when a user sets them, because the control plane derives or
# assigns them. They stay RECOGNIZED (not rejected) because a round-tripped JobSpec
# (spec.to_dict(), which the control plane re-parses on submit) still carries them; rejecting
# them would break that re-validation.
_TOP_LEVEL_KEYS = frozenset(
    (
        "model",
        "algorithm",
        "model_policy",
        "thinking",
        "environment",
        "train",
        "gpu",
        "worker_env",
        "wandb",
        "run_id",
    )
)
_TRAIN_KEYS = frozenset(
    (
        "steps",
        "epochs",
        "lora_rank",
        "lora_alpha",
        "seeds",
        "init_from_adapter",
        "hf_repo",
        "learning_rate",
        "batch_size",
        "max_length",
        "save_every",
        "group_size",
        "temperature",
        "max_tokens",
        "kl_penalty_coef",
        "advantage_clip",
        "thinking_length_penalty_coef",
        "stop_sequences",
        "max_steps",
        "max_examples",
    )
)
|
|
157
|
+
def _train_seeds(train_raw: dict[str, Any]) -> tuple[int, ...]:
    """Return the validated ``[train] seeds`` tuple, defaulting to ``(0,)``.

    A missing or ``None`` value keeps the default. Any other value must be a list/tuple whose
    entries are all integers; ``bool`` is rejected even though it subclasses ``int``. An empty
    list is returned as ``()``.

    Raises:
        ConfigError: if ``seeds`` is not a list/tuple, or contains a non-integer entry.
    """
    seeds_raw = train_raw.get("seeds")
    if seeds_raw is None:
        return (0,)
    # Without these checks ``seeds = 3`` raises a bare TypeError, ``seeds = "12"`` is char-split
    # into (1, 2), and ``1.9`` / ``true`` are silently coerced to a different seed.
    if not isinstance(seeds_raw, (list, tuple)):
        raise ConfigError("train.seeds must be a list of integers")
    if any(isinstance(s, bool) or not isinstance(s, int) for s in seeds_raw):
        raise ConfigError("train.seeds entries must be integers")
    return tuple(seeds_raw)


def spec_from_dict(raw: dict[str, Any], run_id: str | None = None) -> JobSpec:
    """Validate a parsed config table and build the corresponding JobSpec.

    Args:
        raw: The parsed config (top-level table). Unknown table-valued top-level keys and
            unknown ``[train]`` keys are rejected; unknown top-level scalars are tolerated.
        run_id: Run id to stamp on the spec; ``"local"`` is used when it is falsy.

    Returns:
        The JobSpec, after it has passed ``_validate_spec``.

    Raises:
        ConfigError: for any invalid, missing, or unsupported config value.
    """
    # Reject unknown config SECTIONS (table-valued top-level keys) — the footgun is a `[grpo]`
    # table holding rollout knobs that actually belong under `[train]`, silently dropped + run at
    # 16x-cost defaults. We only flag tables, not scalars: callers (e.g. the MCP handler) pass
    # through harmless scalar control flags like `dry_run`/`background` alongside the spec.
    unknown = sorted(k for k in set(raw) - _TOP_LEVEL_KEYS if isinstance(raw[k], dict))
    if unknown:
        hint = ""
        if {"grpo", "sft"} & set(unknown):
            hint = (
                " — GRPO/SFT knobs (group_size, batch_size, max_tokens, …) belong under [train], "
                "not a [grpo]/[sft] table"
            )
        raise ConfigError(
            f"unknown config section(s): {', '.join(unknown)} "
            f"(allowed tables: environment, train, gpu, wandb, worker_env){hint}"
        )
    try:
        model = raw["model"]
    except KeyError as exc:
        raise ConfigError("config must set `model`") from exc

    try:
        algorithm = normalize_algorithm(raw.get("algorithm"))
    except ValueError as exc:
        raise ConfigError(str(exc)) from exc
    # model_policy (curated "catalog" vs any-fitting-HF-model "allow") is NOT a user knob: managed
    # runs always use the curated catalog, so a user-supplied model_policy is ignored. (The "allow"
    # path still exists in resolve_model for internal use, but a submitted config can't select it.)
    model_policy = "catalog"
    thinking = raw.get("thinking", False)  # reasoning mode OFF by default (operator preference)
    if not isinstance(thinking, bool):
        raise ConfigError("thinking must be a boolean")

    # ``is None`` (not ``or {}``): a missing section defaults to an empty table, but a present-
    # but-non-dict value (e.g. ``environment = false``) must reach the "must be a table" check
    # rather than being silently coerced to ``{}`` and bypassing validation.
    env_raw = raw.get("environment")
    if env_raw is None:
        env_raw = {}
    if not isinstance(env_raw, dict):
        raise ConfigError("[environment] must be a table")
    # Local environment paths are gone: a run names a published Freesolo env by [environment] id.
    # A stray `path` (alone or alongside `id`) is a stale config — reject it loudly instead of
    # silently ignoring the key and training against the wrong/missing env.
    if env_raw.get("path"):
        raise ConfigError(
            "local environment paths are no longer supported — remove `path` and reference a "
            "Freesolo environment `id` returned by `flash env push --name <name>`"
        )
    # Validate the [environment] sub-fields before they reach EnvironmentSpec(...). The
    # ``dict(... or {})`` / ``tuple(str(p) for p in ... or ())`` conversions below paper over a
    # falsy value (false -> {}/()) but a present-but-wrong-typed value otherwise crashes opaquely
    # or silently misbehaves: ``params = "x"`` -> ``dict("x")`` ValueError, ``params = 1`` ->
    # ``dict(1)`` TypeError (a 500), and ``pip = "x"`` is char-split into ('x',) (the worker then
    # tries to install bogus one-char packages). A MISSING sub-field — absent OR ``None`` (e.g.
    # JSON ``null``) — keeps its default; any present, NON-None value must be the right type. A
    # falsy ``params = false`` is still rejected, mirroring the section-level rule that
    # ``environment = false`` must fail rather than silently coerce.
    if env_raw.get("params") is not None and not isinstance(env_raw["params"], dict):
        raise ConfigError("[environment] params must be a table")
    if env_raw.get("pip") is not None and not isinstance(env_raw["pip"], (list, tuple)):
        raise ConfigError("[environment] pip must be a list of strings")
    if env_raw.get("pip") is not None and not all(isinstance(p, str) for p in env_raw["pip"]):
        raise ConfigError("[environment] pip entries must be strings")
    environment_secrets = _environment_secrets(env_raw.get("secrets"))
    train_raw = raw.get("train")
    if train_raw is None:
        train_raw = {}
    if not isinstance(train_raw, dict):
        raise ConfigError("[train] must be a table")
    unknown_train = sorted(set(train_raw) - _TRAIN_KEYS)
    if unknown_train:
        raise ConfigError(
            f"[train] unknown key(s): {', '.join(unknown_train)} "
            f"(allowed: {', '.join(sorted(_TRAIN_KEYS))})"
        )
    # The [gpu] table is only shape-checked; its contents are not read (see the GpuSpec note
    # further down).
    gpu_raw = raw.get("gpu")
    if gpu_raw is None:
        gpu_raw = {}
    if not isinstance(gpu_raw, dict):
        raise ConfigError("[gpu] must be a table")

    # GPU allocation is fully automatic: the submit-time allocator always picks the cheapest
    # fitting active RunPod class — there is no GPU pin. A config's gpu.type is not a user knob.
    # ``provisional_gpu`` computes the offline RunPod-static cheapest-validated-that-fits for
    # sizing/display only; the allocator re-resolves it at submit time.
    try:
        gpu_type = provisional_gpu(model, algorithm=algorithm, train=train_raw, thinking=thinking)
    except UnsupportedGpuError as exc:
        raise ConfigError(str(exc)) from exc
    try:
        info = resolve_model(model, algorithm, policy=model_policy, gpu=gpu_type)
    except ValueError as exc:
        raise ConfigError(str(exc)) from exc
    if thinking and info.thinking == "none":
        raise ConfigError(
            f"{model} does not support thinking mode (its chat template has no "
            f"<think> support); pick a thinking-capable model — `flash models` lists "
            f"each model's thinking capability"
        )
    if not thinking and info.thinking == "always":
        raise ConfigError(
            f"{model} always emits <think> reasoning and cannot run with thinking "
            f"disabled; set thinking = true"
        )
    if thinking and info.thinking == "unknown":
        # stderr, not stdout: spec_from_dict runs inside flash/mcp/server.py, which speaks a
        # one-JSON-object-per-line protocol on stdout — a warning line there corrupts the stream.
        print(
            f"warning: open-model policy: cannot verify that {model}'s chat template "
            f"supports thinking mode; the run proceeds with enable_thinking=true",
            file=sys.stderr,
        )

    # worker_env is the lower-level per-run escape hatch ([worker_env] table, string-valued,
    # secret-guarded; the worker reads it for the per-run chalk/kernel opt-in). The optional
    # [wandb] naming table is a separate, typed spec field (JobSpec.wandb) — NOT folded into
    # worker_env env vars.
    worker_env = _worker_env(raw.get("worker_env"))
    wandb_spec = _wandb_spec(raw.get("wandb"))

    spec = JobSpec(
        model=model,
        algorithm=algorithm,
        environment=EnvironmentSpec(
            id=str(env_raw.get("id") or ""),
            params=dict(env_raw.get("params") or {}),
            pip=tuple(str(p) for p in env_raw.get("pip") or ()),
            secrets=environment_secrets,
        ),
        train=TrainSpec(
            steps=_train_int(train_raw, "steps", minimum=1),
            epochs=_train_int(train_raw, "epochs", minimum=1),
            lora_rank=_train_int(train_raw, "lora_rank", minimum=1) or 32,
            lora_alpha=_train_int(train_raw, "lora_alpha", minimum=1) or 64,
            # Validated like the other typed fields: a wrong-typed value is a ConfigError.
            seeds=_train_seeds(train_raw),
            init_from_adapter=_init_from_adapter_ref(train_raw),
            # hf_repo is assigned by the control plane (a per-run private dataset under the
            # operator's namespace, written by the operator HF_TOKEN); a user-supplied
            # [train] hf_repo is ignored. See flash.runner.submit_job._assign_managed_hf_repo.
            hf_repo="",
            learning_rate=_train_float(train_raw, "learning_rate", minimum=0.0, exclusive=True),
            batch_size=_train_int(train_raw, "batch_size", minimum=1),
            max_length=_train_int(train_raw, "max_length", minimum=1),
            save_every=_train_int(train_raw, "save_every", minimum=1),
            group_size=_train_int(train_raw, "group_size", minimum=1),
            temperature=_train_float(train_raw, "temperature", minimum=0.0),
            max_tokens=_train_int(train_raw, "max_tokens", minimum=1),
            kl_penalty_coef=_train_float(train_raw, "kl_penalty_coef", minimum=0.0),
            advantage_clip=_train_float(train_raw, "advantage_clip", minimum=0.0),
            thinking_length_penalty_coef=_train_float(
                train_raw, "thinking_length_penalty_coef", minimum=0.0, maximum=1.0
            ),
            stop_sequences=_train_stops(train_raw),
            # SFT caps: max_steps caps optimizer steps (cheap pre-flight smoke); max_examples
            # truncates the SFT dataset. minimum=0 so an explicit 0 means "no cap" (matches the
            # TrainSpec "None/0 -> no cap" contract); the worker reads these from [train].
            max_steps=_train_int(train_raw, "max_steps", minimum=0),
            max_examples=_train_int(train_raw, "max_examples", minimum=0),
        ),
        # GPU allocation, disk sizing, retry budget, and network volumes are all platform-managed:
        # the submit-time allocator picks the cheapest fitting validated RunPod GPU, disk is raised
        # to the model's minimum server-side, and the infra knobs are operator defaults. A user
        # [gpu] table is ignored; gpu_type here is the offline sizing/display provisional,
        # re-resolved at submit.
        gpu=GpuSpec(type=gpu_type),
        run_id=run_id or "local",  # server-assigned (new_run_id at create_run); never user-set
        worker_env=worker_env,
        model_policy=model_policy,
        thinking=thinking,
        wandb=wandb_spec,
    )
    _validate_spec(spec)
    return spec
|
|
335
|
+
|
|
336
|
+
|
|
337
|
+
def _validate_spec(spec: JobSpec) -> None:
    """Run the cross-field checks on an assembled JobSpec.

    Checks, in order: at least one seed; a GPU type accepted by ``canonical_gpu``; a positive
    step count for GRPO / epoch count for SFT when one is set; a non-empty environment id that
    passes ``_require_environment_ref``; positive ``lora_rank`` and ``lora_alpha``.

    Raises:
        ConfigError: on the first check that fails.
    """
    train = spec.train
    if not train.seeds:
        raise ConfigError("train.seeds must contain at least one seed")

    try:
        canonical_gpu(spec.gpu.type)
    except UnsupportedGpuError as exc:
        raise ConfigError(str(exc)) from exc

    # GRPO runs are driven by steps and SFT runs by epochs. An explicit non-positive count for
    # the one the algorithm consumes is rejected here, so the config fails up front rather than
    # provisioning a worker that silently falls back to a default count.
    if spec.algorithm == "grpo":
        if train.steps is not None and train.steps <= 0:
            raise ConfigError("train.steps must be positive for GRPO")
    if spec.algorithm == "sft":
        if train.epochs is not None and train.epochs <= 0:
            raise ConfigError("train.epochs must be positive for SFT")

    # Each run has to name a Freesolo environment through [environment] id; there is neither a
    # default environment nor a local path mode.
    env_id = spec.environment.id
    if not env_id:
        raise ConfigError(
            "config must set [environment] id (upload an environment with "
            '`flash env push --name <name>` and paste the returned id, e.g. "your-name/your-env"); '
            "there is no local path mode"
        )
    _require_environment_ref(
        env_id,
        '[environment] id must be a Freesolo environment id (for example "your-name/your-env")',
    )

    if train.lora_rank <= 0:
        raise ConfigError("train.lora_rank must be positive")

    # NOTE: train.hf_repo (the per-run HF artifact repo) is deliberately NOT validated here. It
    # is no longer a user field: the control plane assigns it server-side (a per-run private
    # dataset under the operator's namespace) in flash.runner.submit_job; see
    # _assign_managed_hf_repo.
    # The GRPO recipe knobs (group_size/temperature/max_tokens/kl_penalty_coef/advantage_clip/
    # thinking_length_penalty_coef) were already range-checked at parse time by the _train_int/
    # _train_float coercers, including the thinking_length_penalty_coef <= 1.0 upper bound, so
    # they are not re-checked here.

    # lora_alpha scales the adapter contribution: a value of 0 (or below) would pay for a run
    # whose adapter is a no-op (zero scaling at serve), so it is refused up front.
    if train.lora_alpha <= 0:
        raise ConfigError("train.lora_alpha must be positive")
|