freesolo-flash-dev 0.2.25__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111) hide show
  1. flash/__init__.py +29 -0
  2. flash/_channel.py +23 -0
  3. flash/_fileio.py +35 -0
  4. flash/_logging.py +49 -0
  5. flash/_update_check.py +266 -0
  6. flash/catalog.py +253 -0
  7. flash/cli/__init__.py +1 -0
  8. flash/cli/main/__init__.py +227 -0
  9. flash/cli/main/__main__.py +6 -0
  10. flash/cli/main/commands.py +636 -0
  11. flash/cli/main/envpush.py +317 -0
  12. flash/cli/main/render.py +599 -0
  13. flash/cli/main/training_doc.py +455 -0
  14. flash/client/__init__.py +14 -0
  15. flash/client/config.py +70 -0
  16. flash/client/http.py +372 -0
  17. flash/client/runtime_secrets.py +69 -0
  18. flash/client/specs.py +20 -0
  19. flash/cost/__init__.py +16 -0
  20. flash/cost/analytical.py +175 -0
  21. flash/cost/facts.py +114 -0
  22. flash/cost/spec.py +113 -0
  23. flash/cost/types.py +158 -0
  24. flash/engine/__init__.py +6 -0
  25. flash/engine/accounting.py +36 -0
  26. flash/engine/chalk_kernels.py +116 -0
  27. flash/engine/multiturn_rollout.py +780 -0
  28. flash/engine/recipe.py +86 -0
  29. flash/engine/vram.py +603 -0
  30. flash/engine/worker/__init__.py +2916 -0
  31. flash/engine/worker/__main__.py +4 -0
  32. flash/engine/worker/kernel_warmup.py +400 -0
  33. flash/engine/worker/lora.py +796 -0
  34. flash/engine/worker/packing.py +366 -0
  35. flash/engine/worker/perf.py +1048 -0
  36. flash/envs/__init__.py +10 -0
  37. flash/envs/adapter/__init__.py +883 -0
  38. flash/envs/adapter/rubric.py +222 -0
  39. flash/envs/base.py +52 -0
  40. flash/envs/registry.py +62 -0
  41. flash/mcp/__init__.py +1 -0
  42. flash/mcp/server.py +85 -0
  43. flash/providers/__init__.py +59 -0
  44. flash/providers/_auth.py +24 -0
  45. flash/providers/_http.py +230 -0
  46. flash/providers/_instance.py +416 -0
  47. flash/providers/_instance_bootstrap.py +517 -0
  48. flash/providers/_poll.py +311 -0
  49. flash/providers/allocator.py +193 -0
  50. flash/providers/base.py +431 -0
  51. flash/providers/hyperstack/__init__.py +127 -0
  52. flash/providers/hyperstack/api.py +522 -0
  53. flash/providers/hyperstack/auth.py +17 -0
  54. flash/providers/hyperstack/gpus.py +29 -0
  55. flash/providers/hyperstack/jobs/__init__.py +632 -0
  56. flash/providers/hyperstack/jobs/builders.py +122 -0
  57. flash/providers/hyperstack/preflight.py +23 -0
  58. flash/providers/hyperstack/pricing.py +26 -0
  59. flash/providers/hyperstack/train.py +25 -0
  60. flash/providers/lambdalabs/__init__.py +139 -0
  61. flash/providers/lambdalabs/api.py +261 -0
  62. flash/providers/lambdalabs/auth.py +18 -0
  63. flash/providers/lambdalabs/gpus.py +29 -0
  64. flash/providers/lambdalabs/jobs/__init__.py +724 -0
  65. flash/providers/lambdalabs/jobs/builders.py +118 -0
  66. flash/providers/lambdalabs/preflight.py +27 -0
  67. flash/providers/lambdalabs/pricing.py +51 -0
  68. flash/providers/lambdalabs/train.py +27 -0
  69. flash/providers/preflight.py +55 -0
  70. flash/providers/realized.py +80 -0
  71. flash/providers/runpod/__init__.py +130 -0
  72. flash/providers/runpod/api.py +186 -0
  73. flash/providers/runpod/auth.py +37 -0
  74. flash/providers/runpod/cost.py +57 -0
  75. flash/providers/runpod/gpus.py +46 -0
  76. flash/providers/runpod/jobs.py +956 -0
  77. flash/providers/runpod/keys.py +139 -0
  78. flash/providers/runpod/preflight.py +30 -0
  79. flash/providers/runpod/preload.py +915 -0
  80. flash/providers/runpod/pricing.py +18 -0
  81. flash/providers/runpod/slots.py +79 -0
  82. flash/providers/runpod/train/__init__.py +150 -0
  83. flash/providers/runpod/train/deps.py +395 -0
  84. flash/providers/runpod/train/endpoints.py +820 -0
  85. flash/py.typed +0 -0
  86. flash/runner/__init__.py +686 -0
  87. flash/runner/checkpoints.py +82 -0
  88. flash/runner/deploy.py +422 -0
  89. flash/runner/lifecycle.py +672 -0
  90. flash/schema/__init__.py +375 -0
  91. flash/schema/fields.py +331 -0
  92. flash/serve/__init__.py +1 -0
  93. flash/serve/deploy.py +326 -0
  94. flash/serve/pricing.py +60 -0
  95. flash/server/__init__.py +1 -0
  96. flash/server/__main__.py +20 -0
  97. flash/server/app.py +961 -0
  98. flash/server/auth.py +263 -0
  99. flash/server/billing.py +124 -0
  100. flash/server/checkpoints.py +110 -0
  101. flash/server/db.py +160 -0
  102. flash/server/environment_registry.py +102 -0
  103. flash/server/envs.py +360 -0
  104. flash/server/reconcile.py +163 -0
  105. flash/server/run_registry.py +150 -0
  106. flash/spec.py +333 -0
  107. freesolo_flash_dev-0.2.25.dist-info/METADATA +192 -0
  108. freesolo_flash_dev-0.2.25.dist-info/RECORD +111 -0
  109. freesolo_flash_dev-0.2.25.dist-info/WHEEL +4 -0
  110. freesolo_flash_dev-0.2.25.dist-info/entry_points.txt +3 -0
  111. freesolo_flash_dev-0.2.25.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,375 @@
1
+ """Parse Flash TOML configs into worker JobSpecs."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+ import sys
7
+ import tomllib
8
+ from typing import Any
9
+
10
+ from flash.catalog import normalize_algorithm, resolve_model
11
+ from flash.providers.base import (
12
+ UnsupportedGpuError,
13
+ canonical_gpu,
14
+ provisional_gpu,
15
+ )
16
+ from flash.schema.fields import (
17
+ ConfigError,
18
+ _coerce_scalar,
19
+ _environment_secrets,
20
+ _require_environment_ref,
21
+ _train_float,
22
+ _train_int,
23
+ _train_stops,
24
+ _wandb_spec,
25
+ _worker_env,
26
+ )
27
+ from flash.spec import EnvironmentSpec, GpuSpec, JobSpec, TrainSpec
28
+
29
# One segment of an "<owner>/<repo>" id: a leading ASCII alphanumeric followed by any number of
# alphanumerics, dots, underscores, or hyphens.
_OWNER_REPO_RE = r"[A-Za-z0-9][A-Za-z0-9._-]*"
# Run id: same alphabet as above, limited to 1-128 characters in total.
_RUN_ID_RE = r"[A-Za-z0-9][A-Za-z0-9._-]{0,127}"
# Full adapter reference, anchored at both ends:
#   <owner>/<repo>:<phase>/<run_id>/seed<N>
# where <phase> is "sft" or "rl" and <N> is one or more digits. The named groups
# (repo, phase, run_id, seed) expose the individual components.
_ADAPTER_REF_RE = re.compile(
    rf"^(?P<repo>{_OWNER_REPO_RE}/{_OWNER_REPO_RE}):(?P<phase>sft|rl)/"
    rf"(?P<run_id>{_RUN_ID_RE})/seed(?P<seed>\d+)$"
)
35
+
36
+
37
+ def load_toml(path: str) -> dict[str, Any]:
38
+ with open(path, "rb") as f:
39
+ return tomllib.load(f)
40
+
41
+
42
def spec_from_file(
    path: str,
    run_id: str | None = None,
    overrides: list[str] | None = None,
    extra_configs: list[str] | None = None,
) -> JobSpec:
    """Load a TOML config from disk, layer extras and overrides, and build a JobSpec.

    Precedence, lowest to highest: the base file at ``path``, then each file in
    ``extra_configs`` in the order given (deep-merged), then each ``key=value``
    entry in ``overrides``. The merged mapping is handed to ``spec_from_dict``
    together with ``run_id``.
    """
    config = load_toml(path)

    # Composed configs: each later file wins over keys set by earlier ones.
    layer_paths = extra_configs if extra_configs else []
    for layer_path in layer_paths:
        _deep_merge(config, load_toml(layer_path))

    # Dotted `--set key=value` overrides are applied last, so they win over every file.
    assignments = overrides if overrides else []
    for assignment in assignments:
        _apply_override(config, assignment)

    return spec_from_dict(config, run_id=run_id)
56
+
57
+
58
+ def _deep_merge(base: dict, extra: dict) -> dict:
59
+ for k, v in extra.items():
60
+ if isinstance(v, dict) and isinstance(base.get(k), dict):
61
+ _deep_merge(base[k], v)
62
+ else:
63
+ base[k] = v
64
+ return base
65
+
66
+
67
+ def _apply_override(raw: dict, item: str) -> None:
68
+ if "=" not in item:
69
+ raise ConfigError(f"--set must be key=value, got {item!r}")
70
+ key, value = item.split("=", 1)
71
+ parts = key.strip().split(".")
72
+ node = raw
73
+ for p in parts[:-1]:
74
+ node = node.setdefault(p, {})
75
+ if not isinstance(node, dict):
76
+ raise ConfigError(f"--set path {key!r} traverses a non-table value")
77
+ leaf = parts[-1]
78
+ # support list values like seeds=[0,1]
79
+ val = value.strip()
80
+ # [wandb] leaves are string-valued labels (project / run name); a numeric- or
81
+ # bool-looking value like `--set wandb.run_name=123` is still the string label the
82
+ # user intends. Preserve it as a string instead of coercing it to int/float/bool
83
+ # (which _wandb_spec's string validation would otherwise reject).
84
+ if parts[0] == "wandb":
85
+ node[leaf] = val
86
+ elif val.startswith("[") and val.endswith("]"):
87
+ inner = val[1:-1].strip()
88
+ node[leaf] = [_coerce_scalar(x.strip()) for x in inner.split(",") if x.strip()]
89
+ else:
90
+ node[leaf] = _coerce_scalar(val)
91
+
92
+
93
+ def _init_from_adapter_ref(train_raw: dict[str, Any]) -> str:
94
+ ref_raw = train_raw.get("init_from_adapter")
95
+ if ref_raw is None:
96
+ return ""
97
+ if not isinstance(ref_raw, str):
98
+ raise ConfigError("train.init_from_adapter must be a string")
99
+ ref = ref_raw.strip()
100
+ if not ref:
101
+ return ""
102
+ if _ADAPTER_REF_RE.match(ref):
103
+ return ref
104
+ raise ConfigError(
105
+ "train.init_from_adapter must be the full adapter_ref emitted by `flash status` "
106
+ "(<owner>/<repo>:<phase>/<run_id>/seed<N>)"
107
+ )
108
+
109
+
110
+ # Recognized config keys. Anything else is a typo or a knob in the wrong place — reject it loudly
111
+ # rather than silently ignoring it and training (expensively) against defaults. The classic trap:
112
+ # putting GRPO knobs under a `[grpo]` table (they belong under `[train]`), which used to be dropped
113
+ # without a peep — a run would then use the default rollout (16x more completions) at 16x the cost.
114
+ #
115
+ # Some of these are platform-MANAGED, not user knobs: `gpu`, `model_policy`, `run_id`, and
116
+ # `train.hf_repo` are ignored if a user sets them (the control plane derives/assigns them). They
117
+ # remain RECOGNIZED — not rejected — because a round-tripped JobSpec (spec.to_dict(), which the
118
+ # control plane re-parses on submit) still carries them; rejecting would break that re-validation.
119
+ _TOP_LEVEL_KEYS = frozenset(
120
+ {
121
+ "model",
122
+ "algorithm",
123
+ "model_policy",
124
+ "thinking",
125
+ "environment",
126
+ "train",
127
+ "gpu",
128
+ "worker_env",
129
+ "wandb",
130
+ "run_id",
131
+ }
132
+ )
133
+ _TRAIN_KEYS = frozenset(
134
+ {
135
+ "steps",
136
+ "epochs",
137
+ "lora_rank",
138
+ "lora_alpha",
139
+ "seeds",
140
+ "init_from_adapter",
141
+ "hf_repo",
142
+ "learning_rate",
143
+ "batch_size",
144
+ "max_length",
145
+ "save_every",
146
+ "group_size",
147
+ "temperature",
148
+ "max_tokens",
149
+ "kl_penalty_coef",
150
+ "advantage_clip",
151
+ "thinking_length_penalty_coef",
152
+ "stop_sequences",
153
+ "max_steps",
154
+ "max_examples",
155
+ }
156
+ )
157
def _train_seeds(train_raw: dict[str, Any]) -> tuple[int, ...]:
    """Return ``train.seeds`` as a tuple of ints; a missing or ``None`` value yields ``(0,)``.

    Raises:
        ConfigError: if the value is not a list/tuple, or an entry is a bool,
            a non-integral float, or otherwise not convertible to ``int``.
    """
    seeds_raw = train_raw.get("seeds")
    if seeds_raw is None:
        return (0,)
    # A bare scalar (`seeds = 3`) is not iterable and a string (`seeds = "12"`) would be
    # char-split into (1, 2); both must fail as config errors rather than crash or misbehave.
    if not isinstance(seeds_raw, (list, tuple)):
        raise ConfigError("train.seeds must be a list of integers")
    seeds: list[int] = []
    for seed in seeds_raw:
        # bool is an int subclass and a fractional float would be truncated by int();
        # neither is a seed the user could have meant.
        if isinstance(seed, bool) or (isinstance(seed, float) and not seed.is_integer()):
            raise ConfigError(f"train.seeds entries must be integers, got {seed!r}")
        try:
            seeds.append(int(seed))
        except (TypeError, ValueError) as exc:
            raise ConfigError(f"train.seeds entries must be integers, got {seed!r}") from exc
    return tuple(seeds)


def spec_from_dict(raw: dict[str, Any], run_id: str | None = None) -> JobSpec:
    """Build and validate a ``JobSpec`` from an already-parsed config mapping.

    Args:
        raw: The parsed config. Unknown table-valued top-level keys are rejected; unknown
            scalar top-level keys are tolerated.
        run_id: Run id to stamp on the spec; ``"local"`` is used when it is falsy.

    Returns:
        The constructed ``JobSpec`` after it has passed ``_validate_spec``.

    Raises:
        ConfigError: for any missing, unknown, wrongly-typed, or out-of-range setting,
            including errors reported by the model/GPU resolution helpers.
    """
    # Reject unknown config SECTIONS (table-valued top-level keys) — the footgun is a `[grpo]`
    # table holding rollout knobs that actually belong under `[train]`, silently dropped + run at
    # 16x-cost defaults. We only flag tables, not scalars: callers (e.g. the MCP handler) pass
    # through harmless scalar control flags like `dry_run`/`background` alongside the spec.
    unknown = sorted(k for k in set(raw) - _TOP_LEVEL_KEYS if isinstance(raw[k], dict))
    if unknown:
        hint = ""
        if {"grpo", "sft"} & set(unknown):
            hint = (
                " — GRPO/SFT knobs (group_size, batch_size, max_tokens, …) belong under [train], "
                "not a [grpo]/[sft] table"
            )
        raise ConfigError(
            f"unknown config section(s): {', '.join(unknown)} "
            f"(allowed tables: environment, train, gpu, wandb, worker_env){hint}"
        )
    try:
        model = raw["model"]
    except KeyError as exc:
        raise ConfigError("config must set `model`") from exc

    try:
        algorithm = normalize_algorithm(raw.get("algorithm"))
    except ValueError as exc:
        raise ConfigError(str(exc)) from exc
    # model_policy (curated "catalog" vs any-fitting-HF-model "allow") is NOT a user knob: managed
    # runs always use the curated catalog, so a user-supplied model_policy is ignored. (The "allow"
    # path still exists in resolve_model for internal use, but a submitted config can't select it.)
    model_policy = "catalog"
    thinking = raw.get("thinking", False)  # reasoning mode OFF by default (operator preference)
    if not isinstance(thinking, bool):
        raise ConfigError("thinking must be a boolean")

    # ``is None`` (not ``or {}``): a missing section defaults to an empty table, but a present-
    # but-non-dict value (e.g. ``environment = false``) must reach the "must be a table" check
    # rather than being silently coerced to ``{}`` and bypassing validation.
    env_raw = raw.get("environment")
    if env_raw is None:
        env_raw = {}
    if not isinstance(env_raw, dict):
        raise ConfigError("[environment] must be a table")
    # Local environment paths are gone: a run names a published Freesolo env by [environment] id.
    # A stray `path` (alone or alongside `id`) is a stale config — reject it loudly instead of
    # silently ignoring the key and training against the wrong/missing env.
    if env_raw.get("path"):
        raise ConfigError(
            "local environment paths are no longer supported — remove `path` and reference a "
            "Freesolo environment `id` returned by `flash env push --name <name>`"
        )
    # Validate the [environment] sub-fields before they reach EnvironmentSpec(...). The
    # constructor's ``dict(... or {})`` / ``tuple(str(p) for p in ... or ())`` papers over a falsy
    # value (false -> {}/()) but a present-but-wrong-typed value otherwise crashes opaquely or
    # silently misbehaves: ``params = "x"`` -> ``dict("x")`` ValueError, ``params = 1`` ->
    # ``dict(1)`` TypeError (a 500), and ``pip = "x"`` is char-split into ('x',) (the worker then
    # tries to install bogus one-char packages). A MISSING sub-field — absent OR ``None`` (e.g.
    # JSON ``null``) — keeps its default; any present, NON-None value must be the right type. A
    # falsy ``params = false`` is still rejected, mirroring the section-level rule that
    # ``environment = false`` must fail rather than silently coerce. Mirrors the ``must be a
    # table`` style; a string is never char-split.
    if env_raw.get("params") is not None and not isinstance(env_raw["params"], dict):
        raise ConfigError("[environment] params must be a table")
    if env_raw.get("pip") is not None and not isinstance(env_raw["pip"], (list, tuple)):
        raise ConfigError("[environment] pip must be a list of strings")
    if env_raw.get("pip") is not None and not all(isinstance(p, str) for p in env_raw["pip"]):
        raise ConfigError("[environment] pip entries must be strings")
    environment_secrets = _environment_secrets(env_raw.get("secrets"))
    train_raw = raw.get("train")
    if train_raw is None:
        train_raw = {}
    if not isinstance(train_raw, dict):
        raise ConfigError("[train] must be a table")
    unknown_train = sorted(set(train_raw) - _TRAIN_KEYS)
    if unknown_train:
        raise ConfigError(
            f"[train] unknown key(s): {', '.join(unknown_train)} "
            f"(allowed: {', '.join(sorted(_TRAIN_KEYS))})"
        )
    # Validate seeds with the same "present and non-None must be the right type" rule used for
    # the [environment] sub-fields, so a malformed value is a ConfigError, not a raw crash.
    seeds = _train_seeds(train_raw)
    gpu_raw = raw.get("gpu")
    if gpu_raw is None:
        gpu_raw = {}
    if not isinstance(gpu_raw, dict):
        raise ConfigError("[gpu] must be a table")

    # GPU allocation is fully automatic: the submit-time allocator always picks the cheapest
    # fitting active RunPod class — there is no GPU pin. A config's gpu.type is not a user knob.
    # ``provisional_gpu`` computes the offline RunPod-static cheapest-validated-that-fits for
    # sizing/display only; the allocator re-resolves it at submit time.
    try:
        # No GPU pin: the cheapest fitting VALIDATED class (the pool the deployed control plane
        # accepts). The submit-time allocator re-resolves it on RunPod.
        gpu_type = provisional_gpu(model, algorithm=algorithm, train=train_raw, thinking=thinking)
    except UnsupportedGpuError as exc:
        raise ConfigError(str(exc)) from exc
    try:
        info = resolve_model(model, algorithm, policy=model_policy, gpu=gpu_type)
    except ValueError as exc:
        raise ConfigError(str(exc)) from exc
    if thinking and info.thinking == "none":
        raise ConfigError(
            f"{model} does not support thinking mode (its chat template has no "
            f"<think> support); pick a thinking-capable model — `flash models` lists "
            f"each model's thinking capability"
        )
    if not thinking and info.thinking == "always":
        raise ConfigError(
            f"{model} always emits <think> reasoning and cannot run with thinking "
            f"disabled; set thinking = true"
        )
    if thinking and info.thinking == "unknown":
        # stderr, not stdout: spec_from_dict runs inside flash/mcp/server.py, which speaks a
        # one-JSON-object-per-line protocol on stdout — a warning line there corrupts the stream.
        print(
            f"warning: open-model policy: cannot verify that {model}'s chat template "
            f"supports thinking mode; the run proceeds with enable_thinking=true",
            file=sys.stderr,
        )

    # worker_env is the lower-level per-run escape hatch ([worker_env] table, string-valued,
    # secret-guarded; the worker reads it for the per-run chalk/kernel opt-in). The optional
    # [wandb] naming table is a separate, typed spec field (JobSpec.wandb) — NOT folded into
    # worker_env env vars.
    worker_env = _worker_env(raw.get("worker_env"))
    wandb_spec = _wandb_spec(raw.get("wandb"))

    spec = JobSpec(
        model=model,
        algorithm=algorithm,
        environment=EnvironmentSpec(
            id=str(env_raw.get("id") or ""),
            params=dict(env_raw.get("params") or {}),
            pip=tuple(str(p) for p in env_raw.get("pip") or ()),
            secrets=environment_secrets,
        ),
        train=TrainSpec(
            steps=_train_int(train_raw, "steps", minimum=1),
            epochs=_train_int(train_raw, "epochs", minimum=1),
            lora_rank=_train_int(train_raw, "lora_rank", minimum=1) or 32,
            lora_alpha=_train_int(train_raw, "lora_alpha", minimum=1) or 64,
            seeds=seeds,
            init_from_adapter=_init_from_adapter_ref(train_raw),
            # hf_repo is assigned by the control plane (a per-run private dataset under the
            # operator's namespace, written by the operator HF_TOKEN); a user-supplied
            # [train] hf_repo is ignored. See flash.runner.submit_job._assign_managed_hf_repo.
            hf_repo="",
            learning_rate=_train_float(train_raw, "learning_rate", minimum=0.0, exclusive=True),
            batch_size=_train_int(train_raw, "batch_size", minimum=1),
            max_length=_train_int(train_raw, "max_length", minimum=1),
            save_every=_train_int(train_raw, "save_every", minimum=1),
            group_size=_train_int(train_raw, "group_size", minimum=1),
            temperature=_train_float(train_raw, "temperature", minimum=0.0),
            max_tokens=_train_int(train_raw, "max_tokens", minimum=1),
            kl_penalty_coef=_train_float(train_raw, "kl_penalty_coef", minimum=0.0),
            advantage_clip=_train_float(train_raw, "advantage_clip", minimum=0.0),
            thinking_length_penalty_coef=_train_float(
                train_raw, "thinking_length_penalty_coef", minimum=0.0, maximum=1.0
            ),
            stop_sequences=_train_stops(train_raw),
            # SFT caps: max_steps caps optimizer steps (cheap pre-flight smoke); max_examples
            # truncates the SFT dataset. minimum=0 so an explicit 0 means "no cap" (matches the
            # TrainSpec "None/0 -> no cap" contract); the worker reads these from [train].
            max_steps=_train_int(train_raw, "max_steps", minimum=0),
            max_examples=_train_int(train_raw, "max_examples", minimum=0),
        ),
        # GPU allocation, disk sizing, retry budget, and network volumes are all platform-managed:
        # the submit-time allocator picks the cheapest fitting validated RunPod GPU, disk is raised
        # to the model's minimum server-side, and the infra knobs are operator defaults. A user
        # [gpu] table is ignored; gpu_type here is the offline sizing/display provisional,
        # re-resolved at submit.
        gpu=GpuSpec(type=gpu_type),
        run_id=run_id or "local",  # server-assigned (new_run_id at create_run); never user-set
        worker_env=worker_env,
        model_policy=model_policy,
        thinking=thinking,
        wandb=wandb_spec,
    )
    _validate_spec(spec)
    return spec
335
+
336
+
337
def _validate_spec(spec: JobSpec) -> None:
    """Run cross-field checks on a fully built ``JobSpec``.

    Checks run in a fixed order and the first failure is raised: seeds present,
    GPU type recognised, step/epoch count positive for the active algorithm,
    environment id present and well-formed, then LoRA rank and alpha positive.

    Raises:
        ConfigError: on the first check that fails.
    """
    train = spec.train
    if not train.seeds:
        raise ConfigError("train.seeds must contain at least one seed")

    try:
        canonical_gpu(spec.gpu.type)
    except UnsupportedGpuError as exc:
        raise ConfigError(str(exc)) from exc

    # GRPO consumes `steps`, SFT consumes `epochs`. An explicit non-positive count for the
    # one in use must fail here, before a worker is provisioned and falls back to a default.
    if spec.algorithm == "grpo":
        if train.steps is not None and train.steps <= 0:
            raise ConfigError("train.steps must be positive for GRPO")
    if spec.algorithm == "sft":
        if train.epochs is not None and train.epochs <= 0:
            raise ConfigError("train.epochs must be positive for SFT")

    # A run always names a Freesolo environment by id: no default, no local path mode.
    env_id = spec.environment.id
    if not env_id:
        raise ConfigError(
            "config must set [environment] id (upload an environment with "
            '`flash env push --name <name>` and paste the returned id, e.g. "your-name/your-env"); '
            "there is no local path mode"
        )
    _require_environment_ref(
        env_id,
        '[environment] id must be a Freesolo environment id (for example "your-name/your-env")',
    )

    if train.lora_rank <= 0:
        raise ConfigError("train.lora_rank must be positive")
    # train.hf_repo is deliberately not checked: it is no longer a user field and is assigned
    # server-side (flash.runner.submit_job; see _assign_managed_hf_repo).
    # The GRPO recipe knobs (group_size/temperature/max_tokens/kl_penalty_coef/advantage_clip/
    # thinking_length_penalty_coef) were already range-checked by the _train_int/_train_float
    # coercers at parse time, so they are not re-checked here.
    # A zero or negative lora_alpha would train a paid run whose adapter has zero scaling at
    # serve time, i.e. a no-op adapter, so it is rejected up front.
    if train.lora_alpha <= 0:
        raise ConfigError("train.lora_alpha must be positive")