freesolo-flash-dev 0.2.25__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flash/__init__.py +29 -0
- flash/_channel.py +23 -0
- flash/_fileio.py +35 -0
- flash/_logging.py +49 -0
- flash/_update_check.py +266 -0
- flash/catalog.py +253 -0
- flash/cli/__init__.py +1 -0
- flash/cli/main/__init__.py +227 -0
- flash/cli/main/__main__.py +6 -0
- flash/cli/main/commands.py +636 -0
- flash/cli/main/envpush.py +317 -0
- flash/cli/main/render.py +599 -0
- flash/cli/main/training_doc.py +455 -0
- flash/client/__init__.py +14 -0
- flash/client/config.py +70 -0
- flash/client/http.py +372 -0
- flash/client/runtime_secrets.py +69 -0
- flash/client/specs.py +20 -0
- flash/cost/__init__.py +16 -0
- flash/cost/analytical.py +175 -0
- flash/cost/facts.py +114 -0
- flash/cost/spec.py +113 -0
- flash/cost/types.py +158 -0
- flash/engine/__init__.py +6 -0
- flash/engine/accounting.py +36 -0
- flash/engine/chalk_kernels.py +116 -0
- flash/engine/multiturn_rollout.py +780 -0
- flash/engine/recipe.py +86 -0
- flash/engine/vram.py +603 -0
- flash/engine/worker/__init__.py +2916 -0
- flash/engine/worker/__main__.py +4 -0
- flash/engine/worker/kernel_warmup.py +400 -0
- flash/engine/worker/lora.py +796 -0
- flash/engine/worker/packing.py +366 -0
- flash/engine/worker/perf.py +1048 -0
- flash/envs/__init__.py +10 -0
- flash/envs/adapter/__init__.py +883 -0
- flash/envs/adapter/rubric.py +222 -0
- flash/envs/base.py +52 -0
- flash/envs/registry.py +62 -0
- flash/mcp/__init__.py +1 -0
- flash/mcp/server.py +85 -0
- flash/providers/__init__.py +59 -0
- flash/providers/_auth.py +24 -0
- flash/providers/_http.py +230 -0
- flash/providers/_instance.py +416 -0
- flash/providers/_instance_bootstrap.py +517 -0
- flash/providers/_poll.py +311 -0
- flash/providers/allocator.py +193 -0
- flash/providers/base.py +431 -0
- flash/providers/hyperstack/__init__.py +127 -0
- flash/providers/hyperstack/api.py +522 -0
- flash/providers/hyperstack/auth.py +17 -0
- flash/providers/hyperstack/gpus.py +29 -0
- flash/providers/hyperstack/jobs/__init__.py +632 -0
- flash/providers/hyperstack/jobs/builders.py +122 -0
- flash/providers/hyperstack/preflight.py +23 -0
- flash/providers/hyperstack/pricing.py +26 -0
- flash/providers/hyperstack/train.py +25 -0
- flash/providers/lambdalabs/__init__.py +139 -0
- flash/providers/lambdalabs/api.py +261 -0
- flash/providers/lambdalabs/auth.py +18 -0
- flash/providers/lambdalabs/gpus.py +29 -0
- flash/providers/lambdalabs/jobs/__init__.py +724 -0
- flash/providers/lambdalabs/jobs/builders.py +118 -0
- flash/providers/lambdalabs/preflight.py +27 -0
- flash/providers/lambdalabs/pricing.py +51 -0
- flash/providers/lambdalabs/train.py +27 -0
- flash/providers/preflight.py +55 -0
- flash/providers/realized.py +80 -0
- flash/providers/runpod/__init__.py +130 -0
- flash/providers/runpod/api.py +186 -0
- flash/providers/runpod/auth.py +37 -0
- flash/providers/runpod/cost.py +57 -0
- flash/providers/runpod/gpus.py +46 -0
- flash/providers/runpod/jobs.py +956 -0
- flash/providers/runpod/keys.py +139 -0
- flash/providers/runpod/preflight.py +30 -0
- flash/providers/runpod/preload.py +915 -0
- flash/providers/runpod/pricing.py +18 -0
- flash/providers/runpod/slots.py +79 -0
- flash/providers/runpod/train/__init__.py +150 -0
- flash/providers/runpod/train/deps.py +395 -0
- flash/providers/runpod/train/endpoints.py +820 -0
- flash/py.typed +0 -0
- flash/runner/__init__.py +686 -0
- flash/runner/checkpoints.py +82 -0
- flash/runner/deploy.py +422 -0
- flash/runner/lifecycle.py +672 -0
- flash/schema/__init__.py +375 -0
- flash/schema/fields.py +331 -0
- flash/serve/__init__.py +1 -0
- flash/serve/deploy.py +326 -0
- flash/serve/pricing.py +60 -0
- flash/server/__init__.py +1 -0
- flash/server/__main__.py +20 -0
- flash/server/app.py +961 -0
- flash/server/auth.py +263 -0
- flash/server/billing.py +124 -0
- flash/server/checkpoints.py +110 -0
- flash/server/db.py +160 -0
- flash/server/environment_registry.py +102 -0
- flash/server/envs.py +360 -0
- flash/server/reconcile.py +163 -0
- flash/server/run_registry.py +150 -0
- flash/spec.py +333 -0
- freesolo_flash_dev-0.2.25.dist-info/METADATA +192 -0
- freesolo_flash_dev-0.2.25.dist-info/RECORD +111 -0
- freesolo_flash_dev-0.2.25.dist-info/WHEEL +4 -0
- freesolo_flash_dev-0.2.25.dist-info/entry_points.txt +3 -0
- freesolo_flash_dev-0.2.25.dist-info/licenses/LICENSE +201 -0
flash/engine/recipe.py
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
"""Frozen, shared Flash fine-tuning recipe.
|
|
2
|
+
|
|
3
|
+
Single source of truth for the default fine-tuning hyperparameters: base model,
|
|
4
|
+
tokenizer, data, LoRA config, optimization, token budget, and decoding.
|
|
5
|
+
Per-run TOML configs (parsed into a ``JobSpec``) override the relevant fields.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from dataclasses import dataclass, field
|
|
11
|
+
|
|
12
|
+
# ----------------------------------------------------------------------------
# Model identity
# ----------------------------------------------------------------------------
# Recipe fallback base model. The worker resolves JOB_SPEC.model (carried by the full
# JobSpec) first and only falls back to RECIPE.hf_model_id; this literal is the
# last-resort default when the spec carries no model.
# Keep it in sync with catalog.DEFAULT_MODEL (a proven dense text-only instruction model
# that loads on the current worker stack: transformers 5.x / TRL 1.x / vLLM 0.19.x; the
# natively-multimodal Qwen3.5/3.6 checkpoints are also catalog'd, trained/served text-only).
HF_MODEL_ID = "Qwen/Qwen3.5-4B"  # catalog DEFAULT_MODEL
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
# ----------------------------------------------------------------------------
# LoRA (rank/alpha are the main user-controllable knobs)
# ----------------------------------------------------------------------------
@dataclass(frozen=True)
class LoRAConfig:
    """Default LoRA adapter hyperparameters.

    Instances are frozen, so a field cannot be reassigned after construction;
    build a new instance to use different values.

    The worker adapts all linear projections ("all-linear" — see engine.worker);
    `rank`/`alpha` are the main user-controllable knobs here.
    """

    # Default adapter rank.
    rank: int = 32
    # Default LoRA alpha (twice the default rank).
    alpha: int = 64
    # Adapter dropout; 0.0 by default.
    dropout: float = 0.0
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
# ----------------------------------------------------------------------------
# SFT (Phase 1)
# ----------------------------------------------------------------------------
@dataclass(frozen=True)
class SFTConfig:
    """Default supervised fine-tuning (Phase 1) hyperparameters.

    Instances are frozen, so a field cannot be reassigned after construction.
    """

    # Sequence-length cap for non-thinking runs.
    max_seq_len: int = 1024
    # Thinking-mode sequence cap: <think> traces in targets need headroom. A deliberate
    # consumer-GPU compromise (SFT cost/VRAM scales with sequence length).
    max_seq_len_thinking: int = 2048
    learning_rate: float = 1e-4
    # NOTE(review): presumably the fraction of training used for LR warmup — confirm
    # against the worker that consumes it.
    warmup_frac: float = 0.03
    # Effective batch = per_device_batch * grad_accum (Arm A) / batch of datums (Arm B)
    effective_batch: int = 32
    num_epochs: int = 2
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
# ----------------------------------------------------------------------------
# RL / GRPO (Phase 2)
# ----------------------------------------------------------------------------
@dataclass(frozen=True)
class RLConfig:
    """Default RL / GRPO (Phase 2) hyperparameters.

    Instances are frozen, so a field cannot be reassigned after construction.
    """

    learning_rate: float = 1e-5
    # Default engine prompt budget. 512 was too small for real envs with non-trivial system
    # prompts (e.g. a schema/instructions block + the user query), which made every prompt
    # overflow before training started. 2048 fits typical instruction prompts; the run's
    # [train].max_length sets the engine length explicitly when it needs more/less.
    max_prompt_len: int = 2048
    # Completion budget for non-thinking runs.
    max_completion_len: int = 320
    # Thinking-mode completion budget: <think> blocks consume most of it (phase 0
    # showed 320 is hopeless — every completion hit the cap). 1536 is a consumer-GPU
    # compromise (KV cache + rollout cost scale linearly with completion length, ~5x
    # tokens/step vs non-thinking); the run's [train].max_tokens overrides it explicitly.
    max_completion_len_thinking: int = 1536
    prompts_per_step: int = 64
    group_size: int = 8  # G completions per prompt
    num_steps: int = 150  # overridable per-run via the TOML `train.steps`
    sampling_temperature: float = 1.0  # on-policy sampling for rollouts
    sampling_top_p: float = 1.0
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
@dataclass(frozen=True)
class Recipe:
    """The complete shared recipe.

    Bundles the fallback base-model id with the LoRA, SFT and RL default
    configs. Instances are frozen; each sub-config is created by its own
    default factory when not supplied explicitly.
    """

    # Fallback Hugging Face model id, taken from the module-level HF_MODEL_ID.
    hf_model_id: str = HF_MODEL_ID
    lora: LoRAConfig = field(default_factory=LoRAConfig)
    sft: SFTConfig = field(default_factory=SFTConfig)
    rl: RLConfig = field(default_factory=RLConfig)


# Module-level recipe instance built entirely from the defaults declared in this file.
RECIPE = Recipe()
|