freesolo-flash-dev 0.2.25__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flash/__init__.py +29 -0
- flash/_channel.py +23 -0
- flash/_fileio.py +35 -0
- flash/_logging.py +49 -0
- flash/_update_check.py +266 -0
- flash/catalog.py +253 -0
- flash/cli/__init__.py +1 -0
- flash/cli/main/__init__.py +227 -0
- flash/cli/main/__main__.py +6 -0
- flash/cli/main/commands.py +636 -0
- flash/cli/main/envpush.py +317 -0
- flash/cli/main/render.py +599 -0
- flash/cli/main/training_doc.py +455 -0
- flash/client/__init__.py +14 -0
- flash/client/config.py +70 -0
- flash/client/http.py +372 -0
- flash/client/runtime_secrets.py +69 -0
- flash/client/specs.py +20 -0
- flash/cost/__init__.py +16 -0
- flash/cost/analytical.py +175 -0
- flash/cost/facts.py +114 -0
- flash/cost/spec.py +113 -0
- flash/cost/types.py +158 -0
- flash/engine/__init__.py +6 -0
- flash/engine/accounting.py +36 -0
- flash/engine/chalk_kernels.py +116 -0
- flash/engine/multiturn_rollout.py +780 -0
- flash/engine/recipe.py +86 -0
- flash/engine/vram.py +603 -0
- flash/engine/worker/__init__.py +2916 -0
- flash/engine/worker/__main__.py +4 -0
- flash/engine/worker/kernel_warmup.py +400 -0
- flash/engine/worker/lora.py +796 -0
- flash/engine/worker/packing.py +366 -0
- flash/engine/worker/perf.py +1048 -0
- flash/envs/__init__.py +10 -0
- flash/envs/adapter/__init__.py +883 -0
- flash/envs/adapter/rubric.py +222 -0
- flash/envs/base.py +52 -0
- flash/envs/registry.py +62 -0
- flash/mcp/__init__.py +1 -0
- flash/mcp/server.py +85 -0
- flash/providers/__init__.py +59 -0
- flash/providers/_auth.py +24 -0
- flash/providers/_http.py +230 -0
- flash/providers/_instance.py +416 -0
- flash/providers/_instance_bootstrap.py +517 -0
- flash/providers/_poll.py +311 -0
- flash/providers/allocator.py +193 -0
- flash/providers/base.py +431 -0
- flash/providers/hyperstack/__init__.py +127 -0
- flash/providers/hyperstack/api.py +522 -0
- flash/providers/hyperstack/auth.py +17 -0
- flash/providers/hyperstack/gpus.py +29 -0
- flash/providers/hyperstack/jobs/__init__.py +632 -0
- flash/providers/hyperstack/jobs/builders.py +122 -0
- flash/providers/hyperstack/preflight.py +23 -0
- flash/providers/hyperstack/pricing.py +26 -0
- flash/providers/hyperstack/train.py +25 -0
- flash/providers/lambdalabs/__init__.py +139 -0
- flash/providers/lambdalabs/api.py +261 -0
- flash/providers/lambdalabs/auth.py +18 -0
- flash/providers/lambdalabs/gpus.py +29 -0
- flash/providers/lambdalabs/jobs/__init__.py +724 -0
- flash/providers/lambdalabs/jobs/builders.py +118 -0
- flash/providers/lambdalabs/preflight.py +27 -0
- flash/providers/lambdalabs/pricing.py +51 -0
- flash/providers/lambdalabs/train.py +27 -0
- flash/providers/preflight.py +55 -0
- flash/providers/realized.py +80 -0
- flash/providers/runpod/__init__.py +130 -0
- flash/providers/runpod/api.py +186 -0
- flash/providers/runpod/auth.py +37 -0
- flash/providers/runpod/cost.py +57 -0
- flash/providers/runpod/gpus.py +46 -0
- flash/providers/runpod/jobs.py +956 -0
- flash/providers/runpod/keys.py +139 -0
- flash/providers/runpod/preflight.py +30 -0
- flash/providers/runpod/preload.py +915 -0
- flash/providers/runpod/pricing.py +18 -0
- flash/providers/runpod/slots.py +79 -0
- flash/providers/runpod/train/__init__.py +150 -0
- flash/providers/runpod/train/deps.py +395 -0
- flash/providers/runpod/train/endpoints.py +820 -0
- flash/py.typed +0 -0
- flash/runner/__init__.py +686 -0
- flash/runner/checkpoints.py +82 -0
- flash/runner/deploy.py +422 -0
- flash/runner/lifecycle.py +672 -0
- flash/schema/__init__.py +375 -0
- flash/schema/fields.py +331 -0
- flash/serve/__init__.py +1 -0
- flash/serve/deploy.py +326 -0
- flash/serve/pricing.py +60 -0
- flash/server/__init__.py +1 -0
- flash/server/__main__.py +20 -0
- flash/server/app.py +961 -0
- flash/server/auth.py +263 -0
- flash/server/billing.py +124 -0
- flash/server/checkpoints.py +110 -0
- flash/server/db.py +160 -0
- flash/server/environment_registry.py +102 -0
- flash/server/envs.py +360 -0
- flash/server/reconcile.py +163 -0
- flash/server/run_registry.py +150 -0
- flash/spec.py +333 -0
- freesolo_flash_dev-0.2.25.dist-info/METADATA +192 -0
- freesolo_flash_dev-0.2.25.dist-info/RECORD +111 -0
- freesolo_flash_dev-0.2.25.dist-info/WHEEL +4 -0
- freesolo_flash_dev-0.2.25.dist-info/entry_points.txt +3 -0
- freesolo_flash_dev-0.2.25.dist-info/licenses/LICENSE +201 -0
|
@@ -0,0 +1,227 @@
|
|
|
1
|
+
"""CLI for the managed Flash service.
|
|
2
|
+
|
|
3
|
+
Every run-lifecycle command is a thin HTTP call to the Flash control plane —
|
|
4
|
+
users authenticate with their freesolo API key (`flash login` verifies it against
|
|
5
|
+
the freesolo backend), never with provider credentials. Config parsing/validation
|
|
6
|
+
and `--dry-run` stay fully local.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import argparse
|
|
12
|
+
import sys
|
|
13
|
+
|
|
14
|
+
from flash import __version__
|
|
15
|
+
from flash._channel import CLI_NAME
|
|
16
|
+
from flash._logging import configure_logging, get_logger
|
|
17
|
+
from flash._update_check import emit_update_notice, maybe_start_update_check
|
|
18
|
+
|
|
19
|
+
# Command handlers + the patched client surface live in submodules; re-export them so
|
|
20
|
+
# `flash.cli.main` stays the single public import surface (and so monkeypatching
|
|
21
|
+
# `flash.cli.main.commands` reaches the bare globals the handlers read).
|
|
22
|
+
from flash.cli.main.commands import ( # noqa: F401
|
|
23
|
+
_CLI_DONE_STATES,
|
|
24
|
+
_OK_STATES,
|
|
25
|
+
_STARTER_ENV_PY,
|
|
26
|
+
_USER_ERRORS,
|
|
27
|
+
_follow_run,
|
|
28
|
+
_poll_logs,
|
|
29
|
+
client_from_config,
|
|
30
|
+
cmd_cancel,
|
|
31
|
+
cmd_chat,
|
|
32
|
+
cmd_checkpoints,
|
|
33
|
+
cmd_deploy,
|
|
34
|
+
cmd_deployments,
|
|
35
|
+
cmd_env_list,
|
|
36
|
+
cmd_env_setup,
|
|
37
|
+
cmd_gpus,
|
|
38
|
+
cmd_login,
|
|
39
|
+
cmd_models,
|
|
40
|
+
cmd_runs,
|
|
41
|
+
cmd_status,
|
|
42
|
+
cmd_train,
|
|
43
|
+
cmd_undeploy,
|
|
44
|
+
cmd_version,
|
|
45
|
+
cmd_whoami,
|
|
46
|
+
verify_freesolo_key,
|
|
47
|
+
)
|
|
48
|
+
from flash.cli.main.envpush import cmd_env_install, cmd_env_push
|
|
49
|
+
|
|
50
|
+
logger = get_logger("flash.cli.main")
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _build_parser() -> argparse.ArgumentParser:
    """Build the top-level argument parser with every subcommand attached.

    Each subcommand binds its handler with ``set_defaults(func=...)``; ``main``
    dispatches by calling ``args.func(args)`` on the parsed namespace.

    Returns:
        The fully configured parser. A subcommand is required, so parsing an
        argument list without one makes argparse report a usage error.
    """
    parser = argparse.ArgumentParser(prog=CLI_NAME, description="Managed LoRA post-training")
    parser.add_argument("-V", "--version", action="version", version=f"{CLI_NAME} {__version__}")
    parser.add_argument(
        "--debug",
        action="store_true",
        help="show full tracebacks on error",
    )
    parser.add_argument(
        "-v",
        "--verbose",
        action="count",
        default=0,
        help="increase log verbosity (-v for info, -vv for debug)",
    )
    sub = parser.add_subparsers(dest="cmd", required=True)

    # --- identity / catalog commands -------------------------------------------------
    version = sub.add_parser("version", help="print the Flash version")
    version.set_defaults(func=cmd_version)

    login = sub.add_parser(
        "login",
        help="log in with your freesolo API key (create one at https://freesolo.co/sign-in)",
    )
    login.add_argument(
        "--api-key",
        help="your freesolo API key (default: FREESOLO_API_KEY); create it at "
        "https://freesolo.co/sign-in",
    )
    login.add_argument(
        "--freesolo-url",
        dest="freesolo_url",
        help="freesolo backend base URL (default: FREESOLO_BASE_URL or https://api.freesolo.co)",
    )
    login.add_argument(
        "--api-url", help="flash control-plane URL for training calls (default: FLASH_API_URL)"
    )
    login.set_defaults(func=cmd_login)

    whoami = sub.add_parser("whoami", help="show the identity behind your stored key")
    whoami.set_defaults(func=cmd_whoami)

    models = sub.add_parser("models", help="list supported base models")
    models.set_defaults(func=cmd_models)

    gpus = sub.add_parser("gpus", help="list managed GPU classes with live $/hr")
    gpus.set_defaults(func=cmd_gpus)

    # --- `env` command group (its own required sub-subcommand) -----------------------
    env = sub.add_parser("env", help="manage Freesolo environments")
    env_sub = env.add_subparsers(dest="env_cmd", required=True)
    setup = env_sub.add_parser("setup", help="create a starter Freesolo environment scaffold")
    setup.set_defaults(func=cmd_env_setup)

    env_list = env_sub.add_parser("list", help="list installed + local environments")
    env_list.set_defaults(func=cmd_env_list)

    env_install = env_sub.add_parser("install", help="record a Freesolo environment")
    env_install.add_argument("env_id", help="the Freesolo environment id to record")
    env_install.set_defaults(func=cmd_env_install)

    env_push = env_sub.add_parser("push", help="upload a local Freesolo environment")
    env_push.add_argument(
        "--name",
        required=True,
        help="Freesolo environment name to publish or update",
    )
    env_push.add_argument("path", nargs="?", default=".")
    env_push.set_defaults(func=cmd_env_push)

    # --- run lifecycle commands ------------------------------------------------------
    train = sub.add_parser("train", help="submit a managed training run from a TOML config")
    train.add_argument("config")
    train.add_argument(
        "--config",
        dest="extra_configs",
        action="append",
        default=[],
        help="additional TOML to deep-merge (config composition); repeatable",
    )
    train.add_argument(
        "--set",
        dest="overrides",
        action="append",
        default=[],
        metavar="key=value",
        help="override a config value; repeatable",
    )
    train.add_argument("--dry-run", action="store_true")
    train.add_argument(
        "--cost",
        action="store_true",
        help="print the pre-flight USD cost for the config and exit (no submit)",
    )
    train.add_argument(
        "--background",
        action="store_true",
        help="submit and return immediately instead of following logs",
    )
    train.set_defaults(func=cmd_train)

    status = sub.add_parser("status", help="show a run's status, logs, or follow logs")
    status.add_argument("run_id")
    status.add_argument(
        "--logs",
        action="store_true",
        help="print current logs before status — the orchestrator log plus the train-subprocess "
        "stdout + traceback (console_/error_<phase>.txt) fetched from the run's HF artifact repo",
    )
    status.add_argument(
        "-f",
        "--follow",
        action="store_true",
        help="stream logs until the run ends, then print final status",
    )
    status.set_defaults(func=cmd_status)

    runs = sub.add_parser("runs", help="list runs and their state/cost")
    runs.set_defaults(func=cmd_runs)

    cancel = sub.add_parser("cancel", help="cancel a run")
    cancel.add_argument("run_id")
    cancel.set_defaults(func=cmd_cancel)

    checkpoints = sub.add_parser(
        "checkpoints", help="list a run's deployable per-step RL checkpoints"
    )
    checkpoints.add_argument("run_id")
    checkpoints.set_defaults(func=cmd_checkpoints)

    # --- serving commands ------------------------------------------------------------
    # `help=` is what gives a subcommand its own line in the parent parser's command
    # listing; without it `deploy` was the only command missing from `--help`.
    deploy = sub.add_parser("deploy", help="deploy a run's adapter to a serving endpoint")
    deploy.add_argument("run_id")
    deploy.add_argument("--dry-run", action="store_true")
    deploy.add_argument(
        "--step",
        type=int,
        default=None,
        help="deploy a specific intermediate checkpoint (see `flash checkpoints <run_id>`) "
        "instead of the run's final adapter; works even for a run cancelled mid-RL",
    )
    deploy.set_defaults(func=cmd_deploy)

    undeploy = sub.add_parser("undeploy", help="tear down a run's serving endpoint")
    undeploy.add_argument("run_id")
    undeploy.set_defaults(func=cmd_undeploy)

    deployments = sub.add_parser("deployments", help="list active serving deployments")
    deployments.set_defaults(func=cmd_deployments)

    chat = sub.add_parser("chat", help="chat with a deployed adapter")
    chat.add_argument("run_id")
    chat.add_argument("-m", "--message", required=True)
    chat.add_argument("--max-tokens", type=int, default=512)
    chat.add_argument("--temperature", type=float, default=0.0)
    chat.set_defaults(func=cmd_chat)

    # The control plane is operator-only and run as a separate one-off service via the
    # `flash-server` console script (flash.server.__main__:main), not a `flash` subcommand.

    return parser


def main(argv: list[str] | None = None) -> int:
    """Parse the command line and run the selected subcommand.

    Args:
        argv: Arguments to parse. ``None`` is passed straight to
            ``ArgumentParser.parse_args``, which then reads ``sys.argv[1:]``.

    Returns:
        The handler's return value on success, ``1`` after printing a handled
        ``_USER_ERRORS`` exception to stderr, or ``130`` when interrupted with
        Ctrl-C.

    Raises:
        SystemExit: raised by argparse itself for ``--help``, ``--version`` and
            usage errors.
        Exception: any ``_USER_ERRORS`` exception is re-raised unchanged when
            ``--debug`` was given, so the full traceback is shown.
    """
    args = _build_parser().parse_args(argv)
    configure_logging(verbosity=getattr(args, "verbose", 0))
    debug = getattr(args, "debug", False)
    # Kick off a once-a-day PyPI version check in the background; the "new release available"
    # notice (if any) prints to stderr after the command output (see emit_update_notice).
    update_check = maybe_start_update_check()
    try:
        return args.func(args)
    except _USER_ERRORS as exc:
        if debug:
            raise
        print(f"error: {exc}", file=sys.stderr)
        return 1
    except KeyboardInterrupt:
        print("aborted", file=sys.stderr)
        return 130
    finally:
        # Runs on every exit path (success, handled error, re-raise, Ctrl-C).
        emit_update_notice(update_check)
|