freesolo-flash-dev 0.2.25__tar.gz → 0.2.26__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/.env.example +5 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/.github/workflows/bake-kernel-cache.yml +20 -2
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/.github/workflows/publish-dev.yml +17 -3
- freesolo_flash_dev-0.2.26/.github/workflows/version-parity.yml +49 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/.github/workflows/worker-image.yml +12 -0
- freesolo_flash_dev-0.2.26/Dockerfile +42 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/Dockerfile.worker +3 -3
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/PKG-INFO +9 -6
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/README.md +8 -5
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/docker/bake_kernel_cache.py +16 -2
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/docker/bake_pod_entry.py +5 -1
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/docs/cli-style/README.md +2 -2
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/docs/cli-style/generate.py +4 -4
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/__init__.py +1 -1
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/_channel.py +7 -2
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/catalog.py +73 -2
- {freesolo_flash_dev-0.2.25/flash/cli/main → freesolo_flash_dev-0.2.26/flash/cli}/__init__.py +39 -5
- {freesolo_flash_dev-0.2.25/flash/cli/main → freesolo_flash_dev-0.2.26/flash/cli}/__main__.py +1 -1
- {freesolo_flash_dev-0.2.25/flash/cli/main → freesolo_flash_dev-0.2.26/flash/cli}/commands.py +34 -2
- {freesolo_flash_dev-0.2.25/flash/cli/main → freesolo_flash_dev-0.2.26/flash/cli}/render.py +4 -8
- {freesolo_flash_dev-0.2.25/flash/cli/main → freesolo_flash_dev-0.2.26/flash/cli}/training_doc.py +0 -3
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/client/__init__.py +1 -1
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/client/config.py +1 -1
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/client/http.py +34 -5
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/client/runtime_secrets.py +23 -6
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/cost/analytical.py +4 -1
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/cost/facts.py +24 -11
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/cost/spec.py +0 -1
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/engine/accounting.py +0 -2
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/engine/chalk_kernels.py +1 -6
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/engine/recipe.py +0 -12
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/engine/vram.py +42 -9
- freesolo_flash_dev-0.2.26/flash/engine/worker/__init__.py +589 -0
- freesolo_flash_dev-0.2.26/flash/engine/worker/_pkg.py +30 -0
- freesolo_flash_dev-0.2.26/flash/engine/worker/adapter.py +187 -0
- freesolo_flash_dev-0.2.26/flash/engine/worker/decoding.py +144 -0
- freesolo_flash_dev-0.2.26/flash/engine/worker/finalize.py +68 -0
- freesolo_flash_dev-0.2.26/flash/engine/worker/gpu_setup.py +101 -0
- freesolo_flash_dev-0.2.26/flash/engine/worker/grpo.py +297 -0
- freesolo_flash_dev-0.2.26/flash/engine/worker/heartbeat.py +322 -0
- freesolo_flash_dev-0.2.26/flash/engine/worker/hf.py +526 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/engine/worker/kernel_warmup.py +23 -18
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/engine/worker/lora.py +42 -66
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/engine/worker/packing.py +127 -7
- freesolo_flash_dev-0.2.26/flash/engine/worker/perf/__init__.py +495 -0
- freesolo_flash_dev-0.2.26/flash/engine/worker/perf/attn.py +176 -0
- freesolo_flash_dev-0.2.26/flash/engine/worker/perf/diagnostics.py +240 -0
- freesolo_flash_dev-0.2.26/flash/engine/worker/perf/lifecycle.py +274 -0
- freesolo_flash_dev-0.2.26/flash/engine/worker/perf/liger.py +73 -0
- freesolo_flash_dev-0.2.26/flash/engine/worker/perf/loraplus.py +40 -0
- freesolo_flash_dev-0.2.26/flash/engine/worker/perf/memory.py +84 -0
- freesolo_flash_dev-0.2.26/flash/engine/worker/rl.py +912 -0
- freesolo_flash_dev-0.2.26/flash/engine/worker/sft.py +683 -0
- freesolo_flash_dev-0.2.26/flash/engine/worker/wandb_log.py +145 -0
- freesolo_flash_dev-0.2.25/flash/envs/adapter/__init__.py → freesolo_flash_dev-0.2.26/flash/envs/adapter.py +7 -1
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/providers/__init__.py +5 -10
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/providers/_auth.py +0 -8
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/providers/_http.py +3 -3
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/providers/_instance.py +108 -51
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/providers/_instance_bootstrap.py +35 -37
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/providers/_poll.py +147 -14
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/providers/allocator.py +9 -37
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/providers/base.py +61 -50
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/providers/lambdalabs/__init__.py +18 -5
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/providers/lambdalabs/jobs/__init__.py +137 -30
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/providers/lambdalabs/jobs/builders.py +4 -3
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/providers/lambdalabs/pricing.py +1 -0
- freesolo_flash_dev-0.2.26/flash/providers/preflight.py +92 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/providers/realized.py +5 -5
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/providers/runpod/__init__.py +8 -2
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/providers/runpod/api.py +100 -8
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/providers/runpod/gpus.py +0 -5
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/providers/runpod/jobs.py +180 -72
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/providers/runpod/keys.py +15 -9
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/providers/runpod/preload.py +42 -140
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/providers/runpod/train/__init__.py +1 -50
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/providers/runpod/train/deps.py +119 -53
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/providers/runpod/train/endpoints.py +39 -29
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/runner/__init__.py +148 -23
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/runner/lifecycle.py +50 -40
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/schema/__init__.py +3 -3
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/serve/deploy.py +6 -0
- freesolo_flash_dev-0.2.26/flash/serve/export.py +176 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/serve/pricing.py +3 -0
- freesolo_flash_dev-0.2.26/flash/server/_deps.py +115 -0
- freesolo_flash_dev-0.2.26/flash/server/_locks.py +54 -0
- freesolo_flash_dev-0.2.26/flash/server/_runtime.py +238 -0
- freesolo_flash_dev-0.2.26/flash/server/app.py +371 -0
- freesolo_flash_dev-0.2.26/flash/server/billing_retry.py +122 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/server/envs.py +3 -13
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/server/reconcile.py +1 -1
- freesolo_flash_dev-0.2.26/flash/server/routes/__init__.py +6 -0
- freesolo_flash_dev-0.2.26/flash/server/routes/envs.py +48 -0
- freesolo_flash_dev-0.2.26/flash/server/routes/meta.py +42 -0
- freesolo_flash_dev-0.2.26/flash/server/routes/runs.py +175 -0
- freesolo_flash_dev-0.2.26/flash/server/routes/serving.py +427 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/spec.py +4 -3
- freesolo_flash_dev-0.2.26/infisical-entrypoint.sh +45 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/pyproject.toml +9 -8
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/conftest.py +5 -6
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/live/conftest.py +2 -2
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_agent_flash_cli_contract.py +2 -2
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_allocator.py +37 -18
- freesolo_flash_dev-0.2.26/tests/test_billing_retry.py +803 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_checkpoints.py +197 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_cli_commands.py +59 -19
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_cli_errors.py +1 -1
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_cli_estimate.py +1 -1
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_cli_managed.py +1 -1
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_cli_render_theme.py +2 -2
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_client.py +44 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_client_server_integration.py +25 -1
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_config_overrides.py +1 -1
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_cost_analytical.py +23 -5
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_cost_equation.py +1 -1
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_cost_hardware.py +17 -12
- freesolo_flash_dev-0.2.26/tests/test_cost_models.py +64 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_dev_channel.py +11 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_disk_gb.py +9 -2
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_env_install.py +2 -2
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_env_push.py +2 -2
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_envs_coverage.py +2 -2
- freesolo_flash_dev-0.2.26/tests/test_export.py +466 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_flash_mvp.py +1 -27
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_flash_worker.py +152 -17
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_gpus.py +9 -8
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_grpo_params.py +249 -0
- freesolo_flash_dev-0.2.26/tests/test_idle_endpoint_reaper.py +478 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_jobs.py +284 -26
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_lambda_runner.py +344 -19
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_login_perms.py +2 -2
- freesolo_flash_dev-0.2.26/tests/test_mig_guard.py +284 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_packing.py +352 -0
- freesolo_flash_dev-0.2.26/tests/test_poll_helpers.py +95 -0
- freesolo_flash_dev-0.2.26/tests/test_preflight.py +173 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_provider_routing.py +99 -32
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_provider_teardown_robustness.py +16 -63
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_providers_symmetry.py +38 -8
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_realized_cost.py +1 -1
- freesolo_flash_dev-0.2.26/tests/test_resume_on_retry.py +389 -0
- freesolo_flash_dev-0.2.26/tests/test_runpod_key_fingerprint.py +75 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_runpod_key_waterfall.py +29 -1
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_serve.py +49 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_server_api.py +289 -1
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_server_billing.py +23 -1
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_server_db.py +20 -1
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_thinking_config.py +1 -1
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_version.py +1 -1
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_vl_weight_sync.py +27 -3
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_weight_cache.py +264 -198
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_worker_dryrun.py +35 -24
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_worker_image.py +20 -1
- freesolo_flash_dev-0.2.26/tests/test_worker_init_heartbeat.py +357 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_worker_stack.py +202 -10
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/uv.lock +1 -1
- freesolo_flash_dev-0.2.25/Dockerfile +0 -24
- freesolo_flash_dev-0.2.25/flash/cli/__init__.py +0 -1
- freesolo_flash_dev-0.2.25/flash/engine/worker/__init__.py +0 -2916
- freesolo_flash_dev-0.2.25/flash/engine/worker/perf.py +0 -1048
- freesolo_flash_dev-0.2.25/flash/envs/adapter/rubric.py +0 -222
- freesolo_flash_dev-0.2.25/flash/mcp/__init__.py +0 -1
- freesolo_flash_dev-0.2.25/flash/mcp/server.py +0 -85
- freesolo_flash_dev-0.2.25/flash/providers/hyperstack/__init__.py +0 -127
- freesolo_flash_dev-0.2.25/flash/providers/hyperstack/api.py +0 -522
- freesolo_flash_dev-0.2.25/flash/providers/hyperstack/auth.py +0 -17
- freesolo_flash_dev-0.2.25/flash/providers/hyperstack/gpus.py +0 -29
- freesolo_flash_dev-0.2.25/flash/providers/hyperstack/jobs/__init__.py +0 -632
- freesolo_flash_dev-0.2.25/flash/providers/hyperstack/jobs/builders.py +0 -122
- freesolo_flash_dev-0.2.25/flash/providers/hyperstack/preflight.py +0 -23
- freesolo_flash_dev-0.2.25/flash/providers/hyperstack/pricing.py +0 -26
- freesolo_flash_dev-0.2.25/flash/providers/hyperstack/train.py +0 -25
- freesolo_flash_dev-0.2.25/flash/providers/preflight.py +0 -55
- freesolo_flash_dev-0.2.25/flash/server/app.py +0 -961
- freesolo_flash_dev-0.2.25/tests/live/test_hyperstack_live.py +0 -50
- freesolo_flash_dev-0.2.25/tests/test_cost_models.py +0 -36
- freesolo_flash_dev-0.2.25/tests/test_hyperstack_runner.py +0 -1031
- freesolo_flash_dev-0.2.25/tests/test_idle_endpoint_reaper.py +0 -285
- freesolo_flash_dev-0.2.25/tests/test_mig_guard.py +0 -70
- freesolo_flash_dev-0.2.25/tests/test_poll_helpers.py +0 -52
- freesolo_flash_dev-0.2.25/tests/test_preflight.py +0 -98
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/.dockerignore +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/.github/workflows/ci.yml +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/.github/workflows/main-source-guard.yml +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/.github/workflows/publish-image.yml +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/.github/workflows/publish.yml +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/.gitignore +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/LICENSE +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/build/kernel_cache/.gitignore +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/build/kernel_cache/.keep +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/docker/Dockerfile.kernelcache +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/docker/make_rp_handler.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/docs/cli-style/index.html +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/docs/cli-style/preview.png +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/docs/kernel-cache.md +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/_fileio.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/_logging.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/_update_check.py +0 -0
- {freesolo_flash_dev-0.2.25/flash/cli/main → freesolo_flash_dev-0.2.26/flash/cli}/envpush.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/client/specs.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/cost/__init__.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/cost/types.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/engine/__init__.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/engine/multiturn_rollout.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/engine/worker/__main__.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/envs/__init__.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/envs/base.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/envs/registry.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/providers/lambdalabs/api.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/providers/lambdalabs/auth.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/providers/lambdalabs/gpus.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/providers/lambdalabs/preflight.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/providers/lambdalabs/train.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/providers/runpod/auth.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/providers/runpod/cost.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/providers/runpod/preflight.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/providers/runpod/pricing.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/providers/runpod/slots.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/py.typed +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/runner/checkpoints.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/runner/deploy.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/schema/fields.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/serve/__init__.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/server/__init__.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/server/__main__.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/server/auth.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/server/billing.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/server/checkpoints.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/server/db.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/server/environment_registry.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/server/run_registry.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/scripts/build_dev_dist.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/__init__.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/_helpers/__init__.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/_helpers/runner.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/_helpers/specs.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/fixtures/math_eval.jsonl +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/fixtures/math_train.jsonl +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/live/__init__.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/live/test_lambda_live.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/live/test_runpod_live.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_algorithms.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_backend_jobspec_contract.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_cancel_remote.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_catalog_consistency.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_chalk_kernels.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_cost_estimate.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_cost_rewards.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_endpoint_name.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_env_publish.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_env_rate_limit_resolve.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_github_urlopen_retry.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_grpo_mask_aware.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_grpo_sleep_gate.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_kernel_cache.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_kv_util.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_logging.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_managed_hf_repo.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_metrics_schema_agent_contract.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_multiturn_rollout.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_open_model_policy.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_orchestrator_flash.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_resolve_params_b.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_runmgmt.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_runpod_api_delete.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_runpod_slots.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_serving_contract.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_spec_and_validation.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_update_check.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_verifiers.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_vl_warmstart_adapter_keys.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_wandb_naming.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_warmstart_cross_repo.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_worker_hardexit.py +0 -0
- {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_worker_thinking.py +0 -0
|
@@ -5,6 +5,11 @@
|
|
|
5
5
|
# GPU substrate. RunPod is the default; Vast is opt-in (only required when set).
|
|
6
6
|
RUNPOD_API_KEY=
|
|
7
7
|
VAST_API_KEY=
|
|
8
|
+
# Use the per-arch baked worker images (cu128-<sm>) to skip the ~10-15 min cold-start JIT. Requires
|
|
9
|
+
# the per-SM images published first (.github/workflows/bake-kernel-cache.yml), and a manual re-bake
|
|
10
|
+
# after any worker-deps change. The control-plane Dockerfile sets this to 1 by default; uncomment to
|
|
11
|
+
# enable for a bare `flash-server` deploy.
|
|
12
|
+
# FLASH_WORKER_IMAGE_PER_SM=1
|
|
8
13
|
# HuggingFace token with write access to each run's [train] hf_repo (code upload +
|
|
9
14
|
# streamed checkpoints/adapters land in that per-run dataset repo). The artifact repo
|
|
10
15
|
# is per-run (set in the run TOML's [train] hf_repo), not an operator-wide env var.
|
{freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/.github/workflows/bake-kernel-cache.yml
RENAMED
|
@@ -56,11 +56,24 @@ jobs:
|
|
|
56
56
|
gpu_type_id: "NVIDIA H100 80GB HBM3",
|
|
57
57
|
allowed_cuda: "",
|
|
58
58
|
}
|
|
59
|
-
# Blackwell needs CUDA-13 hosts to JIT its PTX (matches min_cuda_for in the provider).
|
|
59
|
+
# Blackwell needs CUDA-13 hosts to JIT its PTX (matches min_cuda_for in the provider). Bake
|
|
60
|
+
# sm120 on the RTX Pro 6000 (Server Edition): it's the same sm120 (cache is sm-keyed, so it's
|
|
61
|
+
# valid for RTX 5090 too) but has far better secure-cloud on-demand capacity -- the RTX 5090
|
|
62
|
+
# pool repeatedly returned "machine does not have the resources" at create_pod.
|
|
60
63
|
- {
|
|
61
64
|
sm: sm120,
|
|
62
65
|
arch: "12.0",
|
|
63
|
-
gpu_type_id: "NVIDIA GeForce RTX 5090",
|
|
66
|
+
gpu_type_id: "NVIDIA RTX PRO 6000 Blackwell Server Edition",
|
|
67
|
+
allowed_cuda: "13.0",
|
|
68
|
+
}
|
|
69
|
+
# Datacenter Blackwell (sm100, distinct from the sm120 RTX Pro 6000). B200 has 180 GB and
|
|
70
|
+
# good secure-cloud capacity. CUDA-13 host like the other Blackwell parts. NOT in the default
|
|
71
|
+
# `sms` list yet (B200 is unvalidated) -- bake it explicitly via `sms=sm100` until a smoke
|
|
72
|
+
# passes, then add it to the default.
|
|
73
|
+
- {
|
|
74
|
+
sm: sm100,
|
|
75
|
+
arch: "10.0",
|
|
76
|
+
gpu_type_id: "NVIDIA B200",
|
|
64
77
|
allowed_cuda: "13.0",
|
|
65
78
|
}
|
|
66
79
|
steps:
|
|
@@ -80,6 +93,11 @@ jobs:
|
|
|
80
93
|
- name: Install uv
|
|
81
94
|
if: steps.gate.outputs.run == 'true'
|
|
82
95
|
uses: astral-sh/setup-uv@v5
|
|
96
|
+
with:
|
|
97
|
+
# no cache: the post-run cache-prune calls `uv`, but the "Free disk space" step deletes
|
|
98
|
+
# /opt/hostedtoolcache (where setup-uv put uv), so the post step fails ("uv not found") and
|
|
99
|
+
# marks the whole job red even though the bake + push already succeeded.
|
|
100
|
+
enable-cache: false
|
|
83
101
|
|
|
84
102
|
- name: Sync deps (flash + runpod + hf)
|
|
85
103
|
if: steps.gate.outputs.run == 'true'
|
|
@@ -8,7 +8,8 @@ name: Publish flash dev-channel package
|
|
|
8
8
|
# merging to `dev` cuts a release; ordinary dev pushes (version unchanged -> already published)
|
|
9
9
|
# no-op. This mirrors freesolo-flash's publish.yml, but keyed on the dev version and on `dev`
|
|
10
10
|
# instead of `main`, and a no-op is a clean success (not a failure) since most dev pushes don't
|
|
11
|
-
# bump it. Manual runs via workflow_dispatch
|
|
11
|
+
# bump it. Manual runs via workflow_dispatch re-run the same version check for the current dev
|
|
12
|
+
# version (still a no-op when it's already on PyPI — not a forced re-publish).
|
|
12
13
|
on:
|
|
13
14
|
push:
|
|
14
15
|
branches:
|
|
@@ -45,15 +46,28 @@ jobs:
|
|
|
45
46
|
with:
|
|
46
47
|
python-version: "3.11"
|
|
47
48
|
|
|
48
|
-
- name: Read dev-channel version
|
|
49
|
+
- name: Read dev-channel version (and enforce parity with the prod version)
|
|
49
50
|
id: meta
|
|
50
51
|
run: |
|
|
51
52
|
python3 - <<'PY' >> "$GITHUB_OUTPUT"
|
|
53
|
+
import sys
|
|
52
54
|
import tomllib
|
|
53
55
|
|
|
54
56
|
with open("pyproject.toml", "rb") as f:
|
|
55
57
|
data = tomllib.load(f)
|
|
56
|
-
|
|
58
|
+
dev_version = data["tool"]["flash-dev"]["version"]
|
|
59
|
+
prod_version = data["project"]["version"]
|
|
60
|
+
# The two channels MUST ship in lockstep (version-parity.yml enforces this on PRs). Re-check
|
|
61
|
+
# it here so the PUBLISH path can never ship a dev wheel out of sync with freesolo-flash even
|
|
62
|
+
# if the parity CI was bypassed or branch protection isn't strict — fail before build/publish.
|
|
63
|
+
if dev_version != prod_version:
|
|
64
|
+
print(
|
|
65
|
+
f"::error::version mismatch: [tool.flash-dev].version={dev_version} != "
|
|
66
|
+
f"[project].version={prod_version}; bump both in lockstep before publishing.",
|
|
67
|
+
file=sys.stderr,
|
|
68
|
+
)
|
|
69
|
+
sys.exit(1)
|
|
70
|
+
print(f"version={dev_version}")
|
|
57
71
|
PY
|
|
58
72
|
|
|
59
73
|
- name: Decide whether to publish
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
name: Version parity
|
|
2
|
+
|
|
3
|
+
# Keep the two release channels pinned to the same version: the prod package `freesolo-flash`
|
|
4
|
+
# (pyproject `[project].version`) and the dev-channel package `freesolo-flash-dev`
|
|
5
|
+
# (`[tool.flash-dev].version`). They publish from `main` and `dev` respectively, so a divergence
|
|
6
|
+
# would ship two channels claiming different versions. Bump both together.
|
|
7
|
+
on:
|
|
8
|
+
push:
|
|
9
|
+
branches: [main, dev]
|
|
10
|
+
pull_request:
|
|
11
|
+
branches: [main, dev]
|
|
12
|
+
|
|
13
|
+
permissions:
|
|
14
|
+
contents: read
|
|
15
|
+
|
|
16
|
+
jobs:
|
|
17
|
+
versions-match:
|
|
18
|
+
name: dev and main at the same version
|
|
19
|
+
runs-on: ubuntu-latest
|
|
20
|
+
steps:
|
|
21
|
+
- uses: actions/checkout@v6
|
|
22
|
+
|
|
23
|
+
# tomllib has only been in the stdlib since 3.11; the runner's default python3 may predate that.
|
|
24
|
+
- name: Set up Python
|
|
25
|
+
uses: actions/setup-python@v5
|
|
26
|
+
with:
|
|
27
|
+
python-version: "3.11"
|
|
28
|
+
|
|
29
|
+
- name: Compare channel versions
|
|
30
|
+
run: |
|
|
31
|
+
python3 - <<'PY'
|
|
32
|
+
import sys
|
|
33
|
+
import tomllib
|
|
34
|
+
|
|
35
|
+
with open("pyproject.toml", "rb") as f:
|
|
36
|
+
data = tomllib.load(f)
|
|
37
|
+
|
|
38
|
+
prod = data["project"]["version"]
|
|
39
|
+
dev = data["tool"]["flash-dev"]["version"]
|
|
40
|
+
if prod != dev:
|
|
41
|
+
print(
|
|
42
|
+
"::error::Channel version mismatch: "
|
|
43
|
+
f"[project].version={prod} (freesolo-flash) != "
|
|
44
|
+
f"[tool.flash-dev].version={dev} (freesolo-flash-dev). "
|
|
45
|
+
"Bump both to the same version."
|
|
46
|
+
)
|
|
47
|
+
sys.exit(1)
|
|
48
|
+
print(f"OK: freesolo-flash and freesolo-flash-dev are both at {prod}.")
|
|
49
|
+
PY
|
|
@@ -83,3 +83,15 @@ jobs:
|
|
|
83
83
|
build-args: |
|
|
84
84
|
FLASH_ATTN_SPEC=https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.9.0/flash_attn-2.8.3%2Bcu128torch2.10-cp312-cp312-linux_x86_64.whl
|
|
85
85
|
FLASH_ATTN_3_SPEC=${{ github.event.inputs.flash_attn_3_spec || 'https://github.com/windreamer/flash-attention3-wheels/releases/download/2026.03.19-850211f/flash_attn_3-3.0.0%2B20260318.cu128torch2100cxx11abitrue.8afc61-cp39-abi3-linux_x86_64.whl' }}
|
|
86
|
+
|
|
87
|
+
# The per-arch baked images (cu128-<sm>) are rebaked MANUALLY (bake-kernel-cache.yml). When this
|
|
88
|
+
# rebuilds :cu128, those tags go stale -> prompt a rebake. Only when the cu128 tag was (re)built.
|
|
89
|
+
- name: Remind to rebake per-SM kernel-cache images
|
|
90
|
+
if: ${{ success() && (github.event.inputs.tag || 'cu128') == 'cu128' }}
|
|
91
|
+
run: |
|
|
92
|
+
echo "::warning title=Rebake per-SM images::A new :cu128 base was published. If FLASH_WORKER_IMAGE_PER_SM=1 is enabled, run bake-kernel-cache.yml to refresh cu128-sm{80,86,89,90,120}, else workers run stale baked deps."
|
|
93
|
+
{
|
|
94
|
+
echo "### ⚠️ Per-SM kernel-cache images may now be stale"
|
|
95
|
+
echo ""
|
|
96
|
+
echo "A fresh \`:cu128\` base was just published. If \`FLASH_WORKER_IMAGE_PER_SM=1\` is enabled, re-run **bake-kernel-cache.yml** to refresh the per-arch \`cu128-sm*\` tags, or workers will run the previous baked deps."
|
|
97
|
+
} >> "$GITHUB_STEP_SUMMARY"
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# Flash control plane (operator-side).
|
|
2
|
+
#
|
|
3
|
+
# docker build -t flash-control-plane .
|
|
4
|
+
# docker run -p 8080:8080 \
|
|
5
|
+
# -e RUNPOD_API_KEY=... -e HF_TOKEN=... \
|
|
6
|
+
# -v flash-state:/root/.flash flash-control-plane
|
|
7
|
+
#
|
|
8
|
+
# All persistent state (key DB, run records, results) lives under ~/.flash (fixed paths,
|
|
9
|
+
# = /root/.flash for the default root user) — mount a volume there. Run exactly ONE
|
|
10
|
+
# container instance per state volume (state is local files + SQLite; no horizontal scaling).
|
|
11
|
+
|
|
12
|
+
FROM python:3.12-slim
|
|
13
|
+
|
|
14
|
+
WORKDIR /app
|
|
15
|
+
COPY . .
|
|
16
|
+
RUN apt-get update \
|
|
17
|
+
&& apt-get install -y --no-install-recommends ca-certificates git curl \
|
|
18
|
+
&& curl -1sLf 'https://artifacts-cli.infisical.com/setup.deb.sh' | bash \
|
|
19
|
+
&& apt-get update && apt-get install -y --no-install-recommends infisical \
|
|
20
|
+
&& rm -rf /var/lib/apt/lists/* \
|
|
21
|
+
&& chmod +x /app/infisical-entrypoint.sh
|
|
22
|
+
RUN pip install --no-cache-dir ".[server]"
|
|
23
|
+
|
|
24
|
+
VOLUME /root/.flash
|
|
25
|
+
EXPOSE 8080
|
|
26
|
+
|
|
27
|
+
# Use the per-arch baked worker images (ghcr.io/.../flash-worker:cu128-<sm>) so cold workers skip the
|
|
28
|
+
# ~10-15 min first-use JIT; the allocator maps each GPU class to its matching -smXX tag. All validated
|
|
29
|
+
# SMs (sm80/86/89/90/120) are published. Rebakes are MANUAL -- after a Dockerfile.worker/deps change
|
|
30
|
+
# rebuilds :cu128, re-run bake-kernel-cache.yml so the -smXX tags don't ship stale deps (the
|
|
31
|
+
# worker-image build posts a reminder). Override at runtime with `-e FLASH_WORKER_IMAGE_PER_SM=0`.
|
|
32
|
+
#
|
|
33
|
+
# NOTE: this ENV is the default for BARE (non-Infisical) `flash-server` deploys. Under the Infisical
|
|
34
|
+
# entrypoint below, `infisical run` overrides the container env, so for the Infisical-managed deploy
|
|
35
|
+
# set FLASH_WORKER_IMAGE_PER_SM in the vault (path /flash) or add it to INFISICAL_KEEP -- otherwise
|
|
36
|
+
# this default may not reach the server.
|
|
37
|
+
ENV FLASH_WORKER_IMAGE_PER_SM=1
|
|
38
|
+
|
|
39
|
+
# secret injection wrapper: no-op passthrough unless INFISICAL_CLIENT_ID is set, else
|
|
40
|
+
# `infisical login` (universal-auth) then `infisical run --path /flash` before the server.
|
|
41
|
+
ENTRYPOINT ["/app/infisical-entrypoint.sh"]
|
|
42
|
+
CMD ["python", "-m", "flash.server", "--host", "0.0.0.0", "--port", "8080"]
|
|
@@ -73,9 +73,9 @@ RUN pip install --no-cache-dir \
|
|
|
73
73
|
ARG FLASH_ATTN_SPEC=flash-attn
|
|
74
74
|
# Source-build fallback only (ignored when FLASH_ATTN_SPEC is a wheel): bound the compile so it
|
|
75
75
|
# doesn't OOM. TORCH_CUDA_ARCH_LIST restricts to the catalog's arches — Ampere (8.0 A100 / 8.6
|
|
76
|
-
# 3090/A40), Ada (8.9 4090), Hopper (9.0 H100), Blackwell (12.0 RTX 5090; sm120); MAX_JOBS bounds
|
|
76
|
+
# 3090/A40), Ada (8.9 4090), Hopper (9.0 H100), Blackwell datacenter (10.0 B200; sm100) + workstation (12.0 RTX 5090/Pro 6000; sm120); MAX_JOBS bounds
|
|
77
77
|
# peak compile memory. (A from-source build still needs a big-RAM host; the wheel avoids all of it.)
|
|
78
|
-
RUN TORCH_CUDA_ARCH_LIST="8.0 8.6 8.9 9.0 12.0" MAX_JOBS=4 \
|
|
78
|
+
RUN TORCH_CUDA_ARCH_LIST="8.0 8.6 8.9 9.0 10.0 12.0" MAX_JOBS=4 \
|
|
79
79
|
pip install --no-cache-dir "${FLASH_ATTN_SPEC}" --no-build-isolation \
|
|
80
80
|
&& echo "flash-attn: installed (${FLASH_ATTN_SPEC})" || echo "flash-attn: build failed, SDPA fallback"
|
|
81
81
|
|
|
@@ -119,7 +119,7 @@ RUN if [ -n "${FLASH_ATTN_3_SPEC}" ]; then \
|
|
|
119
119
|
# import fine but raise "no kernel image is available for execution on the device" at the first conv
|
|
120
120
|
# forward on sm120 — GPU-verified. engine.worker.packing.gdn_packing_available runs a conv smoke too,
|
|
121
121
|
# so even a wrong-arch build can't crash a run (it just keeps GDN packing off).
|
|
122
|
-
RUN TORCH_CUDA_ARCH_LIST="8.0 8.6 8.9 9.0 12.0" CAUSAL_CONV1D_FORCE_BUILD=TRUE MAX_JOBS=4 \
|
|
122
|
+
RUN TORCH_CUDA_ARCH_LIST="8.0 8.6 8.9 9.0 10.0 12.0" CAUSAL_CONV1D_FORCE_BUILD=TRUE MAX_JOBS=4 \
|
|
123
123
|
pip install --no-cache-dir "causal-conv1d==1.6.2.post1" --no-build-isolation \
|
|
124
124
|
&& python -c "import causal_conv1d" \
|
|
125
125
|
&& echo "causal_conv1d: installed (GDN packing enabled)" \
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: freesolo-flash-dev
|
|
3
|
-
Version: 0.2.25
|
|
3
|
+
Version: 0.2.26
|
|
4
4
|
Summary: Flash — managed LoRA post-training (SFT/GRPO) for Freesolo environments, driven by the `flash` CLI
|
|
5
5
|
Project-URL: Homepage, https://github.com/freesolo-co/flash
|
|
6
6
|
Project-URL: Repository, https://github.com/freesolo-co/flash
|
|
@@ -85,7 +85,6 @@ The allocator picks the cheapest validated RunPod GPU class that fits the run.
|
|
|
85
85
|
and ready-to-run configs to start from
|
|
86
86
|
- `flash/serve/`, `flash/server/` — adapter serving and the FastAPI control
|
|
87
87
|
plane (run operator-side via the separate `flash-server` command)
|
|
88
|
-
- `flash/mcp/` — stdio MCP bridge for coding agents
|
|
89
88
|
- `Dockerfile` — the control-plane image (used by the repo docker-compose)
|
|
90
89
|
- `tests/` — pytest suite (CPU-only; offline-by-default, no GPU/network)
|
|
91
90
|
|
|
@@ -117,11 +116,15 @@ Two channels are published to PyPI from the *same source*, distinguished by one
|
|
|
117
116
|
| prod | `freesolo-flash` | `flash` | `flash.freesolo.co` | push to `main` that bumps `[project].version` (`.github/workflows/publish.yml`) |
|
|
118
117
|
| dev | `freesolo-flash-dev` | `flash-dev` | `flash-dev.freesolo.co` | push to `dev` whose `[tool.flash-dev].version` isn't on PyPI yet (`.github/workflows/publish-dev.yml`) |
|
|
119
118
|
|
|
120
|
-
|
|
119
|
+
Each environment holds exactly **one** channel: both packages ship the same import package
|
|
120
|
+
(`flash/`) with one baked `CHANNEL` line, so installing both into the same environment makes the
|
|
121
|
+
later install win for *both* CLIs. For side-by-side prod and staging, install each channel in its
|
|
122
|
+
own virtualenv (or via `pipx`, which isolates per tool). The dev build is produced by
|
|
121
123
|
`scripts/build_dev_dist.py`, which renames the package/CLI and flips `CHANNEL` to `dev` before
|
|
122
|
-
`uv build`.
|
|
123
|
-
|
|
124
|
-
|
|
124
|
+
`uv build`. Both channels ship at the **same version**: `[project].version` and
|
|
125
|
+
`[tool.flash-dev].version` must match (CI enforces this via `.github/workflows/version-parity.yml`),
|
|
126
|
+
so cutting a release means bumping both together. Either CLI still honours an explicit
|
|
127
|
+
`FLASH_API_URL` / the `login --api-url` flag; the channel only sets the default.
|
|
125
128
|
|
|
126
129
|
## Serving From an API
|
|
127
130
|
|
|
@@ -36,7 +36,6 @@ The allocator picks the cheapest validated RunPod GPU class that fits the run.
|
|
|
36
36
|
and ready-to-run configs to start from
|
|
37
37
|
- `flash/serve/`, `flash/server/` — adapter serving and the FastAPI control
|
|
38
38
|
plane (run operator-side via the separate `flash-server` command)
|
|
39
|
-
- `flash/mcp/` — stdio MCP bridge for coding agents
|
|
40
39
|
- `Dockerfile` — the control-plane image (used by the repo docker-compose)
|
|
41
40
|
- `tests/` — pytest suite (CPU-only; offline-by-default, no GPU/network)
|
|
42
41
|
|
|
@@ -68,11 +67,15 @@ Two channels are published to PyPI from the *same source*, distinguished by one
|
|
|
68
67
|
| prod | `freesolo-flash` | `flash` | `flash.freesolo.co` | push to `main` that bumps `[project].version` (`.github/workflows/publish.yml`) |
|
|
69
68
|
| dev | `freesolo-flash-dev` | `flash-dev` | `flash-dev.freesolo.co` | push to `dev` whose `[tool.flash-dev].version` isn't on PyPI yet (`.github/workflows/publish-dev.yml`) |
|
|
70
69
|
|
|
71
|
-
|
|
70
|
+
Each environment holds exactly **one** channel: both packages ship the same import package
|
|
71
|
+
(`flash/`) with one baked `CHANNEL` line, so installing both into the same environment makes the
|
|
72
|
+
later install win for *both* CLIs. For side-by-side prod and staging, install each channel in its
|
|
73
|
+
own virtualenv (or via `pipx`, which isolates per tool). The dev build is produced by
|
|
72
74
|
`scripts/build_dev_dist.py`, which renames the package/CLI and flips `CHANNEL` to `dev` before
|
|
73
|
-
`uv build`.
|
|
74
|
-
|
|
75
|
-
|
|
75
|
+
`uv build`. Both channels ship at the **same version**: `[project].version` and
|
|
76
|
+
`[tool.flash-dev].version` must match (CI enforces this via `.github/workflows/version-parity.yml`),
|
|
77
|
+
so cutting a release means bumping both together. Either CLI still honours an explicit
|
|
78
|
+
`FLASH_API_URL` / the `login --api-url` flag; the channel only sets the default.
|
|
76
79
|
|
|
77
80
|
## Serving From an API
|
|
78
81
|
|
|
@@ -69,7 +69,10 @@ def main() -> int:
|
|
|
69
69
|
ap.add_argument("--gpu-type-id", required=True, help="RunPod gpuTypeId, e.g. 'NVIDIA H100 80GB HBM3'")
|
|
70
70
|
ap.add_argument("--image", default="ghcr.io/freesolo-co/flash-worker:cu128")
|
|
71
71
|
ap.add_argument("--out", default="build/kernel_cache")
|
|
72
|
-
|
|
72
|
+
# the warm pod only pulls the ~20GB image + writes the cache (no model download), so keep this
|
|
73
|
+
# modest -- an over-large ask shrinks the eligible host pool and trips "machine does not have the
|
|
74
|
+
# resources" on scarce classes (e.g. Blackwell sm120 on secure cloud).
|
|
75
|
+
ap.add_argument("--container-disk-gb", type=int, default=60)
|
|
73
76
|
ap.add_argument("--deadline-min", type=int, default=45)
|
|
74
77
|
ap.add_argument("--run-id", default="", help="unique suffix for the temp repo (default: time+uuid)")
|
|
75
78
|
ap.add_argument(
|
|
@@ -221,7 +224,18 @@ def _verify(out: str, sm: str) -> int:
|
|
|
221
224
|
blob = os.path.join(out, "mega_cache.bin")
|
|
222
225
|
meta = os.path.join(out, "mega_cache.json")
|
|
223
226
|
if not os.path.isfile(blob):
|
|
224
|
-
log(f"FAIL: no mega_cache.bin in {out}")
|
|
227
|
+
log(f"FAIL: no mega_cache.bin in {out}; what the warmup actually produced:")
|
|
228
|
+
for root, _, files in os.walk(out):
|
|
229
|
+
for f in sorted(files):
|
|
230
|
+
p = os.path.join(root, f)
|
|
231
|
+
log(f" present: {os.path.relpath(p, out)} ({os.path.getsize(p)} b)")
|
|
232
|
+
wl = os.path.join(out, "warmup.log")
|
|
233
|
+
if os.path.isfile(wl):
|
|
234
|
+
log(" --- warmup.log tail ---")
|
|
235
|
+
with open(wl, errors="replace") as wlf:
|
|
236
|
+
tail = wlf.read().splitlines()[-40:]
|
|
237
|
+
for line in tail:
|
|
238
|
+
log(f" | {line}")
|
|
225
239
|
return 1
|
|
226
240
|
try:
|
|
227
241
|
with open(meta) as f:
|
|
@@ -49,7 +49,11 @@ def main() -> int:
|
|
|
49
49
|
if arch:
|
|
50
50
|
cmd += ["--arch", arch]
|
|
51
51
|
print(f"[bake] running: {' '.join(cmd)}", flush=True)
|
|
52
|
-
|
|
52
|
+
# capture the warmup output into /out so it ships back with the cache -- lets the CI helper show
|
|
53
|
+
# WHICH warm steps compiled and what save_cache_artifacts returned when no mega_cache.bin lands.
|
|
54
|
+
os.makedirs("/out", exist_ok=True)
|
|
55
|
+
with open("/out/warmup.log", "wb") as lf:
|
|
56
|
+
rc = subprocess.call(cmd, env=env, stdout=lf, stderr=subprocess.STDOUT)
|
|
53
57
|
print(f"[bake] kernel_warmup rc={rc}", flush=True)
|
|
54
58
|
|
|
55
59
|
# ship the whole cache tree back (mega blob + metadata + raw triton/inductor dirs).
|
|
@@ -16,8 +16,8 @@ A standardized, production-grade output theme for every `flash` command.
|
|
|
16
16
|
disable the themed layout with `FLASH_STYLE=0`. `NO_COLOR` keeps the layout but drops ANSI color.
|
|
17
17
|
- **No new dependencies:** pure standard library, like the rest of the client CLI.
|
|
18
18
|
|
|
19
|
-
The rendering lives in `flash/cli/
|
|
20
|
-
`flash/cli/
|
|
19
|
+
The rendering lives in `flash/cli/render.py`; the command wiring is in
|
|
20
|
+
`flash/cli/commands.py` and `envpush.py`.
|
|
21
21
|
|
|
22
22
|
## Preview
|
|
23
23
|
|
|
@@ -22,8 +22,8 @@ import tempfile
|
|
|
22
22
|
from contextlib import redirect_stderr, redirect_stdout
|
|
23
23
|
from pathlib import Path
|
|
24
24
|
|
|
25
|
-
|
|
26
|
-
from flash.cli
|
|
25
|
+
import flash.cli as cli
|
|
26
|
+
from flash.cli import render
|
|
27
27
|
|
|
28
28
|
|
|
29
29
|
class _Utf8(io.StringIO):
|
|
@@ -185,7 +185,7 @@ def _capture_argv(argv, *, styled, theme="dark", cwd=None, with_stderr=False) ->
|
|
|
185
185
|
``with_stderr`` is set, the stderr note (e.g. `flash train`'s hand-off line) is shown first,
|
|
186
186
|
as it appears in a real terminal before the streamed logs."""
|
|
187
187
|
_set_style(styled, theme)
|
|
188
|
-
commands = sys.modules["flash.cli.
|
|
188
|
+
commands = sys.modules["flash.cli.commands"]
|
|
189
189
|
saved = commands.client_from_config
|
|
190
190
|
commands.client_from_config = lambda *a, **k: FAKE
|
|
191
191
|
out, err = _Utf8(), _Utf8()
|
|
@@ -476,7 +476,7 @@ def main():
|
|
|
476
476
|
out_path = Path(sys.argv[1]) if len(sys.argv) > 1 else Path(__file__).parent / "index.html"
|
|
477
477
|
# Deterministic preview: pin the dry-run id (cmd_train calls new_run_id() every time) so the
|
|
478
478
|
# committed gallery doesn't churn on every regeneration.
|
|
479
|
-
sys.modules["flash.cli.
|
|
479
|
+
sys.modules["flash.cli.commands"].new_run_id = lambda: "flash-1718900000-d0cf00ed"
|
|
480
480
|
with tempfile.TemporaryDirectory() as td:
|
|
481
481
|
# Point the installed-env registry at an empty temp manifest so `flash env list` never
|
|
482
482
|
# leaks a developer's real installed env slugs (~/.flash/envs.json) into the preview.
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
"""Flash — managed LoRA post-training: log in with your freesolo key, train.
|
|
2
2
|
|
|
3
3
|
A focused developer experience (TOML run specs, pluggable environments,
|
|
4
|
-
CLI/API
|
|
4
|
+
CLI/API entry points, adapter deployment). Users authenticate with their
|
|
5
5
|
freesolo API key (`flash login`); the control plane runs each job on a managed
|
|
6
6
|
RunPod GPU behind the scenes.
|
|
7
7
|
"""
|
|
@@ -5,8 +5,13 @@ below): it installs a ``flash`` CLI that talks to the production control plane.
|
|
|
5
5
|
package ``freesolo-flash-dev`` is built from this *same source* with only this one line rewritten
|
|
6
6
|
to ``CHANNEL = "dev"`` (see ``scripts/build_dev_dist.py``); everything that differs between the two
|
|
7
7
|
channels — the CLI name, the PyPI distribution name, the default control-plane URL — derives from
|
|
8
|
-
it below, so there is exactly one thing to flip. An explicit ``FLASH_API_URL`` /
|
|
9
|
-
``
|
|
8
|
+
it below, so there is exactly one thing to flip. An explicit ``FLASH_API_URL`` / the
|
|
9
|
+
``login --api-url`` flag always wins; the channel only picks the *default* plane.
|
|
10
|
+
|
|
11
|
+
Both channels ship the SAME import package (``flash/``) with this one baked line, so a single
|
|
12
|
+
environment holds exactly ONE channel — installing ``freesolo-flash`` and ``freesolo-flash-dev``
|
|
13
|
+
into the same environment makes the later install win for both CLIs. For side-by-side prod and
|
|
14
|
+
staging, install each channel in its own virtualenv (or via ``pipx``, which isolates per tool).
|
|
10
15
|
"""
|
|
11
16
|
|
|
12
17
|
from __future__ import annotations
|
|
@@ -46,6 +46,13 @@ class ModelInfo:
|
|
|
46
46
|
# tier needs a bigger card than SFT (the colocate 2nd weight copy + KV pool). Consumed by
|
|
47
47
|
# engine.vram.model_required_vram_gb.
|
|
48
48
|
grpo_min_vram_gb: int = 0
|
|
49
|
+
# SFT hard VRAM floor (GB). 0 => SFT sizes purely from the param-based estimate and is free to
|
|
50
|
+
# down-route to a smaller validated card (the default — e.g. a 4B SFT estimates ~17 GB and rents
|
|
51
|
+
# a 48 GB card, NOT its ``min_vram_gb`` reference). Set it ONLY when a curated model must not be
|
|
52
|
+
# placed on the cheapest card the estimate would otherwise allow — e.g. a very large checkpoint
|
|
53
|
+
# whose ~param-est margin over the frozen-weights floor is too thin on the next card down.
|
|
54
|
+
# Consumed by engine.vram.model_required_vram_gb (the SFT analog of ``grpo_min_vram_gb``).
|
|
55
|
+
sft_min_vram_gb: int = 0
|
|
49
56
|
notes: str = ""
|
|
50
57
|
# Worker container disk this model needs (GB). 0 = the platform default (64 GB)
|
|
51
58
|
# suffices. The runner raises gpu.disk_gb to at least this, so big-checkpoint
|
|
@@ -64,8 +71,14 @@ class ModelInfo:
|
|
|
64
71
|
# completion cap. Curated per model below; defaults to the open-model fallback.
|
|
65
72
|
vocab_size: int = _DEFAULT_VOCAB_SIZE
|
|
66
73
|
# Total parameters in billions — the numeric model size the cost estimator reads directly
|
|
67
|
-
# (no parsing of the ``params`` display string).
|
|
74
|
+
# (no parsing of the ``params`` display string). Drives the memory/size terms (VRAM, disk,
|
|
75
|
+
# download), which always size the FULL checkpoint. Curated per catalog model below.
|
|
68
76
|
params_b: float = 0.0
|
|
77
|
+
# Parameters ACTIVE per token in billions — only meaningful for an MoE, where a token routes
|
|
78
|
+
# through a small subset of experts. The cost estimator's per-token FLOPs/step-time term reads
|
|
79
|
+
# this (a token exercises only the active params), while VRAM/disk/download keep using the total
|
|
80
|
+
# ``params_b``. 0.0 (the dense default) means "same as params_b" — every token hits every param.
|
|
81
|
+
active_params_b: float = 0.0
|
|
69
82
|
|
|
70
83
|
def to_dict(self) -> dict[str, Any]:
|
|
71
84
|
return asdict(self)
|
|
@@ -89,7 +102,7 @@ MODELS: dict[str, ModelInfo] = {
|
|
|
89
102
|
thinking="hybrid",
|
|
90
103
|
notes="On-device class SLM (131k ctx); standard Llama architecture.",
|
|
91
104
|
),
|
|
92
|
-
#
|
|
105
|
+
# Qwen3.5 dense family: validated on the modern worker stack
|
|
93
106
|
# (trl 1.x / vllm 0.19 / transformers 5.x). Trained + served TEXT-ONLY: the
|
|
94
107
|
# checkpoints are natively multimodal, so LoRA excludes the vision tower and vLLM
|
|
95
108
|
# loads language_model_only (see flash.engine.worker). Each entry passed a real
|
|
@@ -153,6 +166,64 @@ MODELS: dict[str, ModelInfo] = {
|
|
|
153
166
|
"(two bf16 copies + KV + the 248k-vocab fp32 logits) needs an 80 GB-class card "
|
|
154
167
|
"(grpo_min_vram_gb floor).",
|
|
155
168
|
),
|
|
169
|
+
# ---- Qwen3.6 MoE: the big-checkpoint tier (H200 for SFT, B200 for GRPO) ----
|
|
170
|
+
# 35B-A3B is a Mixture-of-Experts checkpoint: ~3B parameters are ACTIVE per token, but all 35B
|
|
171
|
+
# are materialized on the GPU, so the MEMORY/disk/download terms size the FULL 35B (~70 GB bf16)
|
|
172
|
+
# while the COMPUTE terms (activations, KV pool, rank-linear LoRA) size the ~3B active backbone
|
|
173
|
+
# (engine.vram is MoE-aware via active_params_b). bf16 LoRA, NOT QLoRA — same reason as the 9B.
|
|
174
|
+
# Because the resident weights dominate and the active compute is tiny, the GPU tier is set by
|
|
175
|
+
# how many weight copies each algorithm holds, NOT by context length:
|
|
176
|
+
# * SFT — ONE ~70 GB copy + small active-compute (~82 GB peak, ~flat in context) -> fits the
|
|
177
|
+
# 141 GB H200 with wide margin (context ~unbounded by VRAM). Live-validated on a B200; the
|
|
178
|
+
# H200 down-tier is the MoE-aware win (cheaper, plentiful stock).
|
|
179
|
+
# * GRPO — colocates the vLLM rollout, so TWO ~70 GB copies (trainer + engine) are resident at
|
|
180
|
+
# the rollout peak (~167 GB) -> needs the 180 GB B200; the H200 can't hold both. The MoE
|
|
181
|
+
# rollout weight-sync needed a fused-expert name fix (engine.worker.lora._remap_vl_sync_weights
|
|
182
|
+
# passes the multimodal ``model.language_model.*`` names through to vLLM's own mapper). Both
|
|
183
|
+
# single- and multi-turn GRPO live-validated on a B200.
|
|
184
|
+
"Qwen/Qwen3.6-35B-A3B": ModelInfo(
|
|
185
|
+
id="Qwen/Qwen3.6-35B-A3B",
|
|
186
|
+
display_name="Qwen3.6 35B-A3B (MoE)",
|
|
187
|
+
params="35B total / ~3B active (MoE)",
|
|
188
|
+
# TOTAL parameters (billions) the SFT VRAM equation + cost projection read. For an MoE
|
|
189
|
+
# checkpoint the size term is the TOTAL count, not the ~3B active: download/VRAM/disk size the
|
|
190
|
+
# FULL checkpoint that lands on the GPU (all experts are materialized). 35.0 is the CALIBRATED
|
|
191
|
+
# total: the live-validated single-B200 SFT fit depends on it — the honest-peak equation lands
|
|
192
|
+
# at the 180 GB B200's usable budget, and the marketing "~35.95B" figure tips it over (186 GB,
|
|
193
|
+
# see test_sft_equation_covers_honest_peak_across_seq_boundary). Keep 35.0.
|
|
194
|
+
params_b=35.0,
|
|
195
|
+
# ~3B ACTIVE per token (the "A3B" in the name): a token routes through a small subset of
|
|
196
|
+
# experts, so cost/step-time FLOPs scale with ~3B, not the 35B total. Without this the
|
|
197
|
+
# estimator would price SFT as if every token exercised all 35B params — ~10x too slow/costly.
|
|
198
|
+
active_params_b=3.0,
|
|
199
|
+
vocab_size=248_320,
|
|
200
|
+
algos=("sft", "grpo"),
|
|
201
|
+
min_vram_gb=141,
|
|
202
|
+
# Hard SFT floor: with MoE-aware sizing the SFT estimate is ~82 GB (the 70 GB resident weights
|
|
203
|
+
# dominate; the active-3B activations/KV are tiny), which would otherwise down-route to the
|
|
204
|
+
# 96 GB RTX Pro 6000 (consumer Blackwell, thin margin over the 70 GB base) or the 80 GB H100
|
|
205
|
+
# (too tight). Floor to 100 GB so SFT lands on the 141 GB H200 — a datacenter card with wide
|
|
206
|
+
# margin, ~$1.50/hr cheaper than the B200 (which SFT does not need).
|
|
207
|
+
sft_min_vram_gb=100,
|
|
208
|
+
# GRPO floor = the 180 GB B200 (colocated GRPO holds two ~70 GB weight copies + a KV pool; the
|
|
209
|
+
# 141 GB H200 can't hold the trainer + vLLM rollout). The base ~167 GB two-copy estimate already
|
|
210
|
+
# routes GRPO to the B200, but setting the floor ALSO ENGAGES the long-context escalation —
|
|
211
|
+
# model_required_vram_gb only adds grpo_seq_escalation_gb when a grpo floor is set. The
|
|
212
|
+
# escalation keys on the ~3B ACTIVE params, so default/moderate GRPO still fits the B200 but a
|
|
213
|
+
# long (>~16k-token, e.g. 32k) rollout is sized PAST 180 GB and rejected at parse time, instead
|
|
214
|
+
# of booting a B200 and OOMing in vLLM's KV allocation.
|
|
215
|
+
grpo_min_vram_gb=180,
|
|
216
|
+
quant="bf16",
|
|
217
|
+
recommended_gpu="H200",
|
|
218
|
+
thinking="hybrid",
|
|
219
|
+
# ~70 GB bf16 checkpoint. Peak disk = HF download (~70 GB) + Xet temp (~70 GB) + per-step
|
|
220
|
+
# deployable-checkpoint saves; floor to 200 GB so the rent doesn't hit "No space left on
|
|
221
|
+
# device" (the runner raises gpu.disk_gb to this out of the box).
|
|
222
|
+
min_disk_gb=200,
|
|
223
|
+
notes="MoE (35B total / ~3B active), bf16 LoRA. SFT runs on the 141 GB H200 (the ~70 GB "
|
|
224
|
+
"weights dominate; active-3B compute keeps activations/KV tiny, so context is ~unbounded by "
|
|
225
|
+
"VRAM); colocated GRPO needs the 180 GB B200 (trainer + vLLM rollout = two 70 GB copies).",
|
|
226
|
+
),
|
|
156
227
|
}
|
|
157
228
|
|
|
158
229
|
|
{freesolo_flash_dev-0.2.25/flash/cli/main → freesolo_flash_dev-0.2.26/flash/cli}/__init__.py
RENAMED
|
@@ -17,9 +17,9 @@ from flash._logging import configure_logging, get_logger
|
|
|
17
17
|
from flash._update_check import emit_update_notice, maybe_start_update_check
|
|
18
18
|
|
|
19
19
|
# Command handlers + the patched client surface live in submodules; re-export them so
|
|
20
|
-
# `flash.cli
|
|
21
|
-
# `flash.cli.
|
|
22
|
-
from flash.cli.
|
|
20
|
+
# `flash.cli` stays the single public import surface (and so monkeypatching
|
|
21
|
+
# `flash.cli.commands` reaches the bare globals the handlers read).
|
|
22
|
+
from flash.cli.commands import ( # noqa: F401
|
|
23
23
|
_CLI_DONE_STATES,
|
|
24
24
|
_OK_STATES,
|
|
25
25
|
_STARTER_ENV_PY,
|
|
@@ -34,6 +34,7 @@ from flash.cli.main.commands import ( # noqa: F401
|
|
|
34
34
|
cmd_deployments,
|
|
35
35
|
cmd_env_list,
|
|
36
36
|
cmd_env_setup,
|
|
37
|
+
cmd_export,
|
|
37
38
|
cmd_gpus,
|
|
38
39
|
cmd_login,
|
|
39
40
|
cmd_models,
|
|
@@ -45,9 +46,9 @@ from flash.cli.main.commands import ( # noqa: F401
|
|
|
45
46
|
cmd_whoami,
|
|
46
47
|
verify_freesolo_key,
|
|
47
48
|
)
|
|
48
|
-
from flash.cli.
|
|
49
|
+
from flash.cli.envpush import cmd_env_install, cmd_env_push
|
|
49
50
|
|
|
50
|
-
logger = get_logger("flash.cli
|
|
51
|
+
logger = get_logger("flash.cli")
|
|
51
52
|
|
|
52
53
|
|
|
53
54
|
def main(argv: list[str] | None = None) -> int:
|
|
@@ -194,6 +195,39 @@ def main(argv: list[str] | None = None) -> int:
|
|
|
194
195
|
undeploy.add_argument("run_id")
|
|
195
196
|
undeploy.set_defaults(func=cmd_undeploy)
|
|
196
197
|
|
|
198
|
+
export = sub.add_parser(
|
|
199
|
+
"export", help="export a trained adapter to your own HuggingFace repo"
|
|
200
|
+
)
|
|
201
|
+
export.add_argument(
|
|
202
|
+
"--adapter-id",
|
|
203
|
+
dest="adapter_id",
|
|
204
|
+
required=True,
|
|
205
|
+
help="the Freesolo adapter id (the run id) to export",
|
|
206
|
+
)
|
|
207
|
+
export.add_argument(
|
|
208
|
+
"--repository",
|
|
209
|
+
required=True,
|
|
210
|
+
help="destination HuggingFace repo 'owner/name' (created if it doesn't exist)",
|
|
211
|
+
)
|
|
212
|
+
export.add_argument(
|
|
213
|
+
"--api-key",
|
|
214
|
+
help="HuggingFace token with write access to --repository "
|
|
215
|
+
"(default: HF_TOKEN from your shell or a local .env / .env.local)",
|
|
216
|
+
)
|
|
217
|
+
export.add_argument(
|
|
218
|
+
"--step",
|
|
219
|
+
type=int,
|
|
220
|
+
default=None,
|
|
221
|
+
help="export a specific intermediate checkpoint (see `flash checkpoints <adapter-id>`) "
|
|
222
|
+
"instead of the run's final adapter; works even for a run cancelled mid-RL",
|
|
223
|
+
)
|
|
224
|
+
export.add_argument(
|
|
225
|
+
"--public",
|
|
226
|
+
action="store_true",
|
|
227
|
+
help="create the destination repo as public (default: private)",
|
|
228
|
+
)
|
|
229
|
+
export.set_defaults(func=cmd_export)
|
|
230
|
+
|
|
197
231
|
deployments = sub.add_parser("deployments", help="list active serving deployments")
|
|
198
232
|
deployments.set_defaults(func=cmd_deployments)
|
|
199
233
|
|