PyPI - freesolo-flash-dev - Versions diffs - 0.2.25__tar.gz - Mend

freesolo-flash-dev 0.2.25__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (230) hide show

freesolo_flash_dev-0.2.25/.dockerignore +20 -0
freesolo_flash_dev-0.2.25/.env.example +34 -0
freesolo_flash_dev-0.2.25/.github/workflows/bake-kernel-cache.yml +136 -0
freesolo_flash_dev-0.2.25/.github/workflows/ci.yml +28 -0
freesolo_flash_dev-0.2.25/.github/workflows/main-source-guard.yml +20 -0
freesolo_flash_dev-0.2.25/.github/workflows/publish-dev.yml +107 -0
freesolo_flash_dev-0.2.25/.github/workflows/publish-image.yml +69 -0
freesolo_flash_dev-0.2.25/.github/workflows/publish.yml +174 -0
freesolo_flash_dev-0.2.25/.github/workflows/worker-image.yml +85 -0
freesolo_flash_dev-0.2.25/.gitignore +33 -0
freesolo_flash_dev-0.2.25/Dockerfile +24 -0
freesolo_flash_dev-0.2.25/Dockerfile.worker +204 -0
freesolo_flash_dev-0.2.25/LICENSE +201 -0
freesolo_flash_dev-0.2.25/PKG-INFO +192 -0
freesolo_flash_dev-0.2.25/README.md +143 -0
freesolo_flash_dev-0.2.25/build/kernel_cache/.gitignore +5 -0
freesolo_flash_dev-0.2.25/build/kernel_cache/.keep +4 -0
freesolo_flash_dev-0.2.25/docker/Dockerfile.kernelcache +18 -0
freesolo_flash_dev-0.2.25/docker/bake_kernel_cache.py +240 -0
freesolo_flash_dev-0.2.25/docker/bake_pod_entry.py +78 -0
freesolo_flash_dev-0.2.25/docker/make_rp_handler.py +51 -0
freesolo_flash_dev-0.2.25/docs/cli-style/README.md +33 -0
freesolo_flash_dev-0.2.25/docs/cli-style/generate.py +503 -0
freesolo_flash_dev-0.2.25/docs/cli-style/index.html +709 -0
freesolo_flash_dev-0.2.25/docs/cli-style/preview.png +0 -0
freesolo_flash_dev-0.2.25/docs/kernel-cache.md +100 -0
freesolo_flash_dev-0.2.25/flash/__init__.py +29 -0
freesolo_flash_dev-0.2.25/flash/_channel.py +23 -0
freesolo_flash_dev-0.2.25/flash/_fileio.py +35 -0
freesolo_flash_dev-0.2.25/flash/_logging.py +49 -0
freesolo_flash_dev-0.2.25/flash/_update_check.py +266 -0
freesolo_flash_dev-0.2.25/flash/catalog.py +253 -0
freesolo_flash_dev-0.2.25/flash/cli/__init__.py +1 -0
freesolo_flash_dev-0.2.25/flash/cli/main/__init__.py +227 -0
freesolo_flash_dev-0.2.25/flash/cli/main/__main__.py +6 -0
freesolo_flash_dev-0.2.25/flash/cli/main/commands.py +636 -0
freesolo_flash_dev-0.2.25/flash/cli/main/envpush.py +317 -0
freesolo_flash_dev-0.2.25/flash/cli/main/render.py +599 -0
freesolo_flash_dev-0.2.25/flash/cli/main/training_doc.py +455 -0
freesolo_flash_dev-0.2.25/flash/client/__init__.py +14 -0
freesolo_flash_dev-0.2.25/flash/client/config.py +70 -0
freesolo_flash_dev-0.2.25/flash/client/http.py +372 -0
freesolo_flash_dev-0.2.25/flash/client/runtime_secrets.py +69 -0
freesolo_flash_dev-0.2.25/flash/client/specs.py +20 -0
freesolo_flash_dev-0.2.25/flash/cost/__init__.py +16 -0
freesolo_flash_dev-0.2.25/flash/cost/analytical.py +175 -0
freesolo_flash_dev-0.2.25/flash/cost/facts.py +114 -0
freesolo_flash_dev-0.2.25/flash/cost/spec.py +113 -0
freesolo_flash_dev-0.2.25/flash/cost/types.py +158 -0
freesolo_flash_dev-0.2.25/flash/engine/__init__.py +6 -0
freesolo_flash_dev-0.2.25/flash/engine/accounting.py +36 -0
freesolo_flash_dev-0.2.25/flash/engine/chalk_kernels.py +116 -0
freesolo_flash_dev-0.2.25/flash/engine/multiturn_rollout.py +780 -0
freesolo_flash_dev-0.2.25/flash/engine/recipe.py +86 -0
freesolo_flash_dev-0.2.25/flash/engine/vram.py +603 -0
freesolo_flash_dev-0.2.25/flash/engine/worker/__init__.py +2916 -0
freesolo_flash_dev-0.2.25/flash/engine/worker/__main__.py +4 -0
freesolo_flash_dev-0.2.25/flash/engine/worker/kernel_warmup.py +400 -0
freesolo_flash_dev-0.2.25/flash/engine/worker/lora.py +796 -0
freesolo_flash_dev-0.2.25/flash/engine/worker/packing.py +366 -0
freesolo_flash_dev-0.2.25/flash/engine/worker/perf.py +1048 -0
freesolo_flash_dev-0.2.25/flash/envs/__init__.py +10 -0
freesolo_flash_dev-0.2.25/flash/envs/adapter/__init__.py +883 -0
freesolo_flash_dev-0.2.25/flash/envs/adapter/rubric.py +222 -0
freesolo_flash_dev-0.2.25/flash/envs/base.py +52 -0
freesolo_flash_dev-0.2.25/flash/envs/registry.py +62 -0
freesolo_flash_dev-0.2.25/flash/mcp/__init__.py +1 -0
freesolo_flash_dev-0.2.25/flash/mcp/server.py +85 -0
freesolo_flash_dev-0.2.25/flash/providers/__init__.py +59 -0
freesolo_flash_dev-0.2.25/flash/providers/_auth.py +24 -0
freesolo_flash_dev-0.2.25/flash/providers/_http.py +230 -0
freesolo_flash_dev-0.2.25/flash/providers/_instance.py +416 -0
freesolo_flash_dev-0.2.25/flash/providers/_instance_bootstrap.py +517 -0
freesolo_flash_dev-0.2.25/flash/providers/_poll.py +311 -0
freesolo_flash_dev-0.2.25/flash/providers/allocator.py +193 -0
freesolo_flash_dev-0.2.25/flash/providers/base.py +431 -0
freesolo_flash_dev-0.2.25/flash/providers/hyperstack/__init__.py +127 -0
freesolo_flash_dev-0.2.25/flash/providers/hyperstack/api.py +522 -0
freesolo_flash_dev-0.2.25/flash/providers/hyperstack/auth.py +17 -0
freesolo_flash_dev-0.2.25/flash/providers/hyperstack/gpus.py +29 -0
freesolo_flash_dev-0.2.25/flash/providers/hyperstack/jobs/__init__.py +632 -0
freesolo_flash_dev-0.2.25/flash/providers/hyperstack/jobs/builders.py +122 -0
freesolo_flash_dev-0.2.25/flash/providers/hyperstack/preflight.py +23 -0
freesolo_flash_dev-0.2.25/flash/providers/hyperstack/pricing.py +26 -0
freesolo_flash_dev-0.2.25/flash/providers/hyperstack/train.py +25 -0
freesolo_flash_dev-0.2.25/flash/providers/lambdalabs/__init__.py +139 -0
freesolo_flash_dev-0.2.25/flash/providers/lambdalabs/api.py +261 -0
freesolo_flash_dev-0.2.25/flash/providers/lambdalabs/auth.py +18 -0
freesolo_flash_dev-0.2.25/flash/providers/lambdalabs/gpus.py +29 -0
freesolo_flash_dev-0.2.25/flash/providers/lambdalabs/jobs/__init__.py +724 -0
freesolo_flash_dev-0.2.25/flash/providers/lambdalabs/jobs/builders.py +118 -0
freesolo_flash_dev-0.2.25/flash/providers/lambdalabs/preflight.py +27 -0
freesolo_flash_dev-0.2.25/flash/providers/lambdalabs/pricing.py +51 -0
freesolo_flash_dev-0.2.25/flash/providers/lambdalabs/train.py +27 -0
freesolo_flash_dev-0.2.25/flash/providers/preflight.py +55 -0
freesolo_flash_dev-0.2.25/flash/providers/realized.py +80 -0
freesolo_flash_dev-0.2.25/flash/providers/runpod/__init__.py +130 -0
freesolo_flash_dev-0.2.25/flash/providers/runpod/api.py +186 -0
freesolo_flash_dev-0.2.25/flash/providers/runpod/auth.py +37 -0
freesolo_flash_dev-0.2.25/flash/providers/runpod/cost.py +57 -0
freesolo_flash_dev-0.2.25/flash/providers/runpod/gpus.py +46 -0
freesolo_flash_dev-0.2.25/flash/providers/runpod/jobs.py +956 -0
freesolo_flash_dev-0.2.25/flash/providers/runpod/keys.py +139 -0
freesolo_flash_dev-0.2.25/flash/providers/runpod/preflight.py +30 -0
freesolo_flash_dev-0.2.25/flash/providers/runpod/preload.py +915 -0
freesolo_flash_dev-0.2.25/flash/providers/runpod/pricing.py +18 -0
freesolo_flash_dev-0.2.25/flash/providers/runpod/slots.py +79 -0
freesolo_flash_dev-0.2.25/flash/providers/runpod/train/__init__.py +150 -0
freesolo_flash_dev-0.2.25/flash/providers/runpod/train/deps.py +395 -0
freesolo_flash_dev-0.2.25/flash/providers/runpod/train/endpoints.py +820 -0
freesolo_flash_dev-0.2.25/flash/py.typed +0 -0
freesolo_flash_dev-0.2.25/flash/runner/__init__.py +686 -0
freesolo_flash_dev-0.2.25/flash/runner/checkpoints.py +82 -0
freesolo_flash_dev-0.2.25/flash/runner/deploy.py +422 -0
freesolo_flash_dev-0.2.25/flash/runner/lifecycle.py +672 -0
freesolo_flash_dev-0.2.25/flash/schema/__init__.py +375 -0
freesolo_flash_dev-0.2.25/flash/schema/fields.py +331 -0
freesolo_flash_dev-0.2.25/flash/serve/__init__.py +1 -0
freesolo_flash_dev-0.2.25/flash/serve/deploy.py +326 -0
freesolo_flash_dev-0.2.25/flash/serve/pricing.py +60 -0
freesolo_flash_dev-0.2.25/flash/server/__init__.py +1 -0
freesolo_flash_dev-0.2.25/flash/server/__main__.py +20 -0
freesolo_flash_dev-0.2.25/flash/server/app.py +961 -0
freesolo_flash_dev-0.2.25/flash/server/auth.py +263 -0
freesolo_flash_dev-0.2.25/flash/server/billing.py +124 -0
freesolo_flash_dev-0.2.25/flash/server/checkpoints.py +110 -0
freesolo_flash_dev-0.2.25/flash/server/db.py +160 -0
freesolo_flash_dev-0.2.25/flash/server/environment_registry.py +102 -0
freesolo_flash_dev-0.2.25/flash/server/envs.py +360 -0
freesolo_flash_dev-0.2.25/flash/server/reconcile.py +163 -0
freesolo_flash_dev-0.2.25/flash/server/run_registry.py +150 -0
freesolo_flash_dev-0.2.25/flash/spec.py +333 -0
freesolo_flash_dev-0.2.25/pyproject.toml +161 -0
freesolo_flash_dev-0.2.25/scripts/build_dev_dist.py +130 -0
freesolo_flash_dev-0.2.25/tests/__init__.py +2 -0
freesolo_flash_dev-0.2.25/tests/_helpers/__init__.py +1 -0
freesolo_flash_dev-0.2.25/tests/_helpers/runner.py +25 -0
freesolo_flash_dev-0.2.25/tests/_helpers/specs.py +18 -0
freesolo_flash_dev-0.2.25/tests/conftest.py +47 -0
freesolo_flash_dev-0.2.25/tests/fixtures/math_eval.jsonl +3 -0
freesolo_flash_dev-0.2.25/tests/fixtures/math_train.jsonl +2 -0
freesolo_flash_dev-0.2.25/tests/live/__init__.py +0 -0
freesolo_flash_dev-0.2.25/tests/live/conftest.py +35 -0
freesolo_flash_dev-0.2.25/tests/live/test_hyperstack_live.py +50 -0
freesolo_flash_dev-0.2.25/tests/live/test_lambda_live.py +49 -0
freesolo_flash_dev-0.2.25/tests/live/test_runpod_live.py +36 -0
freesolo_flash_dev-0.2.25/tests/test_agent_flash_cli_contract.py +228 -0
freesolo_flash_dev-0.2.25/tests/test_algorithms.py +71 -0
freesolo_flash_dev-0.2.25/tests/test_allocator.py +390 -0
freesolo_flash_dev-0.2.25/tests/test_backend_jobspec_contract.py +152 -0
freesolo_flash_dev-0.2.25/tests/test_cancel_remote.py +666 -0
freesolo_flash_dev-0.2.25/tests/test_catalog_consistency.py +57 -0
freesolo_flash_dev-0.2.25/tests/test_chalk_kernels.py +114 -0
freesolo_flash_dev-0.2.25/tests/test_checkpoints.py +298 -0
freesolo_flash_dev-0.2.25/tests/test_cli_commands.py +380 -0
freesolo_flash_dev-0.2.25/tests/test_cli_errors.py +121 -0
freesolo_flash_dev-0.2.25/tests/test_cli_estimate.py +274 -0
freesolo_flash_dev-0.2.25/tests/test_cli_managed.py +144 -0
freesolo_flash_dev-0.2.25/tests/test_cli_render_theme.py +159 -0
freesolo_flash_dev-0.2.25/tests/test_client.py +250 -0
freesolo_flash_dev-0.2.25/tests/test_client_server_integration.py +273 -0
freesolo_flash_dev-0.2.25/tests/test_config_overrides.py +64 -0
freesolo_flash_dev-0.2.25/tests/test_cost_analytical.py +244 -0
freesolo_flash_dev-0.2.25/tests/test_cost_equation.py +46 -0
freesolo_flash_dev-0.2.25/tests/test_cost_estimate.py +79 -0
freesolo_flash_dev-0.2.25/tests/test_cost_hardware.py +87 -0
freesolo_flash_dev-0.2.25/tests/test_cost_models.py +36 -0
freesolo_flash_dev-0.2.25/tests/test_cost_rewards.py +65 -0
freesolo_flash_dev-0.2.25/tests/test_dev_channel.py +99 -0
freesolo_flash_dev-0.2.25/tests/test_disk_gb.py +95 -0
freesolo_flash_dev-0.2.25/tests/test_endpoint_name.py +45 -0
freesolo_flash_dev-0.2.25/tests/test_env_install.py +43 -0
freesolo_flash_dev-0.2.25/tests/test_env_publish.py +522 -0
freesolo_flash_dev-0.2.25/tests/test_env_push.py +314 -0
freesolo_flash_dev-0.2.25/tests/test_env_rate_limit_resolve.py +275 -0
freesolo_flash_dev-0.2.25/tests/test_envs_coverage.py +92 -0
freesolo_flash_dev-0.2.25/tests/test_flash_mvp.py +139 -0
freesolo_flash_dev-0.2.25/tests/test_flash_worker.py +367 -0
freesolo_flash_dev-0.2.25/tests/test_github_urlopen_retry.py +118 -0
freesolo_flash_dev-0.2.25/tests/test_gpus.py +162 -0
freesolo_flash_dev-0.2.25/tests/test_grpo_mask_aware.py +180 -0
freesolo_flash_dev-0.2.25/tests/test_grpo_params.py +626 -0
freesolo_flash_dev-0.2.25/tests/test_grpo_sleep_gate.py +97 -0
freesolo_flash_dev-0.2.25/tests/test_hyperstack_runner.py +1031 -0
freesolo_flash_dev-0.2.25/tests/test_idle_endpoint_reaper.py +285 -0
freesolo_flash_dev-0.2.25/tests/test_jobs.py +1777 -0
freesolo_flash_dev-0.2.25/tests/test_kernel_cache.py +112 -0
freesolo_flash_dev-0.2.25/tests/test_kv_util.py +75 -0
freesolo_flash_dev-0.2.25/tests/test_lambda_runner.py +1394 -0
freesolo_flash_dev-0.2.25/tests/test_logging.py +51 -0
freesolo_flash_dev-0.2.25/tests/test_login_perms.py +91 -0
freesolo_flash_dev-0.2.25/tests/test_managed_hf_repo.py +65 -0
freesolo_flash_dev-0.2.25/tests/test_metrics_schema_agent_contract.py +170 -0
freesolo_flash_dev-0.2.25/tests/test_mig_guard.py +70 -0
freesolo_flash_dev-0.2.25/tests/test_multiturn_rollout.py +785 -0
freesolo_flash_dev-0.2.25/tests/test_open_model_policy.py +95 -0
freesolo_flash_dev-0.2.25/tests/test_orchestrator_flash.py +232 -0
freesolo_flash_dev-0.2.25/tests/test_packing.py +427 -0
freesolo_flash_dev-0.2.25/tests/test_poll_helpers.py +52 -0
freesolo_flash_dev-0.2.25/tests/test_preflight.py +98 -0
freesolo_flash_dev-0.2.25/tests/test_provider_routing.py +426 -0
freesolo_flash_dev-0.2.25/tests/test_provider_teardown_robustness.py +223 -0
freesolo_flash_dev-0.2.25/tests/test_providers_symmetry.py +164 -0
freesolo_flash_dev-0.2.25/tests/test_realized_cost.py +400 -0
freesolo_flash_dev-0.2.25/tests/test_resolve_params_b.py +50 -0
freesolo_flash_dev-0.2.25/tests/test_runmgmt.py +204 -0
freesolo_flash_dev-0.2.25/tests/test_runpod_api_delete.py +90 -0
freesolo_flash_dev-0.2.25/tests/test_runpod_key_waterfall.py +323 -0
freesolo_flash_dev-0.2.25/tests/test_runpod_slots.py +236 -0
freesolo_flash_dev-0.2.25/tests/test_serve.py +428 -0
freesolo_flash_dev-0.2.25/tests/test_server_api.py +1367 -0
freesolo_flash_dev-0.2.25/tests/test_server_billing.py +388 -0
freesolo_flash_dev-0.2.25/tests/test_server_db.py +234 -0
freesolo_flash_dev-0.2.25/tests/test_serving_contract.py +161 -0
freesolo_flash_dev-0.2.25/tests/test_spec_and_validation.py +501 -0
freesolo_flash_dev-0.2.25/tests/test_thinking_config.py +118 -0
freesolo_flash_dev-0.2.25/tests/test_update_check.py +344 -0
freesolo_flash_dev-0.2.25/tests/test_verifiers.py +596 -0
freesolo_flash_dev-0.2.25/tests/test_version.py +49 -0
freesolo_flash_dev-0.2.25/tests/test_vl_warmstart_adapter_keys.py +543 -0
freesolo_flash_dev-0.2.25/tests/test_vl_weight_sync.py +181 -0
freesolo_flash_dev-0.2.25/tests/test_wandb_naming.py +337 -0
freesolo_flash_dev-0.2.25/tests/test_warmstart_cross_repo.py +49 -0
freesolo_flash_dev-0.2.25/tests/test_weight_cache.py +1825 -0
freesolo_flash_dev-0.2.25/tests/test_worker_dryrun.py +233 -0
freesolo_flash_dev-0.2.25/tests/test_worker_hardexit.py +81 -0
freesolo_flash_dev-0.2.25/tests/test_worker_image.py +67 -0
freesolo_flash_dev-0.2.25/tests/test_worker_stack.py +1085 -0
freesolo_flash_dev-0.2.25/tests/test_worker_thinking.py +155 -0
freesolo_flash_dev-0.2.25/uv.lock +4221 -0

freesolo_flash_dev-0.2.25/.dockerignore ADDED Viewed

@@ -0,0 +1,20 @@
+.venv/
+venv/
+__pycache__/
+*.pyc
+.ruff_cache/
+.cache/
+.flash/
+results/
+hf_cache/
+uv-cache/
+dist/
+build/*
+# ...but keep the opt-in kernel-cache staging dir in the build context: Dockerfile.worker
+# copies build/kernel_cache/ (the .keep placeholder makes the source always exist; the
+# gpu-produced mega_cache.bin + mega_cache.json ride along on a kernel-cache bake).
+!build/kernel_cache/
+!build/kernel_cache/**
+*.egg-info/
+.env
+.flash/

freesolo_flash_dev-0.2.25/.env.example ADDED Viewed

@@ -0,0 +1,34 @@
+# Flash control plane (operator-side). Copy to .env and fill in.
+# Provider credentials live ONLY here — clients never see them and authenticate
+# with their freesolo API key (`flash login`).
+# GPU substrate. RunPod is the default; Vast is opt-in (only required when set).
+RUNPOD_API_KEY=
+VAST_API_KEY=
+# HuggingFace token with write access to each run's [train] hf_repo (code upload +
+# streamed checkpoints/adapters land in that per-run dataset repo). The artifact repo
+# is per-run (set in the run TOML's [train] hf_repo), not an operator-wide env var.
+HF_TOKEN=
+# Prime Intellect API key for FreeSolo's MANAGED Prime account. Used to (1) `prime env install`
+# the run's Hub environment on the worker, and (2) publish user-uploaded envs (`flash env push` ->
+# POST /v1/envs) under this one account — so users never need their own Prime account. The `prime`
+# CLI must be installed on the control plane (it ships in the `server` extra).
+PRIME_API_KEY=
+# --- FreeSolo auth (hosted deployment) ---
+# User auth is freesolo API keys only. An unknown bearer token is verified against
+# {FREESOLO_BASE_URL}/api/auth/verify and resolved to a per-user identity.
+# The shared freesolo internal key (same value the platform/SDK already hold) maps
+# to a single service identity without a network call. The same key authenticates
+# flash's adapter registration calls to the freesolo serving app (below).
+FREESOLO_INTERNAL_KEY=
+# Where to verify user keys. In compose this is the backend on the internal network;
+# defaults to https://api.freesolo.co when unset.
+FREESOLO_BASE_URL=http://backend:8000
+# --- FreeSolo serving (adapter deploy/chat) ---
+# Adapter serving is delegated to the freesolo platform's multi-LoRA serving app (a
+# Modal app that serves every adapter on one GPU per base model, scaling to zero when
+# idle). `flash deploy`/`undeploy`/`chat` register/deregister/chat against it. Defaults
+# to the hosted Modal URL when unset.
+FREESOLO_SERVING_URL=https://clado-ai--freesolo-lora-serving.modal.run

freesolo_flash_dev-0.2.25/.github/workflows/bake-kernel-cache.yml ADDED Viewed

@@ -0,0 +1,136 @@
+name: flash worker kernel-cache (per-arch)
+# Builds per-SM worker images with the compiled-kernel mega-cache baked in, killing the ~10-15 min
+# first-use JIT on a cold worker (the regression #194 reintroduced). Each matrix leg:
+#   1. offloads the warmup to a RunPod GPU of that arch (GH runners have none) -> build/kernel_cache,
+#   2. docker build -f docker/Dockerfile.kernelcache --build-arg BASE=<base image> (thin layer on the warmed base) -> ghcr.io/freesolo-co/flash-worker:cu128-<sm>,
+#   3. pushes it. Activate by setting FLASH_WORKER_IMAGE_PER_SM=1 on the control plane so the
+#      allocator selects the cu128-<sm> tag per GPU class (see flash/providers/runpod/train/deps.py).
+#
+# Depends on the base ghcr.io/freesolo-co/flash-worker:cu128 image existing (worker-image.yml) — the
+# warmup runs INSIDE it so the cache matches the image's pinned torch/triton/fla/liger toolchain.
+#
+# Requires the RUNPOD_API_KEY repo secret (the warmup GPU pod) + the default GITHUB_TOKEN (GHCR push)
+# + HF_TOKEN (the temp dataset that ferries code + the cache artifact). Manual trigger; heavy + paid.
+on:
+  workflow_dispatch:
+    inputs:
+      sms:
+        description: "comma-separated sm list to bake; default is all validated arches"
+        default: "sm80,sm86,sm89,sm90,sm120"
+        required: false
+permissions:
+  contents: read
+  packages: write
+jobs:
+  bake:
+    runs-on: ubuntu-24.04-8core
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - {
+              sm: sm80,
+              arch: "8.0",
+              gpu_type_id: "NVIDIA A100 80GB PCIe",
+              allowed_cuda: "",
+            }
+          - {
+              sm: sm86,
+              arch: "8.6",
+              gpu_type_id: "NVIDIA RTX A6000",
+              allowed_cuda: "",
+            }
+          - {
+              sm: sm89,
+              arch: "8.9",
+              gpu_type_id: "NVIDIA GeForce RTX 4090",
+              allowed_cuda: "",
+            }
+          - {
+              sm: sm90,
+              arch: "9.0",
+              gpu_type_id: "NVIDIA H100 80GB HBM3",
+              allowed_cuda: "",
+            }
+          # Blackwell needs CUDA-13 hosts to JIT its PTX (matches min_cuda_for in the provider).
+          - {
+              sm: sm120,
+              arch: "12.0",
+              gpu_type_id: "NVIDIA GeForce RTX 5090",
+              allowed_cuda: "13.0",
+            }
+    steps:
+      - uses: actions/checkout@v6
+      # skip arches not requested in the dispatch input
+      - name: Should bake ${{ matrix.sm }}?
+        id: gate
+        run: |
+          req="${{ github.event.inputs.sms || 'sm80,sm86,sm89,sm90,sm120' }}"
+          req="${req// /}"   # tolerate spaces, e.g. "sm80, sm86"
+          case ",$req," in
+            *,${{ matrix.sm }},*) echo "run=true" >> "$GITHUB_OUTPUT" ;;
+            *) echo "run=false" >> "$GITHUB_OUTPUT"; echo "skipping ${{ matrix.sm }}" ;;
+          esac
+      - name: Install uv
+        if: steps.gate.outputs.run == 'true'
+        uses: astral-sh/setup-uv@v5
+      - name: Sync deps (flash + runpod + hf)
+        if: steps.gate.outputs.run == 'true'
+        run: uv sync --extra server
+      - name: Warm kernels on a RunPod ${{ matrix.sm }} GPU -> build/kernel_cache
+        if: steps.gate.outputs.run == 'true'
+        env:
+          RUNPOD_API_KEY: ${{ secrets.RUNPOD_API_KEY }}
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
+        run: |
+          uv run python docker/bake_kernel_cache.py \
+            --arch "${{ matrix.arch }}" --sm "${{ matrix.sm }}" \
+            --gpu-type-id "${{ matrix.gpu_type_id }}" \
+            --allowed-cuda "${{ matrix.allowed_cuda }}" \
+            --image ghcr.io/freesolo-co/flash-worker:cu128 \
+            --out build/kernel_cache
+      - name: Free disk space
+        if: steps.gate.outputs.run == 'true'
+        run: |
+          sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/.ghcup \
+            /usr/local/lib/android /opt/hostedtoolcache /usr/share/swift \
+            /usr/local/share/boost /usr/local/lib/node_modules \
+            /usr/local/share/powershell /usr/share/miniconda || true
+          sudo apt-get clean || true
+          sudo docker image prune -af || true
+          df -h /
+      - uses: docker/setup-buildx-action@v3
+        if: steps.gate.outputs.run == 'true'
+      - name: Log in to GHCR
+        if: steps.gate.outputs.run == 'true'
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+      # thin layer FROM the SAME base the warmup ran inside -> the cache toolchain always matches the
+      # shipped image (rebuilding the full Dockerfile.worker from the checkout could drift from the
+      # published base and bake a cache that silently won't load).
+      - name: Build + push baked per-sm image (thin layer on the warmed base)
+        if: steps.gate.outputs.run == 'true'
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          file: docker/Dockerfile.kernelcache
+          push: true
+          tags: ghcr.io/freesolo-co/flash-worker:cu128-${{ matrix.sm }}
+          provenance: false
+          build-args: |
+            BASE=ghcr.io/freesolo-co/flash-worker:cu128

freesolo_flash_dev-0.2.25/.github/workflows/ci.yml ADDED Viewed

@@ -0,0 +1,28 @@
+name: ci
+on:
+  push:
+    branches: [main, dev]
+  pull_request:
+    branches: [main, dev]
+jobs:
+  test:
+    name: lint + offline tests
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v6
+      - name: Install uv
+        uses: astral-sh/setup-uv@v6
+        with:
+          python-version: "3.11"
+      - name: Sync (server extra + dev)
+        run: uv sync --extra server --dev
+      - name: Ruff
+        run: uv run ruff check .
+      - name: Tests (CPU, offline)
+        run: uv run pytest -q

freesolo_flash_dev-0.2.25/.github/workflows/main-source-guard.yml ADDED Viewed

@@ -0,0 +1,20 @@
+name: Main source guard
+on:
+  pull_request:
+    branches: [main]
+permissions:
+  contents: read
+jobs:
+  source-is-dev:
+    name: Source branch is dev
+    runs-on: ubuntu-latest
+    steps:
+      - name: Require PRs into main to originate from dev
+        env:
+          HEAD_REF: ${{ github.head_ref }}
+        run: |
+          if [ "$HEAD_REF" != "dev" ]; then
+            echo "::error::PRs into main must come from 'dev' (got '$HEAD_REF'). Merge into dev, then promote dev -> main."
+            exit 1
+          fi
+          echo "Source branch '$HEAD_REF' is allowed."

freesolo_flash_dev-0.2.25/.github/workflows/publish-dev.yml ADDED Viewed

@@ -0,0 +1,107 @@
+name: Publish flash dev-channel package
+# Auto-publish the dev-channel package `freesolo-flash-dev` (the `flash-dev` CLI, which defaults
+# to the staging plane flash-dev.freesolo.co) to PyPI from the `dev` branch.
+#
+# Trigger: a push to `dev` that touches package files. We publish iff the dev-channel version
+# ([tool.flash-dev].version in pyproject.toml) is NOT already on PyPI. So bumping that version and
+# merging to `dev` cuts a release; ordinary dev pushes (version unchanged -> already published)
+# no-op. This mirrors freesolo-flash's publish.yml, but keyed on the dev version and on `dev`
+# instead of `main`, and a no-op is a clean success (not a failure) since most dev pushes don't
+# bump it. Manual runs via workflow_dispatch apply the same check to the current dev version (still a no-op if it is already on PyPI).
+on:
+  push:
+    branches:
+      - dev
+    paths:
+      - "pyproject.toml"
+      - "uv.lock"
+      - "flash/**"
+      - "scripts/build_dev_dist.py"
+      - ".github/workflows/publish-dev.yml"
+  workflow_dispatch:
+concurrency:
+  group: publish-flash-dev-${{ github.ref }}
+  cancel-in-progress: false
+jobs:
+  publish-pypi-dev:
+    name: Publish freesolo-flash-dev to PyPI
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+    env:
+      UV_PUBLISH_TOKEN: ${{ secrets.PYPI_API_TOKEN }}
+    steps:
+      - uses: actions/checkout@v6
+        with:
+          fetch-depth: 0
+      # tomllib is stdlib only since 3.11; pin the interpreter before the metadata step below.
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+      - name: Read dev-channel version
+        id: meta
+        run: |
+          python3 - <<'PY' >> "$GITHUB_OUTPUT"
+          import tomllib
+          with open("pyproject.toml", "rb") as f:
+              data = tomllib.load(f)
+          print(f"version={data['tool']['flash-dev']['version']}")
+          PY
+      - name: Decide whether to publish
+        id: decide
+        env:
+          VERSION: ${{ steps.meta.outputs.version }}
+        run: |
+          python3 - <<'PY' >> "$GITHUB_OUTPUT"
+          import os
+          import urllib.error
+          import urllib.request
+          version = os.environ["VERSION"]
+          url = f"https://pypi.org/pypi/freesolo-flash-dev/{version}/json"
+          published = False
+          try:
+              with urllib.request.urlopen(url, timeout=30) as response:
+                  published = response.status == 200
+          except urllib.error.HTTPError as error:
+              if error.code != 404:
+                  raise
+          # Publish only when the dev version isn't on PyPI yet (i.e. it was just bumped).
+          print(f"publish={'false' if published else 'true'}")
+          PY
+      - name: Already published (nothing to do)
+        if: steps.decide.outputs.publish == 'false'
+        run: echo "freesolo-flash-dev ${{ steps.meta.outputs.version }} is already on PyPI; nothing to publish."
+      - name: Install uv
+        if: steps.decide.outputs.publish == 'true'
+        uses: astral-sh/setup-uv@v6
+        with:
+          python-version: "3.11"
+      - name: Build dev-channel distribution
+        if: steps.decide.outputs.publish == 'true'
+        run: |
+          rm -rf dist
+          # build_dev_dist.py renames the package to freesolo-flash-dev, retargets the CLI to
+          # flash-dev, flips the channel to the staging plane, then runs `uv build`.
+          python3 scripts/build_dev_dist.py
+      - name: Publish to PyPI
+        if: steps.decide.outputs.publish == 'true'
+        run: |
+          if [ -z "$UV_PUBLISH_TOKEN" ]; then
+            echo "::error::UV_PUBLISH_TOKEN is empty (sourced from the PYPI_API_TOKEN repo secret); refusing to publish without credentials. Set the PYPI_API_TOKEN secret on this repository."
+            exit 1
+          fi
+          uv publish

freesolo_flash_dev-0.2.25/.github/workflows/publish-image.yml ADDED Viewed

@@ -0,0 +1,69 @@
+name: Publish flash image
+on:
+  push:
+    branches:
+      - main
+      - dev
+    paths:
+      - "flash/**"
+      - "Dockerfile"
+      - "pyproject.toml"
+      - ".github/workflows/publish-image.yml"
+  workflow_dispatch:
+concurrency:
+  group: publish-flash-image-${{ github.ref }}
+  cancel-in-progress: true
+jobs:
+  publish-flash-image:
+    name: Build and push flash control-plane image
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      packages: write
+    env:
+      IMAGE: ghcr.io/freesolo-co/freesolo-flash
+    steps:
+      - uses: actions/checkout@v6
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      - name: Log in to GHCR
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+      - name: Compute tags
+        id: tags
+        run: |
+          # main publishes :main + :latest; any other branch (dev) publishes
+          # :<branch>. every build also gets an immutable :sha-<short> tag.
+          short_sha="$(git rev-parse --short HEAD)"
+          branch="${GITHUB_REF_NAME}"
+          {
+            echo "tags<<EOF"
+            if [ "$branch" = "main" ]; then
+              echo "${IMAGE}:main"
+              echo "${IMAGE}:latest"
+            else
+              echo "${IMAGE}:${branch}"
+            fi
+            echo "${IMAGE}:sha-${short_sha}"
+            echo "EOF"
+          } >> "$GITHUB_OUTPUT"
+      - name: Build and push
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          file: ./Dockerfile
+          push: true
+          tags: ${{ steps.tags.outputs.tags }}
+          cache-from: type=gha
+          cache-to: type=gha,mode=min

freesolo_flash_dev-0.2.25/.github/workflows/publish.yml ADDED Viewed

@@ -0,0 +1,174 @@
+name: Publish flash package
+# Auto-publish freesolo-flash to PyPI when a push to main bumps the version in pyproject.toml.
+# Mirrors freesolo-sdk's publish flow: a push that touches package files MUST also raise the
+# version (else CI fails), and a version already on PyPI is rejected — so merging dev -> main
+# with a bumped version is all it takes to release. Manual runs are still possible via
+# workflow_dispatch.
+on:
+  push:
+    branches:
+      - main
+    paths:
+      - "pyproject.toml"
+      - "uv.lock"
+      - "flash/**"
+      - ".github/workflows/publish.yml"
+  workflow_dispatch:
+concurrency:
+  # Do not cancel a publish that is already in progress for this ref.
+  group: publish-flash-${{ github.ref }}
+  cancel-in-progress: false
+jobs:
+  publish-pypi:
+    name: Publish flash PyPI package
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+    steps:
+      - uses: actions/checkout@v6
+        with:
+          # Full history: the version-bump step diffs against github.event.before.
+          fetch-depth: 0
+      # Pin Python >= 3.11 BEFORE any step that imports tomllib (stdlib only since 3.11).
+      # The metadata + version-bump steps below run `python3 -c "import tomllib"`, which would
+      # fail on a runner whose default python3 predates 3.11 — so set it up here, not later
+      # (the uv step that follows brings its own interpreter, but only after those tomllib steps).
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+      - name: Read package metadata
+        id: metadata
+        run: |
+          # stdout of the script is appended to GITHUB_OUTPUT as name=/version= outputs.
+          python3 - <<'PY' >> "$GITHUB_OUTPUT"
+          import tomllib
+          with open("pyproject.toml", "rb") as f:
+              project = tomllib.load(f)["project"]
+          print(f"name={project['name']}")
+          print(f"version={project['version']}")
+          PY
+      - name: Require version bump for package changes
+        id: changes
+        if: github.event_name == 'push'
+        env:
+          BEFORE_SHA: ${{ github.event.before }}
+          CURRENT_SHA: ${{ github.sha }}
+        run: |
+          python3 - <<'PY'
+          import os
+          import re
+          import subprocess
+          import sys
+          import tomllib
+          from pathlib import Path
+          output_path = Path(os.environ["GITHUB_OUTPUT"])
+          def set_package_changed(value: bool) -> None:
+              """Record the package_changed step output consumed by later `if:` conditions."""
+              with output_path.open("a", encoding="utf-8") as output:
+                  output.write(f"package_changed={'true' if value else 'false'}\n")
+          def commit_exists(sha: str) -> bool:
+              """Return True when `sha` names a commit present in this checkout."""
+              probe = subprocess.run(
+                  ["git", "cat-file", "-e", sha + "^{commit}"],
+                  stdout=subprocess.DEVNULL,
+                  stderr=subprocess.DEVNULL,
+                  check=False,
+              )
+              return probe.returncode == 0
+          before = os.environ["BEFORE_SHA"]
+          current = os.environ["CURRENT_SHA"]
+          # First push to the branch (no parent): publish whatever version is present.
+          if not before or set(before) == {"0"}:
+              set_package_changed(True)
+              raise SystemExit(0)
+          # After a force-push the previous tip may be absent from the checkout. Fail closed
+          # with an actionable message instead of a git traceback: without the old commit the
+          # version bump cannot be verified.
+          if not commit_exists(before):
+              print(
+                  f"::error::Previous commit {before} is not available in this checkout "
+                  "(force-push?), so the version bump cannot be verified. "
+                  "Re-run this workflow via workflow_dispatch to publish."
+              )
+              sys.exit(1)
+          changed = subprocess.check_output(
+              ["git", "diff", "--name-only", before, current],
+              text=True,
+          ).splitlines()
+          # Package files = the importable source, the project metadata, and the lockfile.
+          package_changed = any(
+              path == "pyproject.toml"
+              or path == "uv.lock"
+              or path.startswith("flash/")
+              for path in changed
+          )
+          if not package_changed:
+              set_package_changed(False)
+              raise SystemExit(0)
+          set_package_changed(True)
+          def read_version(data: str) -> str:
+              """Extract project.version from pyproject.toml text."""
+              return tomllib.loads(data)["project"]["version"]
+          def version_key(value: str) -> tuple[int, ...]:
+              """Return the digit runs of a version string as an int tuple for ordering."""
+              # NOTE(review): only digit runs are compared, so pre-release/dev suffixes
+              # (e.g. 1.0.0rc1 vs 1.0.0) do not follow PEP 440 ordering; plain X.Y.Z
+              # versions compare correctly.
+              return tuple(int(part) for part in re.findall(r"\d+", value))
+          # TOML is UTF-8 by specification, so decode explicitly rather than via the locale.
+          previous_version = read_version(
+              subprocess.check_output(
+                  ["git", "show", f"{before}:pyproject.toml"],
+                  encoding="utf-8",
+              )
+          )
+          # Read the working-tree file in binary, the same way the metadata step does.
+          with open("pyproject.toml", "rb") as f:
+              current_version = tomllib.load(f)["project"]["version"]
+          if version_key(current_version) <= version_key(previous_version):
+              print(
+                  "::error::Package files changed but pyproject.toml version "
+                  f"did not increase: {previous_version} -> {current_version}"
+              )
+              sys.exit(1)
+          PY
+      - name: Fail if PyPI version already exists
+        if: github.event_name == 'workflow_dispatch' || steps.changes.outputs.package_changed == 'true'
+        env:
+          PACKAGE_NAME: ${{ steps.metadata.outputs.name }}
+          PACKAGE_VERSION: ${{ steps.metadata.outputs.version }}
+        run: |
+          python3 - <<'PY'
+          import os
+          import urllib.error
+          import urllib.request
+          name = os.environ["PACKAGE_NAME"]
+          version = os.environ["PACKAGE_VERSION"]
+          url = f"https://pypi.org/pypi/{name}/{version}/json"
+          # 200 = this exact version is already published (abort); 404 = free to publish.
+          # Any other HTTP error is re-raised so the step fails rather than guessing.
+          try:
+              with urllib.request.urlopen(url, timeout=30) as response:
+                  if response.status == 200:
+                      raise SystemExit(
+                          f"{name} {version} is already on PyPI. "
+                          "Bump pyproject.toml before publishing."
+                      )
+          except urllib.error.HTTPError as error:
+              if error.code != 404:
+                  raise
+          PY
+      - name: Install uv
+        if: github.event_name == 'workflow_dispatch' || steps.changes.outputs.package_changed == 'true'
+        uses: astral-sh/setup-uv@v6
+        with:
+          python-version: "3.11"
+      - name: Build distributions
+        if: github.event_name == 'workflow_dispatch' || steps.changes.outputs.package_changed == 'true'
+        run: |
+          # Start from an empty dist/ so only freshly built artifacts are uploaded.
+          rm -rf dist
+          uv build
+      - name: Publish to PyPI
+        if: github.event_name == 'workflow_dispatch' || steps.changes.outputs.package_changed == 'true'
+        env:
+          # Scoped to this step only: the build step and the inline scripts above never
+          # see the PyPI token.
+          UV_PUBLISH_TOKEN: ${{ secrets.PYPI_API_TOKEN }}
+        run: |
+          if [ -z "$UV_PUBLISH_TOKEN" ]; then
+            echo "::error::UV_PUBLISH_TOKEN is empty (sourced from the PYPI_API_TOKEN repo secret); refusing to publish without credentials. Set the PYPI_API_TOKEN secret on this repository."
+            exit 1
+          fi
+          uv publish
+      - name: No package changes
+        if: github.event_name == 'push' && steps.changes.outputs.package_changed == 'false'
+        run: echo "No package files changed; nothing to publish."

freesolo_flash_dev-0.2.25/.github/workflows/worker-image.yml ADDED Viewed

@@ -0,0 +1,85 @@
+name: flash worker image
+# Builds the prebuilt flash worker image (full training stack + flash-attn baked in) and
+# pushes it to GHCR, so Vast/RunPod cold-start skips the per-host dep install (the dominant
+# cold-start cost). Training and serving both default to the pinned WORKER_IMAGE constant
+# (flash/providers/runpod/train.py) — bump that to the new tag. FLASH_WORKER_IMAGE is an
+# operator env override honored by BOTH training (providers/runpod/{jobs,train}.py) and serving
+# (flash/serve/deploy.py).
+#
+# Triggered automatically on flash/** changes (worker training code is baked in; skipping a
+# rebuild leaves GPU workers running stale code) and manually for Dockerfile/dep changes.
+# The build is heavy (vllm + full training stack + flash-attn wheel) but Docker layer caching
+# means only the flash COPY+install layer reruns on pure Python changes (~minutes, not hours).
+on:
+  workflow_dispatch:
+    inputs:
+      tag:
+        description: image tag
+        default: cu128
+        required: false
+      flash_attn_3_spec:
+        description: >-
+          FlashAttention-3 (Hopper sm90) install spec -> FLASH_ATTN_3_SPEC build-arg. Leave at the
+          default validated wheel to bake FA3 into every image; override to pin a different wheel.
+        default: "https://github.com/windreamer/flash-attention3-wheels/releases/download/2026.03.19-850211f/flash_attn_3-3.0.0%2B20260318.cu128torch2100cxx11abitrue.8afc61-cp39-abi3-linux_x86_64.whl"
+        required: false
+  push:
+    branches: [main]
+    paths:
+      - Dockerfile.worker
+      - .github/workflows/worker-image.yml
+      - flash/**
+      - pyproject.toml
+      # The baked rp_handler is generated from _train_body at build time (docker/make_rp_handler.py),
+      # so handler changes (e.g. the weight-cache preload branch) MUST rebuild the image. flash/**
+      # covers the handler source (endpoints.py); the generator lives outside flash/, so watch it too.
+      - docker/make_rp_handler.py
+permissions:
+  contents: read
+  packages: write
+jobs:
+  build:
+    runs-on: ubuntu-24.04-8core
+    steps:
+      - uses: actions/checkout@v6
+      - name: Free disk space
+        run: |
+          sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/.ghcup \
+            /usr/local/lib/android /opt/hostedtoolcache /usr/share/swift \
+            /usr/local/share/boost /usr/local/lib/node_modules \
+            /usr/local/share/powershell /usr/share/miniconda || true
+          sudo apt-get clean || true
+          sudo docker image prune -af || true
+          df -h /
+      - uses: docker/setup-buildx-action@v3
+      - name: Log in to GHCR
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+      - name: Build + push worker image
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          file: Dockerfile.worker
+          push: true
+          # Pin the freesolo-co namespace: both RunPod and Vast pull the hardcoded
+          # WORKER_IMAGE = ghcr.io/freesolo-co/flash-worker:cu128 (flash/providers/runpod/train.py),
+          # so a fork run must not push to its own owner namespace and leave the real image stale.
+          tags: ghcr.io/freesolo-co/flash-worker:${{ github.event.inputs.tag || 'cu128' }}
+          provenance: false
+          # Install a PREBUILT flash-attn wheel instead of compiling from source (a multi-arch
+          # source compile is huge and OOM-kills the runner). The wheel matches the base exactly
+          # (cu128 / torch2.10 / cp312).
+          build-args: |
+            FLASH_ATTN_SPEC=https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.9.0/flash_attn-2.8.3%2Bcu128torch2.10-cp312-cp312-linux_x86_64.whl
+            FLASH_ATTN_3_SPEC=${{ github.event.inputs.flash_attn_3_spec || 'https://github.com/windreamer/flash-attention3-wheels/releases/download/2026.03.19-850211f/flash_attn_3-3.0.0%2B20260318.cu128torch2100cxx11abitrue.8afc61-cp39-abi3-linux_x86_64.whl' }}