freesolo-flash-dev 0.2.25__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (230) hide show
  1. freesolo_flash_dev-0.2.25/.dockerignore +20 -0
  2. freesolo_flash_dev-0.2.25/.env.example +34 -0
  3. freesolo_flash_dev-0.2.25/.github/workflows/bake-kernel-cache.yml +136 -0
  4. freesolo_flash_dev-0.2.25/.github/workflows/ci.yml +28 -0
  5. freesolo_flash_dev-0.2.25/.github/workflows/main-source-guard.yml +20 -0
  6. freesolo_flash_dev-0.2.25/.github/workflows/publish-dev.yml +107 -0
  7. freesolo_flash_dev-0.2.25/.github/workflows/publish-image.yml +69 -0
  8. freesolo_flash_dev-0.2.25/.github/workflows/publish.yml +174 -0
  9. freesolo_flash_dev-0.2.25/.github/workflows/worker-image.yml +85 -0
  10. freesolo_flash_dev-0.2.25/.gitignore +33 -0
  11. freesolo_flash_dev-0.2.25/Dockerfile +24 -0
  12. freesolo_flash_dev-0.2.25/Dockerfile.worker +204 -0
  13. freesolo_flash_dev-0.2.25/LICENSE +201 -0
  14. freesolo_flash_dev-0.2.25/PKG-INFO +192 -0
  15. freesolo_flash_dev-0.2.25/README.md +143 -0
  16. freesolo_flash_dev-0.2.25/build/kernel_cache/.gitignore +5 -0
  17. freesolo_flash_dev-0.2.25/build/kernel_cache/.keep +4 -0
  18. freesolo_flash_dev-0.2.25/docker/Dockerfile.kernelcache +18 -0
  19. freesolo_flash_dev-0.2.25/docker/bake_kernel_cache.py +240 -0
  20. freesolo_flash_dev-0.2.25/docker/bake_pod_entry.py +78 -0
  21. freesolo_flash_dev-0.2.25/docker/make_rp_handler.py +51 -0
  22. freesolo_flash_dev-0.2.25/docs/cli-style/README.md +33 -0
  23. freesolo_flash_dev-0.2.25/docs/cli-style/generate.py +503 -0
  24. freesolo_flash_dev-0.2.25/docs/cli-style/index.html +709 -0
  25. freesolo_flash_dev-0.2.25/docs/cli-style/preview.png +0 -0
  26. freesolo_flash_dev-0.2.25/docs/kernel-cache.md +100 -0
  27. freesolo_flash_dev-0.2.25/flash/__init__.py +29 -0
  28. freesolo_flash_dev-0.2.25/flash/_channel.py +23 -0
  29. freesolo_flash_dev-0.2.25/flash/_fileio.py +35 -0
  30. freesolo_flash_dev-0.2.25/flash/_logging.py +49 -0
  31. freesolo_flash_dev-0.2.25/flash/_update_check.py +266 -0
  32. freesolo_flash_dev-0.2.25/flash/catalog.py +253 -0
  33. freesolo_flash_dev-0.2.25/flash/cli/__init__.py +1 -0
  34. freesolo_flash_dev-0.2.25/flash/cli/main/__init__.py +227 -0
  35. freesolo_flash_dev-0.2.25/flash/cli/main/__main__.py +6 -0
  36. freesolo_flash_dev-0.2.25/flash/cli/main/commands.py +636 -0
  37. freesolo_flash_dev-0.2.25/flash/cli/main/envpush.py +317 -0
  38. freesolo_flash_dev-0.2.25/flash/cli/main/render.py +599 -0
  39. freesolo_flash_dev-0.2.25/flash/cli/main/training_doc.py +455 -0
  40. freesolo_flash_dev-0.2.25/flash/client/__init__.py +14 -0
  41. freesolo_flash_dev-0.2.25/flash/client/config.py +70 -0
  42. freesolo_flash_dev-0.2.25/flash/client/http.py +372 -0
  43. freesolo_flash_dev-0.2.25/flash/client/runtime_secrets.py +69 -0
  44. freesolo_flash_dev-0.2.25/flash/client/specs.py +20 -0
  45. freesolo_flash_dev-0.2.25/flash/cost/__init__.py +16 -0
  46. freesolo_flash_dev-0.2.25/flash/cost/analytical.py +175 -0
  47. freesolo_flash_dev-0.2.25/flash/cost/facts.py +114 -0
  48. freesolo_flash_dev-0.2.25/flash/cost/spec.py +113 -0
  49. freesolo_flash_dev-0.2.25/flash/cost/types.py +158 -0
  50. freesolo_flash_dev-0.2.25/flash/engine/__init__.py +6 -0
  51. freesolo_flash_dev-0.2.25/flash/engine/accounting.py +36 -0
  52. freesolo_flash_dev-0.2.25/flash/engine/chalk_kernels.py +116 -0
  53. freesolo_flash_dev-0.2.25/flash/engine/multiturn_rollout.py +780 -0
  54. freesolo_flash_dev-0.2.25/flash/engine/recipe.py +86 -0
  55. freesolo_flash_dev-0.2.25/flash/engine/vram.py +603 -0
  56. freesolo_flash_dev-0.2.25/flash/engine/worker/__init__.py +2916 -0
  57. freesolo_flash_dev-0.2.25/flash/engine/worker/__main__.py +4 -0
  58. freesolo_flash_dev-0.2.25/flash/engine/worker/kernel_warmup.py +400 -0
  59. freesolo_flash_dev-0.2.25/flash/engine/worker/lora.py +796 -0
  60. freesolo_flash_dev-0.2.25/flash/engine/worker/packing.py +366 -0
  61. freesolo_flash_dev-0.2.25/flash/engine/worker/perf.py +1048 -0
  62. freesolo_flash_dev-0.2.25/flash/envs/__init__.py +10 -0
  63. freesolo_flash_dev-0.2.25/flash/envs/adapter/__init__.py +883 -0
  64. freesolo_flash_dev-0.2.25/flash/envs/adapter/rubric.py +222 -0
  65. freesolo_flash_dev-0.2.25/flash/envs/base.py +52 -0
  66. freesolo_flash_dev-0.2.25/flash/envs/registry.py +62 -0
  67. freesolo_flash_dev-0.2.25/flash/mcp/__init__.py +1 -0
  68. freesolo_flash_dev-0.2.25/flash/mcp/server.py +85 -0
  69. freesolo_flash_dev-0.2.25/flash/providers/__init__.py +59 -0
  70. freesolo_flash_dev-0.2.25/flash/providers/_auth.py +24 -0
  71. freesolo_flash_dev-0.2.25/flash/providers/_http.py +230 -0
  72. freesolo_flash_dev-0.2.25/flash/providers/_instance.py +416 -0
  73. freesolo_flash_dev-0.2.25/flash/providers/_instance_bootstrap.py +517 -0
  74. freesolo_flash_dev-0.2.25/flash/providers/_poll.py +311 -0
  75. freesolo_flash_dev-0.2.25/flash/providers/allocator.py +193 -0
  76. freesolo_flash_dev-0.2.25/flash/providers/base.py +431 -0
  77. freesolo_flash_dev-0.2.25/flash/providers/hyperstack/__init__.py +127 -0
  78. freesolo_flash_dev-0.2.25/flash/providers/hyperstack/api.py +522 -0
  79. freesolo_flash_dev-0.2.25/flash/providers/hyperstack/auth.py +17 -0
  80. freesolo_flash_dev-0.2.25/flash/providers/hyperstack/gpus.py +29 -0
  81. freesolo_flash_dev-0.2.25/flash/providers/hyperstack/jobs/__init__.py +632 -0
  82. freesolo_flash_dev-0.2.25/flash/providers/hyperstack/jobs/builders.py +122 -0
  83. freesolo_flash_dev-0.2.25/flash/providers/hyperstack/preflight.py +23 -0
  84. freesolo_flash_dev-0.2.25/flash/providers/hyperstack/pricing.py +26 -0
  85. freesolo_flash_dev-0.2.25/flash/providers/hyperstack/train.py +25 -0
  86. freesolo_flash_dev-0.2.25/flash/providers/lambdalabs/__init__.py +139 -0
  87. freesolo_flash_dev-0.2.25/flash/providers/lambdalabs/api.py +261 -0
  88. freesolo_flash_dev-0.2.25/flash/providers/lambdalabs/auth.py +18 -0
  89. freesolo_flash_dev-0.2.25/flash/providers/lambdalabs/gpus.py +29 -0
  90. freesolo_flash_dev-0.2.25/flash/providers/lambdalabs/jobs/__init__.py +724 -0
  91. freesolo_flash_dev-0.2.25/flash/providers/lambdalabs/jobs/builders.py +118 -0
  92. freesolo_flash_dev-0.2.25/flash/providers/lambdalabs/preflight.py +27 -0
  93. freesolo_flash_dev-0.2.25/flash/providers/lambdalabs/pricing.py +51 -0
  94. freesolo_flash_dev-0.2.25/flash/providers/lambdalabs/train.py +27 -0
  95. freesolo_flash_dev-0.2.25/flash/providers/preflight.py +55 -0
  96. freesolo_flash_dev-0.2.25/flash/providers/realized.py +80 -0
  97. freesolo_flash_dev-0.2.25/flash/providers/runpod/__init__.py +130 -0
  98. freesolo_flash_dev-0.2.25/flash/providers/runpod/api.py +186 -0
  99. freesolo_flash_dev-0.2.25/flash/providers/runpod/auth.py +37 -0
  100. freesolo_flash_dev-0.2.25/flash/providers/runpod/cost.py +57 -0
  101. freesolo_flash_dev-0.2.25/flash/providers/runpod/gpus.py +46 -0
  102. freesolo_flash_dev-0.2.25/flash/providers/runpod/jobs.py +956 -0
  103. freesolo_flash_dev-0.2.25/flash/providers/runpod/keys.py +139 -0
  104. freesolo_flash_dev-0.2.25/flash/providers/runpod/preflight.py +30 -0
  105. freesolo_flash_dev-0.2.25/flash/providers/runpod/preload.py +915 -0
  106. freesolo_flash_dev-0.2.25/flash/providers/runpod/pricing.py +18 -0
  107. freesolo_flash_dev-0.2.25/flash/providers/runpod/slots.py +79 -0
  108. freesolo_flash_dev-0.2.25/flash/providers/runpod/train/__init__.py +150 -0
  109. freesolo_flash_dev-0.2.25/flash/providers/runpod/train/deps.py +395 -0
  110. freesolo_flash_dev-0.2.25/flash/providers/runpod/train/endpoints.py +820 -0
  111. freesolo_flash_dev-0.2.25/flash/py.typed +0 -0
  112. freesolo_flash_dev-0.2.25/flash/runner/__init__.py +686 -0
  113. freesolo_flash_dev-0.2.25/flash/runner/checkpoints.py +82 -0
  114. freesolo_flash_dev-0.2.25/flash/runner/deploy.py +422 -0
  115. freesolo_flash_dev-0.2.25/flash/runner/lifecycle.py +672 -0
  116. freesolo_flash_dev-0.2.25/flash/schema/__init__.py +375 -0
  117. freesolo_flash_dev-0.2.25/flash/schema/fields.py +331 -0
  118. freesolo_flash_dev-0.2.25/flash/serve/__init__.py +1 -0
  119. freesolo_flash_dev-0.2.25/flash/serve/deploy.py +326 -0
  120. freesolo_flash_dev-0.2.25/flash/serve/pricing.py +60 -0
  121. freesolo_flash_dev-0.2.25/flash/server/__init__.py +1 -0
  122. freesolo_flash_dev-0.2.25/flash/server/__main__.py +20 -0
  123. freesolo_flash_dev-0.2.25/flash/server/app.py +961 -0
  124. freesolo_flash_dev-0.2.25/flash/server/auth.py +263 -0
  125. freesolo_flash_dev-0.2.25/flash/server/billing.py +124 -0
  126. freesolo_flash_dev-0.2.25/flash/server/checkpoints.py +110 -0
  127. freesolo_flash_dev-0.2.25/flash/server/db.py +160 -0
  128. freesolo_flash_dev-0.2.25/flash/server/environment_registry.py +102 -0
  129. freesolo_flash_dev-0.2.25/flash/server/envs.py +360 -0
  130. freesolo_flash_dev-0.2.25/flash/server/reconcile.py +163 -0
  131. freesolo_flash_dev-0.2.25/flash/server/run_registry.py +150 -0
  132. freesolo_flash_dev-0.2.25/flash/spec.py +333 -0
  133. freesolo_flash_dev-0.2.25/pyproject.toml +161 -0
  134. freesolo_flash_dev-0.2.25/scripts/build_dev_dist.py +130 -0
  135. freesolo_flash_dev-0.2.25/tests/__init__.py +2 -0
  136. freesolo_flash_dev-0.2.25/tests/_helpers/__init__.py +1 -0
  137. freesolo_flash_dev-0.2.25/tests/_helpers/runner.py +25 -0
  138. freesolo_flash_dev-0.2.25/tests/_helpers/specs.py +18 -0
  139. freesolo_flash_dev-0.2.25/tests/conftest.py +47 -0
  140. freesolo_flash_dev-0.2.25/tests/fixtures/math_eval.jsonl +3 -0
  141. freesolo_flash_dev-0.2.25/tests/fixtures/math_train.jsonl +2 -0
  142. freesolo_flash_dev-0.2.25/tests/live/__init__.py +0 -0
  143. freesolo_flash_dev-0.2.25/tests/live/conftest.py +35 -0
  144. freesolo_flash_dev-0.2.25/tests/live/test_hyperstack_live.py +50 -0
  145. freesolo_flash_dev-0.2.25/tests/live/test_lambda_live.py +49 -0
  146. freesolo_flash_dev-0.2.25/tests/live/test_runpod_live.py +36 -0
  147. freesolo_flash_dev-0.2.25/tests/test_agent_flash_cli_contract.py +228 -0
  148. freesolo_flash_dev-0.2.25/tests/test_algorithms.py +71 -0
  149. freesolo_flash_dev-0.2.25/tests/test_allocator.py +390 -0
  150. freesolo_flash_dev-0.2.25/tests/test_backend_jobspec_contract.py +152 -0
  151. freesolo_flash_dev-0.2.25/tests/test_cancel_remote.py +666 -0
  152. freesolo_flash_dev-0.2.25/tests/test_catalog_consistency.py +57 -0
  153. freesolo_flash_dev-0.2.25/tests/test_chalk_kernels.py +114 -0
  154. freesolo_flash_dev-0.2.25/tests/test_checkpoints.py +298 -0
  155. freesolo_flash_dev-0.2.25/tests/test_cli_commands.py +380 -0
  156. freesolo_flash_dev-0.2.25/tests/test_cli_errors.py +121 -0
  157. freesolo_flash_dev-0.2.25/tests/test_cli_estimate.py +274 -0
  158. freesolo_flash_dev-0.2.25/tests/test_cli_managed.py +144 -0
  159. freesolo_flash_dev-0.2.25/tests/test_cli_render_theme.py +159 -0
  160. freesolo_flash_dev-0.2.25/tests/test_client.py +250 -0
  161. freesolo_flash_dev-0.2.25/tests/test_client_server_integration.py +273 -0
  162. freesolo_flash_dev-0.2.25/tests/test_config_overrides.py +64 -0
  163. freesolo_flash_dev-0.2.25/tests/test_cost_analytical.py +244 -0
  164. freesolo_flash_dev-0.2.25/tests/test_cost_equation.py +46 -0
  165. freesolo_flash_dev-0.2.25/tests/test_cost_estimate.py +79 -0
  166. freesolo_flash_dev-0.2.25/tests/test_cost_hardware.py +87 -0
  167. freesolo_flash_dev-0.2.25/tests/test_cost_models.py +36 -0
  168. freesolo_flash_dev-0.2.25/tests/test_cost_rewards.py +65 -0
  169. freesolo_flash_dev-0.2.25/tests/test_dev_channel.py +99 -0
  170. freesolo_flash_dev-0.2.25/tests/test_disk_gb.py +95 -0
  171. freesolo_flash_dev-0.2.25/tests/test_endpoint_name.py +45 -0
  172. freesolo_flash_dev-0.2.25/tests/test_env_install.py +43 -0
  173. freesolo_flash_dev-0.2.25/tests/test_env_publish.py +522 -0
  174. freesolo_flash_dev-0.2.25/tests/test_env_push.py +314 -0
  175. freesolo_flash_dev-0.2.25/tests/test_env_rate_limit_resolve.py +275 -0
  176. freesolo_flash_dev-0.2.25/tests/test_envs_coverage.py +92 -0
  177. freesolo_flash_dev-0.2.25/tests/test_flash_mvp.py +139 -0
  178. freesolo_flash_dev-0.2.25/tests/test_flash_worker.py +367 -0
  179. freesolo_flash_dev-0.2.25/tests/test_github_urlopen_retry.py +118 -0
  180. freesolo_flash_dev-0.2.25/tests/test_gpus.py +162 -0
  181. freesolo_flash_dev-0.2.25/tests/test_grpo_mask_aware.py +180 -0
  182. freesolo_flash_dev-0.2.25/tests/test_grpo_params.py +626 -0
  183. freesolo_flash_dev-0.2.25/tests/test_grpo_sleep_gate.py +97 -0
  184. freesolo_flash_dev-0.2.25/tests/test_hyperstack_runner.py +1031 -0
  185. freesolo_flash_dev-0.2.25/tests/test_idle_endpoint_reaper.py +285 -0
  186. freesolo_flash_dev-0.2.25/tests/test_jobs.py +1777 -0
  187. freesolo_flash_dev-0.2.25/tests/test_kernel_cache.py +112 -0
  188. freesolo_flash_dev-0.2.25/tests/test_kv_util.py +75 -0
  189. freesolo_flash_dev-0.2.25/tests/test_lambda_runner.py +1394 -0
  190. freesolo_flash_dev-0.2.25/tests/test_logging.py +51 -0
  191. freesolo_flash_dev-0.2.25/tests/test_login_perms.py +91 -0
  192. freesolo_flash_dev-0.2.25/tests/test_managed_hf_repo.py +65 -0
  193. freesolo_flash_dev-0.2.25/tests/test_metrics_schema_agent_contract.py +170 -0
  194. freesolo_flash_dev-0.2.25/tests/test_mig_guard.py +70 -0
  195. freesolo_flash_dev-0.2.25/tests/test_multiturn_rollout.py +785 -0
  196. freesolo_flash_dev-0.2.25/tests/test_open_model_policy.py +95 -0
  197. freesolo_flash_dev-0.2.25/tests/test_orchestrator_flash.py +232 -0
  198. freesolo_flash_dev-0.2.25/tests/test_packing.py +427 -0
  199. freesolo_flash_dev-0.2.25/tests/test_poll_helpers.py +52 -0
  200. freesolo_flash_dev-0.2.25/tests/test_preflight.py +98 -0
  201. freesolo_flash_dev-0.2.25/tests/test_provider_routing.py +426 -0
  202. freesolo_flash_dev-0.2.25/tests/test_provider_teardown_robustness.py +223 -0
  203. freesolo_flash_dev-0.2.25/tests/test_providers_symmetry.py +164 -0
  204. freesolo_flash_dev-0.2.25/tests/test_realized_cost.py +400 -0
  205. freesolo_flash_dev-0.2.25/tests/test_resolve_params_b.py +50 -0
  206. freesolo_flash_dev-0.2.25/tests/test_runmgmt.py +204 -0
  207. freesolo_flash_dev-0.2.25/tests/test_runpod_api_delete.py +90 -0
  208. freesolo_flash_dev-0.2.25/tests/test_runpod_key_waterfall.py +323 -0
  209. freesolo_flash_dev-0.2.25/tests/test_runpod_slots.py +236 -0
  210. freesolo_flash_dev-0.2.25/tests/test_serve.py +428 -0
  211. freesolo_flash_dev-0.2.25/tests/test_server_api.py +1367 -0
  212. freesolo_flash_dev-0.2.25/tests/test_server_billing.py +388 -0
  213. freesolo_flash_dev-0.2.25/tests/test_server_db.py +234 -0
  214. freesolo_flash_dev-0.2.25/tests/test_serving_contract.py +161 -0
  215. freesolo_flash_dev-0.2.25/tests/test_spec_and_validation.py +501 -0
  216. freesolo_flash_dev-0.2.25/tests/test_thinking_config.py +118 -0
  217. freesolo_flash_dev-0.2.25/tests/test_update_check.py +344 -0
  218. freesolo_flash_dev-0.2.25/tests/test_verifiers.py +596 -0
  219. freesolo_flash_dev-0.2.25/tests/test_version.py +49 -0
  220. freesolo_flash_dev-0.2.25/tests/test_vl_warmstart_adapter_keys.py +543 -0
  221. freesolo_flash_dev-0.2.25/tests/test_vl_weight_sync.py +181 -0
  222. freesolo_flash_dev-0.2.25/tests/test_wandb_naming.py +337 -0
  223. freesolo_flash_dev-0.2.25/tests/test_warmstart_cross_repo.py +49 -0
  224. freesolo_flash_dev-0.2.25/tests/test_weight_cache.py +1825 -0
  225. freesolo_flash_dev-0.2.25/tests/test_worker_dryrun.py +233 -0
  226. freesolo_flash_dev-0.2.25/tests/test_worker_hardexit.py +81 -0
  227. freesolo_flash_dev-0.2.25/tests/test_worker_image.py +67 -0
  228. freesolo_flash_dev-0.2.25/tests/test_worker_stack.py +1085 -0
  229. freesolo_flash_dev-0.2.25/tests/test_worker_thinking.py +155 -0
  230. freesolo_flash_dev-0.2.25/uv.lock +4221 -0
@@ -0,0 +1,20 @@
1
+ .venv/
2
+ venv/
3
+ __pycache__/
4
+ *.pyc
5
+ .ruff_cache/
6
+ .cache/
7
+ .flash/
8
+ results/
9
+ hf_cache/
10
+ uv-cache/
11
+ dist/
12
+ build/*
13
+ # ...but keep the opt-in kernel-cache staging dir in the build context: Dockerfile.worker
14
+ # copies build/kernel_cache/ (the .keep placeholder makes the source always exist; the
15
+ # gpu-produced mega_cache.bin + mega_cache.json ride along on a kernel-cache bake).
16
+ !build/kernel_cache/
17
+ !build/kernel_cache/**
18
+ *.egg-info/
19
+ .env
20
+ .flash/
@@ -0,0 +1,34 @@
1
+ # Flash control plane (operator-side). Copy to .env and fill in.
2
+ # Provider credentials live ONLY here — clients never see them and authenticate
3
+ # with their freesolo API key (`flash login`).
4
+
5
+ # GPU substrate. RunPod is the default; Vast is opt-in (only required when set).
6
+ RUNPOD_API_KEY=
7
+ VAST_API_KEY=
8
+ # HuggingFace token with write access to each run's [train] hf_repo (code upload +
9
+ # streamed checkpoints/adapters land in that per-run dataset repo). The artifact repo
10
+ # is per-run (set in the run TOML's [train] hf_repo), not an operator-wide env var.
11
+ HF_TOKEN=
12
+ # Prime Intellect API key for FreeSolo's MANAGED Prime account. Used to (1) `prime env install`
13
+ # the run's Hub environment on the worker, and (2) publish user-uploaded envs (`flash env push` ->
14
+ # POST /v1/envs) under this one account — so users never need their own Prime account. The `prime`
15
+ # CLI must be installed on the control plane (it ships in the `server` extra).
16
+ PRIME_API_KEY=
17
+
18
+ # --- FreeSolo auth (hosted deployment) ---
19
+ # User auth is freesolo API keys only. An unknown bearer token is verified against
20
+ # {FREESOLO_BASE_URL}/api/auth/verify and resolved to a per-user identity.
21
+ # The shared freesolo internal key (same value the platform/SDK already hold) maps
22
+ # to a single service identity without a network call. The same key authenticates
23
+ # flash's adapter registration calls to the freesolo serving app (below).
24
+ FREESOLO_INTERNAL_KEY=
25
+ # Where to verify user keys. In compose this is the backend on the internal network;
26
+ # defaults to https://api.freesolo.co when unset.
27
+ FREESOLO_BASE_URL=http://backend:8000
28
+
29
+ # --- FreeSolo serving (adapter deploy/chat) ---
30
+ # Adapter serving is delegated to the freesolo platform's multi-LoRA serving app (a
31
+ # Modal app that serves every adapter on one GPU per base model, scaling to zero when
32
+ # idle). `flash deploy`/`undeploy`/`chat` register/deregister/chat against it. Defaults
33
+ # to the hosted Modal URL when unset.
34
+ FREESOLO_SERVING_URL=https://clado-ai--freesolo-lora-serving.modal.run
@@ -0,0 +1,136 @@
1
+ name: flash worker kernel-cache (per-arch)
2
+
3
+ # Builds per-SM worker images with the compiled-kernel mega-cache baked in, killing the ~10-15 min
4
+ # first-use JIT on a cold worker (the regression #194 reintroduced). Each matrix leg:
5
+ # 1. offloads the warmup to a RunPod GPU of that arch (GH runners have none) -> build/kernel_cache,
6
+ #   2. docker build -f docker/Dockerfile.kernelcache --build-arg BASE=<cu128 base image> -> ghcr.io/freesolo-co/flash-worker:cu128-<sm>,
7
+ # 3. pushes it. Activate by setting FLASH_WORKER_IMAGE_PER_SM=1 on the control plane so the
8
+ # allocator selects the cu128-<sm> tag per GPU class (see flash/providers/runpod/train/deps.py).
9
+ #
10
+ # Depends on the base ghcr.io/freesolo-co/flash-worker:cu128 image existing (worker-image.yml) — the
11
+ # warmup runs INSIDE it so the cache matches the image's pinned torch/triton/fla/liger toolchain.
12
+ #
13
+ # Requires the RUNPOD_API_KEY repo secret (the warmup GPU pod) + the default GITHUB_TOKEN (GHCR push)
14
+ # + HF_TOKEN (the temp dataset that ferries code + the cache artifact). Manual trigger; heavy + paid.
15
+
16
+ on:
17
+ workflow_dispatch:
18
+ inputs:
19
+ sms:
20
+ description: "comma-separated sm list to bake; default is all validated arches"
21
+ default: "sm80,sm86,sm89,sm90,sm120"
22
+ required: false
23
+
24
+ permissions:
25
+ contents: read
26
+ packages: write
27
+
28
+ jobs:
29
+ bake:
30
+ runs-on: ubuntu-24.04-8core
31
+ strategy:
32
+ fail-fast: false
33
+ matrix:
34
+ include:
35
+ - {
36
+ sm: sm80,
37
+ arch: "8.0",
38
+ gpu_type_id: "NVIDIA A100 80GB PCIe",
39
+ allowed_cuda: "",
40
+ }
41
+ - {
42
+ sm: sm86,
43
+ arch: "8.6",
44
+ gpu_type_id: "NVIDIA RTX A6000",
45
+ allowed_cuda: "",
46
+ }
47
+ - {
48
+ sm: sm89,
49
+ arch: "8.9",
50
+ gpu_type_id: "NVIDIA GeForce RTX 4090",
51
+ allowed_cuda: "",
52
+ }
53
+ - {
54
+ sm: sm90,
55
+ arch: "9.0",
56
+ gpu_type_id: "NVIDIA H100 80GB HBM3",
57
+ allowed_cuda: "",
58
+ }
59
+ # Blackwell needs CUDA-13 hosts to JIT its PTX (matches min_cuda_for in the provider).
60
+ - {
61
+ sm: sm120,
62
+ arch: "12.0",
63
+ gpu_type_id: "NVIDIA GeForce RTX 5090",
64
+ allowed_cuda: "13.0",
65
+ }
66
+ steps:
67
+ - uses: actions/checkout@v6
68
+
69
+ # skip arches not requested in the dispatch input
70
+ - name: Should bake ${{ matrix.sm }}?
71
+ id: gate
72
+ run: |
73
+ req="${{ github.event.inputs.sms || 'sm80,sm86,sm89,sm90,sm120' }}"
74
+ req="${req// /}" # tolerate spaces, e.g. "sm80, sm86"
75
+ case ",$req," in
76
+ *,${{ matrix.sm }},*) echo "run=true" >> "$GITHUB_OUTPUT" ;;
77
+ *) echo "run=false" >> "$GITHUB_OUTPUT"; echo "skipping ${{ matrix.sm }}" ;;
78
+ esac
79
+
80
+ - name: Install uv
81
+ if: steps.gate.outputs.run == 'true'
82
+ uses: astral-sh/setup-uv@v5
83
+
84
+ - name: Sync deps (flash + runpod + hf)
85
+ if: steps.gate.outputs.run == 'true'
86
+ run: uv sync --extra server
87
+
88
+ - name: Warm kernels on a RunPod ${{ matrix.sm }} GPU -> build/kernel_cache
89
+ if: steps.gate.outputs.run == 'true'
90
+ env:
91
+ RUNPOD_API_KEY: ${{ secrets.RUNPOD_API_KEY }}
92
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
93
+ run: |
94
+ uv run python docker/bake_kernel_cache.py \
95
+ --arch "${{ matrix.arch }}" --sm "${{ matrix.sm }}" \
96
+ --gpu-type-id "${{ matrix.gpu_type_id }}" \
97
+ --allowed-cuda "${{ matrix.allowed_cuda }}" \
98
+ --image ghcr.io/freesolo-co/flash-worker:cu128 \
99
+ --out build/kernel_cache
100
+
101
+ - name: Free disk space
102
+ if: steps.gate.outputs.run == 'true'
103
+ run: |
104
+ sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/.ghcup \
105
+ /usr/local/lib/android /opt/hostedtoolcache /usr/share/swift \
106
+ /usr/local/share/boost /usr/local/lib/node_modules \
107
+ /usr/local/share/powershell /usr/share/miniconda || true
108
+ sudo apt-get clean || true
109
+ sudo docker image prune -af || true
110
+ df -h /
111
+
112
+ - uses: docker/setup-buildx-action@v3
113
+ if: steps.gate.outputs.run == 'true'
114
+
115
+ - name: Log in to GHCR
116
+ if: steps.gate.outputs.run == 'true'
117
+ uses: docker/login-action@v3
118
+ with:
119
+ registry: ghcr.io
120
+ username: ${{ github.actor }}
121
+ password: ${{ secrets.GITHUB_TOKEN }}
122
+
123
+ # thin layer FROM the SAME base the warmup ran inside -> the cache toolchain always matches the
124
+ # shipped image (rebuilding the full Dockerfile.worker from the checkout could drift from the
125
+ # published base and bake a cache that silently won't load).
126
+ - name: Build + push baked per-sm image (thin layer on the warmed base)
127
+ if: steps.gate.outputs.run == 'true'
128
+ uses: docker/build-push-action@v6
129
+ with:
130
+ context: .
131
+ file: docker/Dockerfile.kernelcache
132
+ push: true
133
+ tags: ghcr.io/freesolo-co/flash-worker:cu128-${{ matrix.sm }}
134
+ provenance: false
135
+ build-args: |
136
+ BASE=ghcr.io/freesolo-co/flash-worker:cu128
@@ -0,0 +1,28 @@
1
+ name: ci
2
+
3
+ on:
4
+ push:
5
+ branches: [main, dev]
6
+ pull_request:
7
+ branches: [main, dev]
8
+
9
+ jobs:
10
+ test:
11
+ name: lint + offline tests
12
+ runs-on: ubuntu-latest
13
+ steps:
14
+ - uses: actions/checkout@v6
15
+
16
+ - name: Install uv
17
+ uses: astral-sh/setup-uv@v6
18
+ with:
19
+ python-version: "3.11"
20
+
21
+ - name: Sync (server extra + dev)
22
+ run: uv sync --extra server --dev
23
+
24
+ - name: Ruff
25
+ run: uv run ruff check .
26
+
27
+ - name: Tests (CPU, offline)
28
+ run: uv run pytest -q
@@ -0,0 +1,20 @@
1
+ name: Main source guard
2
+ on:
3
+ pull_request:
4
+ branches: [main]
5
+ permissions:
6
+ contents: read
7
+ jobs:
8
+ source-is-dev:
9
+ name: Source branch is dev
10
+ runs-on: ubuntu-latest
11
+ steps:
12
+ - name: Require PRs into main to originate from dev
13
+ env:
14
+ HEAD_REF: ${{ github.head_ref }}
15
+ run: |
16
+ if [ "$HEAD_REF" != "dev" ]; then
17
+ echo "::error::PRs into main must come from 'dev' (got '$HEAD_REF'). Merge into dev, then promote dev -> main."
18
+ exit 1
19
+ fi
20
+ echo "Source branch '$HEAD_REF' is allowed."
@@ -0,0 +1,107 @@
1
+ name: Publish flash dev-channel package
2
+
3
+ # Auto-publish the dev-channel package `freesolo-flash-dev` (the `flash-dev` CLI, which defaults
4
+ # to the staging plane flash-dev.freesolo.co) to PyPI from the `dev` branch.
5
+ #
6
+ # Trigger: a push to `dev` that touches package files. We publish iff the dev-channel version
7
+ # ([tool.flash-dev].version in pyproject.toml) is NOT already on PyPI. So bumping that version and
8
+ # merging to `dev` cuts a release; ordinary dev pushes (version unchanged -> already published)
9
+ # no-op. This mirrors freesolo-flash's publish.yml, but keyed on the dev version and on `dev`
10
+ # instead of `main`, and a no-op is a clean success (not a failure) since most dev pushes don't
11
+ # bump it. Manual runs via workflow_dispatch go through the same PyPI check, so they publish the current dev version only if it is not already there.
12
+ on:
13
+ push:
14
+ branches:
15
+ - dev
16
+ paths:
17
+ - "pyproject.toml"
18
+ - "uv.lock"
19
+ - "flash/**"
20
+ - "scripts/build_dev_dist.py"
21
+ - ".github/workflows/publish-dev.yml"
22
+ workflow_dispatch:
23
+
24
+ concurrency:
25
+ group: publish-flash-dev-${{ github.ref }}
26
+ cancel-in-progress: false
27
+
28
+ jobs:
29
+ publish-pypi-dev:
30
+ name: Publish freesolo-flash-dev to PyPI
31
+ runs-on: ubuntu-latest
32
+ permissions:
33
+ contents: read
34
+ env:
35
+ UV_PUBLISH_TOKEN: ${{ secrets.PYPI_API_TOKEN }}
36
+
37
+ steps:
38
+ - uses: actions/checkout@v6
39
+ with:
40
+ fetch-depth: 0
41
+
42
+ # tomllib is stdlib only since 3.11; pin the interpreter before the metadata step below.
43
+ - name: Set up Python
44
+ uses: actions/setup-python@v5
45
+ with:
46
+ python-version: "3.11"
47
+
48
+ - name: Read dev-channel version
49
+ id: meta
50
+ run: |
51
+ python3 - <<'PY' >> "$GITHUB_OUTPUT"
52
+ import tomllib
53
+
54
+ with open("pyproject.toml", "rb") as f:
55
+ data = tomllib.load(f)
56
+ print(f"version={data['tool']['flash-dev']['version']}")
57
+ PY
58
+
59
+ - name: Decide whether to publish
60
+ id: decide
61
+ env:
62
+ VERSION: ${{ steps.meta.outputs.version }}
63
+ run: |
64
+ python3 - <<'PY' >> "$GITHUB_OUTPUT"
65
+ import os
66
+ import urllib.error
67
+ import urllib.request
68
+
69
+ version = os.environ["VERSION"]
70
+ url = f"https://pypi.org/pypi/freesolo-flash-dev/{version}/json"
71
+ published = False
72
+ try:
73
+ with urllib.request.urlopen(url, timeout=30) as response:
74
+ published = response.status == 200
75
+ except urllib.error.HTTPError as error:
76
+ if error.code != 404:
77
+ raise
78
+ # Publish only when the dev version isn't on PyPI yet (i.e. it was just bumped).
79
+ print(f"publish={'false' if published else 'true'}")
80
+ PY
81
+
82
+ - name: Already published (nothing to do)
83
+ if: steps.decide.outputs.publish == 'false'
84
+ run: echo "freesolo-flash-dev ${{ steps.meta.outputs.version }} is already on PyPI; nothing to publish."
85
+
86
+ - name: Install uv
87
+ if: steps.decide.outputs.publish == 'true'
88
+ uses: astral-sh/setup-uv@v6
89
+ with:
90
+ python-version: "3.11"
91
+
92
+ - name: Build dev-channel distribution
93
+ if: steps.decide.outputs.publish == 'true'
94
+ run: |
95
+ rm -rf dist
96
+ # build_dev_dist.py renames the package to freesolo-flash-dev, retargets the CLI to
97
+ # flash-dev, flips the channel to the staging plane, then runs `uv build`.
98
+ python3 scripts/build_dev_dist.py
99
+
100
+ - name: Publish to PyPI
101
+ if: steps.decide.outputs.publish == 'true'
102
+ run: |
103
+ if [ -z "$UV_PUBLISH_TOKEN" ]; then
104
+ echo "::error::UV_PUBLISH_TOKEN is empty (sourced from the PYPI_API_TOKEN repo secret); refusing to publish without credentials. Set the PYPI_API_TOKEN secret on this repository."
105
+ exit 1
106
+ fi
107
+ uv publish
@@ -0,0 +1,69 @@
1
+ name: Publish flash image
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - main
7
+ - dev
8
+ paths:
9
+ - "flash/**"
10
+ - "Dockerfile"
11
+ - "pyproject.toml"
12
+ - ".github/workflows/publish-image.yml"
13
+ workflow_dispatch:
14
+
15
+ concurrency:
16
+ group: publish-flash-image-${{ github.ref }}
17
+ cancel-in-progress: true
18
+
19
+ jobs:
20
+ publish-flash-image:
21
+ name: Build and push flash control-plane image
22
+ runs-on: ubuntu-latest
23
+ permissions:
24
+ contents: read
25
+ packages: write
26
+ env:
27
+ IMAGE: ghcr.io/freesolo-co/freesolo-flash
28
+
29
+ steps:
30
+ - uses: actions/checkout@v6
31
+
32
+ - name: Set up Docker Buildx
33
+ uses: docker/setup-buildx-action@v3
34
+
35
+ - name: Log in to GHCR
36
+ uses: docker/login-action@v3
37
+ with:
38
+ registry: ghcr.io
39
+ username: ${{ github.actor }}
40
+ password: ${{ secrets.GITHUB_TOKEN }}
41
+
42
+ - name: Compute tags
43
+ id: tags
44
+ run: |
45
+ # main publishes :main + :latest; any other branch (dev) publishes
46
+ # :<branch>. every build also gets an immutable :sha-<short> tag.
47
+ short_sha="$(git rev-parse --short HEAD)"
48
+ branch="${GITHUB_REF_NAME}"
49
+ {
50
+ echo "tags<<EOF"
51
+ if [ "$branch" = "main" ]; then
52
+ echo "${IMAGE}:main"
53
+ echo "${IMAGE}:latest"
54
+ else
55
+ echo "${IMAGE}:${branch}"
56
+ fi
57
+ echo "${IMAGE}:sha-${short_sha}"
58
+ echo "EOF"
59
+ } >> "$GITHUB_OUTPUT"
60
+
61
+ - name: Build and push
62
+ uses: docker/build-push-action@v6
63
+ with:
64
+ context: .
65
+ file: ./Dockerfile
66
+ push: true
67
+ tags: ${{ steps.tags.outputs.tags }}
68
+ cache-from: type=gha
69
+ cache-to: type=gha,mode=min
@@ -0,0 +1,174 @@
1
+ name: Publish flash package
2
+
3
+ # Auto-publish freesolo-flash to PyPI when a push to main bumps the version in pyproject.toml.
4
+ # Mirrors freesolo-sdk's publish flow: a push that touches package files MUST also raise the
5
+ # version (else CI fails), and a version already on PyPI is rejected — so merging dev -> main
6
+ # with a bumped version is all it takes to release. Manual runs are still possible via
7
+ # workflow_dispatch.
8
+ on:
9
+ push:
10
+ branches:
11
+ - main
12
+ paths:
13
+ - "pyproject.toml"
14
+ - "uv.lock"
15
+ - "flash/**"
16
+ - ".github/workflows/publish.yml"
17
+ workflow_dispatch:
18
+
19
+ concurrency:
20
+ group: publish-flash-${{ github.ref }}
21
+ cancel-in-progress: false
22
+
23
+ jobs:
24
+ publish-pypi:
25
+ name: Publish flash PyPI package
26
+ runs-on: ubuntu-latest
27
+ permissions:
28
+ contents: read
29
+ env:
30
+ UV_PUBLISH_TOKEN: ${{ secrets.PYPI_API_TOKEN }}
31
+
32
+ steps:
33
+ - uses: actions/checkout@v6
34
+ with:
35
+ fetch-depth: 0
36
+
37
+ # Pin Python 3.11 BEFORE any step that imports tomllib (in the stdlib only since 3.11).
38
+ # The metadata + version-bump steps below run inline python3 scripts that import tomllib, which would
39
+ # fail on a runner whose default python3 predates 3.11 — so set it up here, not later
40
+ # (the uv step that follows brings its own interpreter, but only after those tomllib steps).
41
+ - name: Set up Python
42
+ uses: actions/setup-python@v5
43
+ with:
44
+ python-version: "3.11"
45
+
46
+ - name: Read package metadata
47
+ id: metadata
48
+ run: |
49
+ python3 - <<'PY' >> "$GITHUB_OUTPUT"
50
+ import tomllib
51
+
52
+ with open("pyproject.toml", "rb") as f:
53
+ project = tomllib.load(f)["project"]
54
+
55
+ print(f"name={project['name']}")
56
+ print(f"version={project['version']}")
57
+ PY
58
+
59
+ - name: Require version bump for package changes
60
+ id: changes
61
+ if: github.event_name == 'push'
62
+ env:
63
+ BEFORE_SHA: ${{ github.event.before }}
64
+ CURRENT_SHA: ${{ github.sha }}
65
+ run: |
66
+ python3 - <<'PY'
67
+ import os
68
+ import re
69
+ import subprocess
70
+ import sys
71
+ import tomllib
72
+ from pathlib import Path
73
+
74
+ output_path = Path(os.environ["GITHUB_OUTPUT"])
75
+
76
+ def set_package_changed(value: bool) -> None:
77
+ with output_path.open("a", encoding="utf-8") as output:
78
+ output.write(f"package_changed={'true' if value else 'false'}\n")
79
+
80
+ before = os.environ["BEFORE_SHA"]
81
+ current = os.environ["CURRENT_SHA"]
82
+ # First push to the branch (BEFORE_SHA is empty or all zeros): skip the bump check and publish whatever version is present.
83
+ if not before or set(before) == {"0"}:
84
+ set_package_changed(True)
85
+ raise SystemExit(0)
86
+
87
+ changed = subprocess.check_output(
88
+ ["git", "diff", "--name-only", before, current],
89
+ text=True,
90
+ ).splitlines()
91
+ # Package files = the importable source, the project metadata, and the lockfile.
92
+ package_changed = any(
93
+ path == "pyproject.toml"
94
+ or path == "uv.lock"
95
+ or path.startswith("flash/")
96
+ for path in changed
97
+ )
98
+ if not package_changed:
99
+ set_package_changed(False)
100
+ raise SystemExit(0)
101
+ set_package_changed(True)
102
+
103
+ def read_version(data: str) -> str:
104
+ return tomllib.loads(data)["project"]["version"]
105
+
106
+ def version_key(value: str) -> tuple[int, ...]:
107
+ return tuple(int(part) for part in re.findall(r"\d+", value))
108
+
109
+ previous_version = read_version(
110
+ subprocess.check_output(
111
+ ["git", "show", f"{before}:pyproject.toml"],
112
+ text=True,
113
+ )
114
+ )
115
+ current_version = read_version(Path("pyproject.toml").read_text())
116
+ if version_key(current_version) <= version_key(previous_version):
117
+ print(
118
+ "::error::Package files changed but pyproject.toml version "
119
+ f"did not increase: {previous_version} -> {current_version}"
120
+ )
121
+ sys.exit(1)
122
+ PY
123
+
124
+ - name: Fail if PyPI version already exists
125
+ if: github.event_name == 'workflow_dispatch' || steps.changes.outputs.package_changed == 'true'
126
+ env:
127
+ PACKAGE_NAME: ${{ steps.metadata.outputs.name }}
128
+ PACKAGE_VERSION: ${{ steps.metadata.outputs.version }}
129
+ run: |
130
+ python3 - <<'PY'
131
+ import os
132
+ import urllib.error
133
+ import urllib.request
134
+
135
+ name = os.environ["PACKAGE_NAME"]
136
+ version = os.environ["PACKAGE_VERSION"]
137
+ url = f"https://pypi.org/pypi/{name}/{version}/json"
138
+
139
+ try:
140
+ with urllib.request.urlopen(url, timeout=30) as response:
141
+ if response.status == 200:
142
+ raise SystemExit(
143
+ f"{name} {version} is already on PyPI. "
144
+ "Bump pyproject.toml before publishing."
145
+ )
146
+ except urllib.error.HTTPError as error:
147
+ if error.code != 404:
148
+ raise
149
+ PY
150
+
151
+ - name: Install uv
152
+ if: github.event_name == 'workflow_dispatch' || steps.changes.outputs.package_changed == 'true'
153
+ uses: astral-sh/setup-uv@v6
154
+ with:
155
+ python-version: "3.11"
156
+
157
+ - name: Build distributions
158
+ if: github.event_name == 'workflow_dispatch' || steps.changes.outputs.package_changed == 'true'
159
+ run: |
160
+ rm -rf dist
161
+ uv build
162
+
163
+ - name: Publish to PyPI
164
+ if: github.event_name == 'workflow_dispatch' || steps.changes.outputs.package_changed == 'true'
165
+ run: |
166
+ if [ -z "$UV_PUBLISH_TOKEN" ]; then
167
+ echo "::error::UV_PUBLISH_TOKEN is empty (sourced from the PYPI_API_TOKEN repo secret); refusing to publish without credentials. Set the PYPI_API_TOKEN secret on this repository."
168
+ exit 1
169
+ fi
170
+ uv publish
171
+
172
+ - name: No package changes
173
+ if: github.event_name == 'push' && steps.changes.outputs.package_changed == 'false'
174
+ run: echo "No package files changed; nothing to publish."
@@ -0,0 +1,85 @@
1
+ name: flash worker image
2
+
3
+ # Builds the prebuilt flash worker image (full training stack + flash-attn baked in) and
4
+ # pushes it to GHCR, so Vast/RunPod cold-start skips the per-host dep install (the dominant
5
+ # cold-start cost). Training and serving both default to the pinned WORKER_IMAGE constant
6
+ # (flash/providers/runpod/train.py) — bump that to the new tag. FLASH_WORKER_IMAGE is an
7
+ # operator env override honored by BOTH training (providers/runpod/{jobs,train}.py) and serving
8
+ # (flash/serve/deploy.py).
9
+ #
10
+ # Triggered automatically on pushes to main that touch flash/** (worker training code is baked in; skipping a
11
+ # rebuild leaves GPU workers running stale code), Dockerfile.worker, pyproject.toml, or the other paths listed below; run manually to choose a tag or FA3 wheel.
12
+ # The build is heavy (vllm + full training stack + flash-attn wheel) but Docker layer caching
13
+ # means only the flash COPY+install layer reruns on pure Python changes (~minutes, not hours).
14
+
15
+ on:
16
+ workflow_dispatch:
17
+ inputs:
18
+ tag:
19
+ description: image tag
20
+ default: cu128
21
+ required: false
22
+ flash_attn_3_spec:
23
+ description: >-
24
+ FlashAttention-3 (Hopper sm90) install spec -> FLASH_ATTN_3_SPEC build-arg. Leave at the
25
+ default validated wheel to bake FA3 into every image; override to pin a different wheel.
26
+ default: "https://github.com/windreamer/flash-attention3-wheels/releases/download/2026.03.19-850211f/flash_attn_3-3.0.0%2B20260318.cu128torch2100cxx11abitrue.8afc61-cp39-abi3-linux_x86_64.whl"
27
+ required: false
28
+ push:
29
+ branches: [main]
30
+ paths:
31
+ - Dockerfile.worker
32
+ - .github/workflows/worker-image.yml
33
+ - flash/**
34
+ - pyproject.toml
35
+ # The baked rp_handler is generated from _train_body at build time (docker/make_rp_handler.py),
36
+ # so handler changes (e.g. the weight-cache preload branch) MUST rebuild the image. flash/**
37
+ # covers the handler source (endpoints.py); the generator lives outside flash/, so watch it too.
38
+ - docker/make_rp_handler.py
39
+
40
+ permissions:
41
+ contents: read
42
+ packages: write
43
+
44
+ jobs:
45
+ build:
46
+ runs-on: ubuntu-24.04-8core
47
+ steps:
48
+ - uses: actions/checkout@v6
49
+
50
+ - name: Free disk space
51
+ run: |
52
+ sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/.ghcup \
53
+ /usr/local/lib/android /opt/hostedtoolcache /usr/share/swift \
54
+ /usr/local/share/boost /usr/local/lib/node_modules \
55
+ /usr/local/share/powershell /usr/share/miniconda || true
56
+ sudo apt-get clean || true
57
+ sudo docker image prune -af || true
58
+ df -h /
59
+
60
+ - uses: docker/setup-buildx-action@v3
61
+
62
+ - name: Log in to GHCR
63
+ uses: docker/login-action@v3
64
+ with:
65
+ registry: ghcr.io
66
+ username: ${{ github.actor }}
67
+ password: ${{ secrets.GITHUB_TOKEN }}
68
+
69
+ - name: Build + push worker image
70
+ uses: docker/build-push-action@v6
71
+ with:
72
+ context: .
73
+ file: Dockerfile.worker
74
+ push: true
75
+ # Pin the freesolo-co namespace: both RunPod and Vast pull the hardcoded
76
+ # WORKER_IMAGE = ghcr.io/freesolo-co/flash-worker:cu128 (flash/providers/runpod/train.py),
77
+ # so a fork run must not push to its own owner namespace and leave the real image stale.
78
+ tags: ghcr.io/freesolo-co/flash-worker:${{ github.event.inputs.tag || 'cu128' }}
79
+ provenance: false
80
+ # Install a PREBUILT flash-attn wheel instead of compiling from source (a multi-arch
81
+ # source compile is huge and OOM-kills the runner). The wheel matches the base exactly
82
+ # (cu128 / torch2.10 / cp312).
83
+ build-args: |
84
+ FLASH_ATTN_SPEC=https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.9.0/flash_attn-2.8.3%2Bcu128torch2.10-cp312-cp312-linux_x86_64.whl
85
+ FLASH_ATTN_3_SPEC=${{ github.event.inputs.flash_attn_3_spec || 'https://github.com/windreamer/flash-attention3-wheels/releases/download/2026.03.19-850211f/flash_attn_3-3.0.0%2B20260318.cu128torch2100cxx11abitrue.8afc61-cp39-abi3-linux_x86_64.whl' }}