freesolo-flash-dev 0.2.25__tar.gz → 0.2.26__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (274) hide show
  1. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/.env.example +5 -0
  2. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/.github/workflows/bake-kernel-cache.yml +20 -2
  3. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/.github/workflows/publish-dev.yml +17 -3
  4. freesolo_flash_dev-0.2.26/.github/workflows/version-parity.yml +49 -0
  5. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/.github/workflows/worker-image.yml +12 -0
  6. freesolo_flash_dev-0.2.26/Dockerfile +42 -0
  7. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/Dockerfile.worker +3 -3
  8. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/PKG-INFO +9 -6
  9. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/README.md +8 -5
  10. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/docker/bake_kernel_cache.py +16 -2
  11. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/docker/bake_pod_entry.py +5 -1
  12. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/docs/cli-style/README.md +2 -2
  13. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/docs/cli-style/generate.py +4 -4
  14. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/__init__.py +1 -1
  15. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/_channel.py +7 -2
  16. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/catalog.py +73 -2
  17. {freesolo_flash_dev-0.2.25/flash/cli/main → freesolo_flash_dev-0.2.26/flash/cli}/__init__.py +39 -5
  18. {freesolo_flash_dev-0.2.25/flash/cli/main → freesolo_flash_dev-0.2.26/flash/cli}/__main__.py +1 -1
  19. {freesolo_flash_dev-0.2.25/flash/cli/main → freesolo_flash_dev-0.2.26/flash/cli}/commands.py +34 -2
  20. {freesolo_flash_dev-0.2.25/flash/cli/main → freesolo_flash_dev-0.2.26/flash/cli}/render.py +4 -8
  21. {freesolo_flash_dev-0.2.25/flash/cli/main → freesolo_flash_dev-0.2.26/flash/cli}/training_doc.py +0 -3
  22. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/client/__init__.py +1 -1
  23. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/client/config.py +1 -1
  24. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/client/http.py +34 -5
  25. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/client/runtime_secrets.py +23 -6
  26. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/cost/analytical.py +4 -1
  27. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/cost/facts.py +24 -11
  28. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/cost/spec.py +0 -1
  29. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/engine/accounting.py +0 -2
  30. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/engine/chalk_kernels.py +1 -6
  31. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/engine/recipe.py +0 -12
  32. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/engine/vram.py +42 -9
  33. freesolo_flash_dev-0.2.26/flash/engine/worker/__init__.py +589 -0
  34. freesolo_flash_dev-0.2.26/flash/engine/worker/_pkg.py +30 -0
  35. freesolo_flash_dev-0.2.26/flash/engine/worker/adapter.py +187 -0
  36. freesolo_flash_dev-0.2.26/flash/engine/worker/decoding.py +144 -0
  37. freesolo_flash_dev-0.2.26/flash/engine/worker/finalize.py +68 -0
  38. freesolo_flash_dev-0.2.26/flash/engine/worker/gpu_setup.py +101 -0
  39. freesolo_flash_dev-0.2.26/flash/engine/worker/grpo.py +297 -0
  40. freesolo_flash_dev-0.2.26/flash/engine/worker/heartbeat.py +322 -0
  41. freesolo_flash_dev-0.2.26/flash/engine/worker/hf.py +526 -0
  42. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/engine/worker/kernel_warmup.py +23 -18
  43. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/engine/worker/lora.py +42 -66
  44. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/engine/worker/packing.py +127 -7
  45. freesolo_flash_dev-0.2.26/flash/engine/worker/perf/__init__.py +495 -0
  46. freesolo_flash_dev-0.2.26/flash/engine/worker/perf/attn.py +176 -0
  47. freesolo_flash_dev-0.2.26/flash/engine/worker/perf/diagnostics.py +240 -0
  48. freesolo_flash_dev-0.2.26/flash/engine/worker/perf/lifecycle.py +274 -0
  49. freesolo_flash_dev-0.2.26/flash/engine/worker/perf/liger.py +73 -0
  50. freesolo_flash_dev-0.2.26/flash/engine/worker/perf/loraplus.py +40 -0
  51. freesolo_flash_dev-0.2.26/flash/engine/worker/perf/memory.py +84 -0
  52. freesolo_flash_dev-0.2.26/flash/engine/worker/rl.py +912 -0
  53. freesolo_flash_dev-0.2.26/flash/engine/worker/sft.py +683 -0
  54. freesolo_flash_dev-0.2.26/flash/engine/worker/wandb_log.py +145 -0
  55. freesolo_flash_dev-0.2.25/flash/envs/adapter/__init__.py → freesolo_flash_dev-0.2.26/flash/envs/adapter.py +7 -1
  56. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/providers/__init__.py +5 -10
  57. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/providers/_auth.py +0 -8
  58. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/providers/_http.py +3 -3
  59. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/providers/_instance.py +108 -51
  60. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/providers/_instance_bootstrap.py +35 -37
  61. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/providers/_poll.py +147 -14
  62. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/providers/allocator.py +9 -37
  63. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/providers/base.py +61 -50
  64. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/providers/lambdalabs/__init__.py +18 -5
  65. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/providers/lambdalabs/jobs/__init__.py +137 -30
  66. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/providers/lambdalabs/jobs/builders.py +4 -3
  67. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/providers/lambdalabs/pricing.py +1 -0
  68. freesolo_flash_dev-0.2.26/flash/providers/preflight.py +92 -0
  69. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/providers/realized.py +5 -5
  70. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/providers/runpod/__init__.py +8 -2
  71. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/providers/runpod/api.py +100 -8
  72. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/providers/runpod/gpus.py +0 -5
  73. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/providers/runpod/jobs.py +180 -72
  74. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/providers/runpod/keys.py +15 -9
  75. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/providers/runpod/preload.py +42 -140
  76. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/providers/runpod/train/__init__.py +1 -50
  77. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/providers/runpod/train/deps.py +119 -53
  78. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/providers/runpod/train/endpoints.py +39 -29
  79. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/runner/__init__.py +148 -23
  80. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/runner/lifecycle.py +50 -40
  81. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/schema/__init__.py +3 -3
  82. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/serve/deploy.py +6 -0
  83. freesolo_flash_dev-0.2.26/flash/serve/export.py +176 -0
  84. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/serve/pricing.py +3 -0
  85. freesolo_flash_dev-0.2.26/flash/server/_deps.py +115 -0
  86. freesolo_flash_dev-0.2.26/flash/server/_locks.py +54 -0
  87. freesolo_flash_dev-0.2.26/flash/server/_runtime.py +238 -0
  88. freesolo_flash_dev-0.2.26/flash/server/app.py +371 -0
  89. freesolo_flash_dev-0.2.26/flash/server/billing_retry.py +122 -0
  90. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/server/envs.py +3 -13
  91. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/server/reconcile.py +1 -1
  92. freesolo_flash_dev-0.2.26/flash/server/routes/__init__.py +6 -0
  93. freesolo_flash_dev-0.2.26/flash/server/routes/envs.py +48 -0
  94. freesolo_flash_dev-0.2.26/flash/server/routes/meta.py +42 -0
  95. freesolo_flash_dev-0.2.26/flash/server/routes/runs.py +175 -0
  96. freesolo_flash_dev-0.2.26/flash/server/routes/serving.py +427 -0
  97. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/spec.py +4 -3
  98. freesolo_flash_dev-0.2.26/infisical-entrypoint.sh +45 -0
  99. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/pyproject.toml +9 -8
  100. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/conftest.py +5 -6
  101. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/live/conftest.py +2 -2
  102. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_agent_flash_cli_contract.py +2 -2
  103. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_allocator.py +37 -18
  104. freesolo_flash_dev-0.2.26/tests/test_billing_retry.py +803 -0
  105. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_checkpoints.py +197 -0
  106. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_cli_commands.py +59 -19
  107. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_cli_errors.py +1 -1
  108. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_cli_estimate.py +1 -1
  109. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_cli_managed.py +1 -1
  110. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_cli_render_theme.py +2 -2
  111. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_client.py +44 -0
  112. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_client_server_integration.py +25 -1
  113. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_config_overrides.py +1 -1
  114. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_cost_analytical.py +23 -5
  115. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_cost_equation.py +1 -1
  116. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_cost_hardware.py +17 -12
  117. freesolo_flash_dev-0.2.26/tests/test_cost_models.py +64 -0
  118. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_dev_channel.py +11 -0
  119. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_disk_gb.py +9 -2
  120. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_env_install.py +2 -2
  121. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_env_push.py +2 -2
  122. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_envs_coverage.py +2 -2
  123. freesolo_flash_dev-0.2.26/tests/test_export.py +466 -0
  124. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_flash_mvp.py +1 -27
  125. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_flash_worker.py +152 -17
  126. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_gpus.py +9 -8
  127. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_grpo_params.py +249 -0
  128. freesolo_flash_dev-0.2.26/tests/test_idle_endpoint_reaper.py +478 -0
  129. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_jobs.py +284 -26
  130. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_lambda_runner.py +344 -19
  131. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_login_perms.py +2 -2
  132. freesolo_flash_dev-0.2.26/tests/test_mig_guard.py +284 -0
  133. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_packing.py +352 -0
  134. freesolo_flash_dev-0.2.26/tests/test_poll_helpers.py +95 -0
  135. freesolo_flash_dev-0.2.26/tests/test_preflight.py +173 -0
  136. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_provider_routing.py +99 -32
  137. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_provider_teardown_robustness.py +16 -63
  138. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_providers_symmetry.py +38 -8
  139. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_realized_cost.py +1 -1
  140. freesolo_flash_dev-0.2.26/tests/test_resume_on_retry.py +389 -0
  141. freesolo_flash_dev-0.2.26/tests/test_runpod_key_fingerprint.py +75 -0
  142. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_runpod_key_waterfall.py +29 -1
  143. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_serve.py +49 -0
  144. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_server_api.py +289 -1
  145. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_server_billing.py +23 -1
  146. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_server_db.py +20 -1
  147. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_thinking_config.py +1 -1
  148. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_version.py +1 -1
  149. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_vl_weight_sync.py +27 -3
  150. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_weight_cache.py +264 -198
  151. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_worker_dryrun.py +35 -24
  152. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_worker_image.py +20 -1
  153. freesolo_flash_dev-0.2.26/tests/test_worker_init_heartbeat.py +357 -0
  154. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_worker_stack.py +202 -10
  155. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/uv.lock +1 -1
  156. freesolo_flash_dev-0.2.25/Dockerfile +0 -24
  157. freesolo_flash_dev-0.2.25/flash/cli/__init__.py +0 -1
  158. freesolo_flash_dev-0.2.25/flash/engine/worker/__init__.py +0 -2916
  159. freesolo_flash_dev-0.2.25/flash/engine/worker/perf.py +0 -1048
  160. freesolo_flash_dev-0.2.25/flash/envs/adapter/rubric.py +0 -222
  161. freesolo_flash_dev-0.2.25/flash/mcp/__init__.py +0 -1
  162. freesolo_flash_dev-0.2.25/flash/mcp/server.py +0 -85
  163. freesolo_flash_dev-0.2.25/flash/providers/hyperstack/__init__.py +0 -127
  164. freesolo_flash_dev-0.2.25/flash/providers/hyperstack/api.py +0 -522
  165. freesolo_flash_dev-0.2.25/flash/providers/hyperstack/auth.py +0 -17
  166. freesolo_flash_dev-0.2.25/flash/providers/hyperstack/gpus.py +0 -29
  167. freesolo_flash_dev-0.2.25/flash/providers/hyperstack/jobs/__init__.py +0 -632
  168. freesolo_flash_dev-0.2.25/flash/providers/hyperstack/jobs/builders.py +0 -122
  169. freesolo_flash_dev-0.2.25/flash/providers/hyperstack/preflight.py +0 -23
  170. freesolo_flash_dev-0.2.25/flash/providers/hyperstack/pricing.py +0 -26
  171. freesolo_flash_dev-0.2.25/flash/providers/hyperstack/train.py +0 -25
  172. freesolo_flash_dev-0.2.25/flash/providers/preflight.py +0 -55
  173. freesolo_flash_dev-0.2.25/flash/server/app.py +0 -961
  174. freesolo_flash_dev-0.2.25/tests/live/test_hyperstack_live.py +0 -50
  175. freesolo_flash_dev-0.2.25/tests/test_cost_models.py +0 -36
  176. freesolo_flash_dev-0.2.25/tests/test_hyperstack_runner.py +0 -1031
  177. freesolo_flash_dev-0.2.25/tests/test_idle_endpoint_reaper.py +0 -285
  178. freesolo_flash_dev-0.2.25/tests/test_mig_guard.py +0 -70
  179. freesolo_flash_dev-0.2.25/tests/test_poll_helpers.py +0 -52
  180. freesolo_flash_dev-0.2.25/tests/test_preflight.py +0 -98
  181. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/.dockerignore +0 -0
  182. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/.github/workflows/ci.yml +0 -0
  183. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/.github/workflows/main-source-guard.yml +0 -0
  184. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/.github/workflows/publish-image.yml +0 -0
  185. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/.github/workflows/publish.yml +0 -0
  186. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/.gitignore +0 -0
  187. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/LICENSE +0 -0
  188. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/build/kernel_cache/.gitignore +0 -0
  189. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/build/kernel_cache/.keep +0 -0
  190. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/docker/Dockerfile.kernelcache +0 -0
  191. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/docker/make_rp_handler.py +0 -0
  192. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/docs/cli-style/index.html +0 -0
  193. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/docs/cli-style/preview.png +0 -0
  194. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/docs/kernel-cache.md +0 -0
  195. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/_fileio.py +0 -0
  196. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/_logging.py +0 -0
  197. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/_update_check.py +0 -0
  198. {freesolo_flash_dev-0.2.25/flash/cli/main → freesolo_flash_dev-0.2.26/flash/cli}/envpush.py +0 -0
  199. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/client/specs.py +0 -0
  200. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/cost/__init__.py +0 -0
  201. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/cost/types.py +0 -0
  202. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/engine/__init__.py +0 -0
  203. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/engine/multiturn_rollout.py +0 -0
  204. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/engine/worker/__main__.py +0 -0
  205. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/envs/__init__.py +0 -0
  206. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/envs/base.py +0 -0
  207. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/envs/registry.py +0 -0
  208. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/providers/lambdalabs/api.py +0 -0
  209. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/providers/lambdalabs/auth.py +0 -0
  210. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/providers/lambdalabs/gpus.py +0 -0
  211. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/providers/lambdalabs/preflight.py +0 -0
  212. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/providers/lambdalabs/train.py +0 -0
  213. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/providers/runpod/auth.py +0 -0
  214. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/providers/runpod/cost.py +0 -0
  215. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/providers/runpod/preflight.py +0 -0
  216. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/providers/runpod/pricing.py +0 -0
  217. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/providers/runpod/slots.py +0 -0
  218. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/py.typed +0 -0
  219. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/runner/checkpoints.py +0 -0
  220. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/runner/deploy.py +0 -0
  221. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/schema/fields.py +0 -0
  222. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/serve/__init__.py +0 -0
  223. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/server/__init__.py +0 -0
  224. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/server/__main__.py +0 -0
  225. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/server/auth.py +0 -0
  226. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/server/billing.py +0 -0
  227. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/server/checkpoints.py +0 -0
  228. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/server/db.py +0 -0
  229. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/server/environment_registry.py +0 -0
  230. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/flash/server/run_registry.py +0 -0
  231. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/scripts/build_dev_dist.py +0 -0
  232. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/__init__.py +0 -0
  233. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/_helpers/__init__.py +0 -0
  234. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/_helpers/runner.py +0 -0
  235. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/_helpers/specs.py +0 -0
  236. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/fixtures/math_eval.jsonl +0 -0
  237. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/fixtures/math_train.jsonl +0 -0
  238. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/live/__init__.py +0 -0
  239. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/live/test_lambda_live.py +0 -0
  240. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/live/test_runpod_live.py +0 -0
  241. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_algorithms.py +0 -0
  242. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_backend_jobspec_contract.py +0 -0
  243. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_cancel_remote.py +0 -0
  244. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_catalog_consistency.py +0 -0
  245. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_chalk_kernels.py +0 -0
  246. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_cost_estimate.py +0 -0
  247. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_cost_rewards.py +0 -0
  248. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_endpoint_name.py +0 -0
  249. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_env_publish.py +0 -0
  250. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_env_rate_limit_resolve.py +0 -0
  251. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_github_urlopen_retry.py +0 -0
  252. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_grpo_mask_aware.py +0 -0
  253. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_grpo_sleep_gate.py +0 -0
  254. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_kernel_cache.py +0 -0
  255. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_kv_util.py +0 -0
  256. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_logging.py +0 -0
  257. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_managed_hf_repo.py +0 -0
  258. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_metrics_schema_agent_contract.py +0 -0
  259. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_multiturn_rollout.py +0 -0
  260. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_open_model_policy.py +0 -0
  261. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_orchestrator_flash.py +0 -0
  262. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_resolve_params_b.py +0 -0
  263. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_runmgmt.py +0 -0
  264. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_runpod_api_delete.py +0 -0
  265. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_runpod_slots.py +0 -0
  266. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_serving_contract.py +0 -0
  267. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_spec_and_validation.py +0 -0
  268. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_update_check.py +0 -0
  269. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_verifiers.py +0 -0
  270. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_vl_warmstart_adapter_keys.py +0 -0
  271. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_wandb_naming.py +0 -0
  272. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_warmstart_cross_repo.py +0 -0
  273. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_worker_hardexit.py +0 -0
  274. {freesolo_flash_dev-0.2.25 → freesolo_flash_dev-0.2.26}/tests/test_worker_thinking.py +0 -0
@@ -5,6 +5,11 @@
5
5
  # GPU substrate. RunPod is the default; Vast is opt-in (only required when set).
6
6
  RUNPOD_API_KEY=
7
7
  VAST_API_KEY=
8
+ # Use the per-arch baked worker images (cu128-<sm>) to skip the ~10-15 min cold-start JIT. Requires
9
+ # the per-SM images to be published first (.github/workflows/bake-kernel-cache.yml), and a manual re-bake
10
+ # after any worker-deps change. The control-plane Dockerfile sets this to 1 by default; uncomment to
11
+ # enable for a bare `flash-server` deploy.
12
+ # FLASH_WORKER_IMAGE_PER_SM=1
8
13
  # HuggingFace token with write access to each run's [train] hf_repo (code upload +
9
14
  # streamed checkpoints/adapters land in that per-run dataset repo). The artifact repo
10
15
  # is per-run (set in the run TOML's [train] hf_repo), not an operator-wide env var.
@@ -56,11 +56,24 @@ jobs:
56
56
  gpu_type_id: "NVIDIA H100 80GB HBM3",
57
57
  allowed_cuda: "",
58
58
  }
59
- # Blackwell needs CUDA-13 hosts to JIT its PTX (matches min_cuda_for in the provider).
59
+ # Blackwell needs CUDA-13 hosts to JIT its PTX (matches min_cuda_for in the provider). Bake
60
+ # sm120 on the RTX Pro 6000 (Server Edition): it's the same sm120 (cache is sm-keyed, so it's
61
+ # valid for RTX 5090 too) but has far better secure-cloud on-demand capacity -- the RTX 5090
62
+ # pool repeatedly returned "machine does not have the resources" at create_pod.
60
63
  - {
61
64
  sm: sm120,
62
65
  arch: "12.0",
63
- gpu_type_id: "NVIDIA GeForce RTX 5090",
66
+ gpu_type_id: "NVIDIA RTX PRO 6000 Blackwell Server Edition",
67
+ allowed_cuda: "13.0",
68
+ }
69
+ # Datacenter Blackwell (sm100, distinct from the sm120 RTX Pro 6000). B200 has 180 GB and
70
+ # good secure-cloud capacity. CUDA-13 host like the other Blackwell parts. NOT in the default
71
+ # `sms` list yet (B200 is unvalidated) -- bake it explicitly via `sms=sm100` until a smoke test
72
+ # passes, then add it to the default.
73
+ - {
74
+ sm: sm100,
75
+ arch: "10.0",
76
+ gpu_type_id: "NVIDIA B200",
64
77
  allowed_cuda: "13.0",
65
78
  }
66
79
  steps:
@@ -80,6 +93,11 @@ jobs:
80
93
  - name: Install uv
81
94
  if: steps.gate.outputs.run == 'true'
82
95
  uses: astral-sh/setup-uv@v5
96
+ with:
97
+ # no cache: the post-run cache-prune calls `uv`, but the "Free disk space" step deletes
98
+ # /opt/hostedtoolcache (where setup-uv put uv), so the post step fails ("uv not found") and
99
+ # marks the whole job red even though the bake + push already succeeded.
100
+ enable-cache: false
83
101
 
84
102
  - name: Sync deps (flash + runpod + hf)
85
103
  if: steps.gate.outputs.run == 'true'
@@ -8,7 +8,8 @@ name: Publish flash dev-channel package
8
8
  # merging to `dev` cuts a release; ordinary dev pushes (version unchanged -> already published)
9
9
  # no-op. This mirrors freesolo-flash's publish.yml, but keyed on the dev version and on `dev`
10
10
  # instead of `main`, and a no-op is a clean success (not a failure) since most dev pushes don't
11
- # bump it. Manual runs via workflow_dispatch force a publish attempt of the current dev version.
11
+ # bump it. Manual runs via workflow_dispatch re-run the same version check for the current dev
12
+ # version (still a no-op when it's already on PyPI — not a forced re-publish).
12
13
  on:
13
14
  push:
14
15
  branches:
@@ -45,15 +46,28 @@ jobs:
45
46
  with:
46
47
  python-version: "3.11"
47
48
 
48
- - name: Read dev-channel version
49
+ - name: Read dev-channel version (and enforce parity with the prod version)
49
50
  id: meta
50
51
  run: |
51
52
  python3 - <<'PY' >> "$GITHUB_OUTPUT"
53
+ import sys
52
54
  import tomllib
53
55
 
54
56
  with open("pyproject.toml", "rb") as f:
55
57
  data = tomllib.load(f)
56
- print(f"version={data['tool']['flash-dev']['version']}")
58
+ dev_version = data["tool"]["flash-dev"]["version"]
59
+ prod_version = data["project"]["version"]
60
+ # The two channels MUST ship in lockstep (version-parity.yml enforces this on PRs). Re-check
61
+ # it here so the PUBLISH path can never ship a dev wheel out of sync with freesolo-flash even
62
+ # if the parity CI was bypassed or branch protection isn't strict — fail before build/publish.
63
+ if dev_version != prod_version:
64
+ print(
65
+ f"::error::version mismatch: [tool.flash-dev].version={dev_version} != "
66
+ f"[project].version={prod_version}; bump both in lockstep before publishing.",
67
+ file=sys.stderr,
68
+ )
69
+ sys.exit(1)
70
+ print(f"version={dev_version}")
57
71
  PY
58
72
 
59
73
  - name: Decide whether to publish
@@ -0,0 +1,49 @@
1
+ name: Version parity
2
+
3
+ # Keep the two release channels pinned to the same version: the prod package `freesolo-flash`
4
+ # (pyproject `[project].version`) and the dev-channel package `freesolo-flash-dev`
5
+ # (`[tool.flash-dev].version`). They publish from `main` and `dev` respectively, so a divergence
6
+ # would ship two channels claiming different versions. Bump both together.
7
+ on:
8
+ push:
9
+ branches: [main, dev]
10
+ pull_request:
11
+ branches: [main, dev]
12
+
13
+ permissions:
14
+ contents: read
15
+
16
+ jobs:
17
+ versions-match:
18
+ name: dev and main at the same version
19
+ runs-on: ubuntu-latest
20
+ steps:
21
+ - uses: actions/checkout@v6
22
+
23
+ # tomllib is stdlib only since 3.11; the runner's default python3 may predate that.
24
+ - name: Set up Python
25
+ uses: actions/setup-python@v5
26
+ with:
27
+ python-version: "3.11"
28
+
29
+ - name: Compare channel versions
30
+ run: |
31
+ python3 - <<'PY'
32
+ import sys
33
+ import tomllib
34
+
35
+ with open("pyproject.toml", "rb") as f:
36
+ data = tomllib.load(f)
37
+
38
+ prod = data["project"]["version"]
39
+ dev = data["tool"]["flash-dev"]["version"]
40
+ if prod != dev:
41
+ print(
42
+ "::error::Channel version mismatch: "
43
+ f"[project].version={prod} (freesolo-flash) != "
44
+ f"[tool.flash-dev].version={dev} (freesolo-flash-dev). "
45
+ "Bump both to the same version."
46
+ )
47
+ sys.exit(1)
48
+ print(f"OK: freesolo-flash and freesolo-flash-dev are both at {prod}.")
49
+ PY
@@ -83,3 +83,15 @@ jobs:
83
83
  build-args: |
84
84
  FLASH_ATTN_SPEC=https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.9.0/flash_attn-2.8.3%2Bcu128torch2.10-cp312-cp312-linux_x86_64.whl
85
85
  FLASH_ATTN_3_SPEC=${{ github.event.inputs.flash_attn_3_spec || 'https://github.com/windreamer/flash-attention3-wheels/releases/download/2026.03.19-850211f/flash_attn_3-3.0.0%2B20260318.cu128torch2100cxx11abitrue.8afc61-cp39-abi3-linux_x86_64.whl' }}
86
+
87
+ # The per-arch baked images (cu128-<sm>) are rebaked MANUALLY (bake-kernel-cache.yml). When this
88
+ # rebuilds :cu128, those tags go stale -> prompt a rebake. This step runs only when the cu128 tag was (re)built.
89
+ - name: Remind to rebake per-SM kernel-cache images
90
+ if: ${{ success() && (github.event.inputs.tag || 'cu128') == 'cu128' }}
91
+ run: |
92
+ echo "::warning title=Rebake per-SM images::A new :cu128 base was published. If FLASH_WORKER_IMAGE_PER_SM=1 is enabled, run bake-kernel-cache.yml to refresh cu128-sm{80,86,89,90,120}, else workers run stale baked deps."
93
+ {
94
+ echo "### ⚠️ Per-SM kernel-cache images may now be stale"
95
+ echo ""
96
+ echo "A fresh \`:cu128\` base was just published. If \`FLASH_WORKER_IMAGE_PER_SM=1\` is enabled, re-run **bake-kernel-cache.yml** to refresh the per-arch \`cu128-sm*\` tags, or workers will run the previous baked deps."
97
+ } >> "$GITHUB_STEP_SUMMARY"
@@ -0,0 +1,42 @@
1
+ # Flash control plane (operator-side).
2
+ #
3
+ # docker build -t flash-control-plane .
4
+ # docker run -p 8080:8080 \
5
+ # -e RUNPOD_API_KEY=... -e HF_TOKEN=... \
6
+ # -v flash-state:/root/.flash flash-control-plane
7
+ #
8
+ # All persistent state (key DB, run records, results) lives under ~/.flash (fixed paths,
9
+ # = /root/.flash for the default root user) — mount a volume there. Run exactly ONE
10
+ # container instance per state volume (state is local files + SQLite; no horizontal scaling).
11
+
12
+ FROM python:3.12-slim
13
+
14
+ WORKDIR /app
15
+ COPY . .
16
+ RUN apt-get update \
17
+ && apt-get install -y --no-install-recommends ca-certificates git curl \
18
+ && curl -1sLf 'https://artifacts-cli.infisical.com/setup.deb.sh' | bash \
19
+ && apt-get update && apt-get install -y --no-install-recommends infisical \
20
+ && rm -rf /var/lib/apt/lists/* \
21
+ && chmod +x /app/infisical-entrypoint.sh
22
+ RUN pip install --no-cache-dir ".[server]"
23
+
24
+ VOLUME /root/.flash
25
+ EXPOSE 8080
26
+
27
+ # Use the per-arch baked worker images (ghcr.io/.../flash-worker:cu128-<sm>) so cold workers skip the
28
+ # ~10-15 min first-use JIT; the allocator maps each GPU class to its matching -smXX tag. All validated
29
+ # SMs (sm80/86/89/90/120) are published. Rebakes are MANUAL -- after a Dockerfile.worker/deps change
30
+ # rebuilds :cu128, re-run bake-kernel-cache.yml so the -smXX tags don't ship stale deps (the
31
+ # worker-image build posts a reminder). Override at runtime with `-e FLASH_WORKER_IMAGE_PER_SM=0`.
32
+ #
33
+ # NOTE: this ENV is the default for BARE (non-Infisical) `flash-server` deploys. Under the Infisical
34
+ # entrypoint below, `infisical run` overrides the container env, so for the Infisical-managed deploy
35
+ # set FLASH_WORKER_IMAGE_PER_SM in the vault (path /flash) or add it to INFISICAL_KEEP -- otherwise
36
+ # this default may not reach the server.
37
+ ENV FLASH_WORKER_IMAGE_PER_SM=1
38
+
39
+ # secret injection wrapper: no-op passthrough unless INFISICAL_CLIENT_ID is set, else
40
+ # `infisical login` (universal-auth) then `infisical run --path /flash` before the server.
41
+ ENTRYPOINT ["/app/infisical-entrypoint.sh"]
42
+ CMD ["python", "-m", "flash.server", "--host", "0.0.0.0", "--port", "8080"]
@@ -73,9 +73,9 @@ RUN pip install --no-cache-dir \
73
73
  ARG FLASH_ATTN_SPEC=flash-attn
74
74
  # Source-build fallback only (ignored when FLASH_ATTN_SPEC is a wheel): bound the compile so it
75
75
  # doesn't OOM. TORCH_CUDA_ARCH_LIST restricts to the catalog's arches — Ampere (8.0 A100 / 8.6
76
- # 3090/A40), Ada (8.9 4090), Hopper (9.0 H100), Blackwell (12.0 RTX 5090; sm120); MAX_JOBS bounds
76
+ # 3090/A40), Ada (8.9 4090), Hopper (9.0 H100), Blackwell datacenter (10.0 B200; sm100) + workstation (12.0 RTX 5090/Pro 6000; sm120); MAX_JOBS bounds
77
77
  # peak compile memory. (A from-source build still needs a big-RAM host; the wheel avoids all of it.)
78
- RUN TORCH_CUDA_ARCH_LIST="8.0 8.6 8.9 9.0 12.0" MAX_JOBS=4 \
78
+ RUN TORCH_CUDA_ARCH_LIST="8.0 8.6 8.9 9.0 10.0 12.0" MAX_JOBS=4 \
79
79
  pip install --no-cache-dir "${FLASH_ATTN_SPEC}" --no-build-isolation \
80
80
  && echo "flash-attn: installed (${FLASH_ATTN_SPEC})" || echo "flash-attn: build failed, SDPA fallback"
81
81
 
@@ -119,7 +119,7 @@ RUN if [ -n "${FLASH_ATTN_3_SPEC}" ]; then \
119
119
  # import fine but raise "no kernel image is available for execution on the device" at the first conv
120
120
  # forward on sm120 — GPU-verified. engine.worker.packing.gdn_packing_available runs a conv smoke too,
121
121
  # so even a wrong-arch build can't crash a run (it just keeps GDN packing off).
122
- RUN TORCH_CUDA_ARCH_LIST="8.0 8.6 8.9 9.0 12.0" CAUSAL_CONV1D_FORCE_BUILD=TRUE MAX_JOBS=4 \
122
+ RUN TORCH_CUDA_ARCH_LIST="8.0 8.6 8.9 9.0 10.0 12.0" CAUSAL_CONV1D_FORCE_BUILD=TRUE MAX_JOBS=4 \
123
123
  pip install --no-cache-dir "causal-conv1d==1.6.2.post1" --no-build-isolation \
124
124
  && python -c "import causal_conv1d" \
125
125
  && echo "causal_conv1d: installed (GDN packing enabled)" \
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: freesolo-flash-dev
3
- Version: 0.2.25
3
+ Version: 0.2.26
4
4
  Summary: Flash — managed LoRA post-training (SFT/GRPO) for Freesolo environments, driven by the `flash` CLI
5
5
  Project-URL: Homepage, https://github.com/freesolo-co/flash
6
6
  Project-URL: Repository, https://github.com/freesolo-co/flash
@@ -85,7 +85,6 @@ The allocator picks the cheapest validated RunPod GPU class that fits the run.
85
85
  and ready-to-run configs to start from
86
86
  - `flash/serve/`, `flash/server/` — adapter serving and the FastAPI control
87
87
  plane (run operator-side via the separate `flash-server` command)
88
- - `flash/mcp/` — stdio MCP bridge for coding agents
89
88
  - `Dockerfile` — the control-plane image (used by the repo docker-compose)
90
89
  - `tests/` — pytest suite (CPU-only; offline-by-default, no GPU/network)
91
90
 
@@ -117,11 +116,15 @@ Two channels are published to PyPI from the *same source*, distinguished by one
117
116
  | prod | `freesolo-flash` | `flash` | `flash.freesolo.co` | push to `main` that bumps `[project].version` (`.github/workflows/publish.yml`) |
118
117
  | dev | `freesolo-flash-dev` | `flash-dev` | `flash-dev.freesolo.co` | push to `dev` whose `[tool.flash-dev].version` isn't on PyPI yet (`.github/workflows/publish-dev.yml`) |
119
118
 
120
- The two install side by side (distinct package + CLI names). The dev build is produced by
119
+ Each environment holds exactly **one** channel: both packages ship the same import package
120
+ (`flash/`) with one baked `CHANNEL` line, so installing both into the same environment makes the
121
+ later install win for *both* CLIs. For side-by-side prod and staging, install each channel in its
122
+ own virtualenv (or via `pipx`, which isolates per tool). The dev build is produced by
121
123
  `scripts/build_dev_dist.py`, which renames the package/CLI and flips `CHANNEL` to `dev` before
122
- `uv build`. To cut a dev release, bump `[tool.flash-dev].version` and merge to `dev`. Either CLI
123
- still honours an explicit `FLASH_API_URL` / `flash login --api-url`; the channel only sets the
124
- default.
124
+ `uv build`. Both channels ship at the **same version**: `[project].version` and
125
+ `[tool.flash-dev].version` must match (CI enforces this via `.github/workflows/version-parity.yml`),
126
+ so cutting a release means bumping both together. Either CLI still honours an explicit
127
+ `FLASH_API_URL` / the `login --api-url` flag; the channel only sets the default.
125
128
 
126
129
  ## Serving From an API
127
130
 
@@ -36,7 +36,6 @@ The allocator picks the cheapest validated RunPod GPU class that fits the run.
36
36
  and ready-to-run configs to start from
37
37
  - `flash/serve/`, `flash/server/` — adapter serving and the FastAPI control
38
38
  plane (run operator-side via the separate `flash-server` command)
39
- - `flash/mcp/` — stdio MCP bridge for coding agents
40
39
  - `Dockerfile` — the control-plane image (used by the repo docker-compose)
41
40
  - `tests/` — pytest suite (CPU-only; offline-by-default, no GPU/network)
42
41
 
@@ -68,11 +67,15 @@ Two channels are published to PyPI from the *same source*, distinguished by one
68
67
  | prod | `freesolo-flash` | `flash` | `flash.freesolo.co` | push to `main` that bumps `[project].version` (`.github/workflows/publish.yml`) |
69
68
  | dev | `freesolo-flash-dev` | `flash-dev` | `flash-dev.freesolo.co` | push to `dev` whose `[tool.flash-dev].version` isn't on PyPI yet (`.github/workflows/publish-dev.yml`) |
70
69
 
71
- The two install side by side (distinct package + CLI names). The dev build is produced by
70
+ Each environment holds exactly **one** channel: both packages ship the same import package
71
+ (`flash/`) with one baked `CHANNEL` line, so installing both into the same environment makes the
72
+ later install win for *both* CLIs. For side-by-side prod and staging, install each channel in its
73
+ own virtualenv (or via `pipx`, which isolates per tool). The dev build is produced by
72
74
  `scripts/build_dev_dist.py`, which renames the package/CLI and flips `CHANNEL` to `dev` before
73
- `uv build`. To cut a dev release, bump `[tool.flash-dev].version` and merge to `dev`. Either CLI
74
- still honours an explicit `FLASH_API_URL` / `flash login --api-url`; the channel only sets the
75
- default.
75
+ `uv build`. Both channels ship at the **same version**: `[project].version` and
76
+ `[tool.flash-dev].version` must match (CI enforces this via `.github/workflows/version-parity.yml`),
77
+ so cutting a release means bumping both together. Either CLI still honours an explicit
78
+ `FLASH_API_URL` / the `login --api-url` flag; the channel only sets the default.
76
79
 
77
80
  ## Serving From an API
78
81
 
@@ -69,7 +69,10 @@ def main() -> int:
69
69
  ap.add_argument("--gpu-type-id", required=True, help="RunPod gpuTypeId, e.g. 'NVIDIA H100 80GB HBM3'")
70
70
  ap.add_argument("--image", default="ghcr.io/freesolo-co/flash-worker:cu128")
71
71
  ap.add_argument("--out", default="build/kernel_cache")
72
- ap.add_argument("--container-disk-gb", type=int, default=80)
72
+ # the warm pod only pulls the ~20GB image + writes the cache (no model download), so keep this
73
+ # modest -- an over-large ask shrinks the eligible host pool and trips "machine does not have the
74
+ # resources" on scarce classes (e.g. Blackwell sm120 on secure cloud).
75
+ ap.add_argument("--container-disk-gb", type=int, default=60)
73
76
  ap.add_argument("--deadline-min", type=int, default=45)
74
77
  ap.add_argument("--run-id", default="", help="unique suffix for the temp repo (default: time+uuid)")
75
78
  ap.add_argument(
@@ -221,7 +224,18 @@ def _verify(out: str, sm: str) -> int:
221
224
  blob = os.path.join(out, "mega_cache.bin")
222
225
  meta = os.path.join(out, "mega_cache.json")
223
226
  if not os.path.isfile(blob):
224
- log(f"FAIL: no mega_cache.bin in {out}")
227
+ log(f"FAIL: no mega_cache.bin in {out}; what the warmup actually produced:")
228
+ for root, _, files in os.walk(out):
229
+ for f in sorted(files):
230
+ p = os.path.join(root, f)
231
+ log(f" present: {os.path.relpath(p, out)} ({os.path.getsize(p)} b)")
232
+ wl = os.path.join(out, "warmup.log")
233
+ if os.path.isfile(wl):
234
+ log(" --- warmup.log tail ---")
235
+ with open(wl, errors="replace") as wlf:
236
+ tail = wlf.read().splitlines()[-40:]
237
+ for line in tail:
238
+ log(f" | {line}")
225
239
  return 1
226
240
  try:
227
241
  with open(meta) as f:
@@ -49,7 +49,11 @@ def main() -> int:
49
49
  if arch:
50
50
  cmd += ["--arch", arch]
51
51
  print(f"[bake] running: {' '.join(cmd)}", flush=True)
52
- rc = subprocess.call(cmd, env=env)
52
+ # capture the warmup output into /out so it ships back with the cache -- lets the CI helper show
53
+ # WHICH warm steps compiled and what save_cache_artifacts returned when no mega_cache.bin lands.
54
+ os.makedirs("/out", exist_ok=True)
55
+ with open("/out/warmup.log", "wb") as lf:
56
+ rc = subprocess.call(cmd, env=env, stdout=lf, stderr=subprocess.STDOUT)
53
57
  print(f"[bake] kernel_warmup rc={rc}", flush=True)
54
58
 
55
59
  # ship the whole cache tree back (mega blob + metadata + raw triton/inductor dirs).
@@ -16,8 +16,8 @@ A standardized, production-grade output theme for every `flash` command.
16
16
  disable the themed layout with `FLASH_STYLE=0`. `NO_COLOR` keeps the layout but drops ANSI color.
17
17
  - **No new dependencies:** pure standard library, like the rest of the client CLI.
18
18
 
19
- The rendering lives in `flash/cli/main/render.py`; the command wiring is in
20
- `flash/cli/main/commands.py` and `envpush.py`.
19
+ The rendering lives in `flash/cli/render.py`; the command wiring is in
20
+ `flash/cli/commands.py` and `envpush.py`.
21
21
 
22
22
  ## Preview
23
23
 
@@ -22,8 +22,8 @@ import tempfile
22
22
  from contextlib import redirect_stderr, redirect_stdout
23
23
  from pathlib import Path
24
24
 
25
- from flash.cli import main as cli
26
- from flash.cli.main import render
25
+ import flash.cli as cli
26
+ from flash.cli import render
27
27
 
28
28
 
29
29
  class _Utf8(io.StringIO):
@@ -185,7 +185,7 @@ def _capture_argv(argv, *, styled, theme="dark", cwd=None, with_stderr=False) ->
185
185
  ``with_stderr`` is set, the stderr note (e.g. `flash train`'s hand-off line) is shown first,
186
186
  as it appears in a real terminal before the streamed logs."""
187
187
  _set_style(styled, theme)
188
- commands = sys.modules["flash.cli.main.commands"]
188
+ commands = sys.modules["flash.cli.commands"]
189
189
  saved = commands.client_from_config
190
190
  commands.client_from_config = lambda *a, **k: FAKE
191
191
  out, err = _Utf8(), _Utf8()
@@ -476,7 +476,7 @@ def main():
476
476
  out_path = Path(sys.argv[1]) if len(sys.argv) > 1 else Path(__file__).parent / "index.html"
477
477
  # Deterministic preview: pin the dry-run id (cmd_train calls new_run_id() every time) so the
478
478
  # committed gallery doesn't churn on every regeneration.
479
- sys.modules["flash.cli.main.commands"].new_run_id = lambda: "flash-1718900000-d0cf00ed"
479
+ sys.modules["flash.cli.commands"].new_run_id = lambda: "flash-1718900000-d0cf00ed"
480
480
  with tempfile.TemporaryDirectory() as td:
481
481
  # Point the installed-env registry at an empty temp manifest so `flash env list` never
482
482
  # leaks a developer's real installed env slugs (~/.flash/envs.json) into the preview.
@@ -1,7 +1,7 @@
1
1
  """Flash — managed LoRA post-training: log in with your freesolo key, train.
2
2
 
3
3
  A focused developer experience (TOML run specs, pluggable environments,
4
- CLI/API/MCP entry points, adapter deployment). Users authenticate with their
4
+ CLI/API entry points, adapter deployment). Users authenticate with their
5
5
  freesolo API key (`flash login`); the control plane runs each job on a managed
6
6
  RunPod GPU behind the scenes.
7
7
  """
@@ -5,8 +5,13 @@ below): it installs a ``flash`` CLI that talks to the production control plane.
5
5
  package ``freesolo-flash-dev`` is built from this *same source* with only this one line rewritten
6
6
  to ``CHANNEL = "dev"`` (see ``scripts/build_dev_dist.py``); everything that differs between the two
7
7
  channels — the CLI name, the PyPI distribution name, the default control-plane URL — derives from
8
- it below, so there is exactly one thing to flip. An explicit ``FLASH_API_URL`` /
9
- ``flash login --api-url`` always wins; the channel only picks the *default* plane.
8
+ it below, so there is exactly one thing to flip. An explicit ``FLASH_API_URL`` / the
9
+ ``login --api-url`` flag always wins; the channel only picks the *default* plane.
10
+
11
+ Both channels ship the SAME import package (``flash/``) with this one baked line, so a single
12
+ environment holds exactly ONE channel — installing ``freesolo-flash`` and ``freesolo-flash-dev``
13
+ into the same environment makes the later install win for both CLIs. For side-by-side prod and
14
+ staging, install each channel in its own virtualenv (or via ``pipx``, which isolates per tool).
10
15
  """
11
16
 
12
17
  from __future__ import annotations
@@ -46,6 +46,13 @@ class ModelInfo:
46
46
  # tier needs a bigger card than SFT (the colocate 2nd weight copy + KV pool). Consumed by
47
47
  # engine.vram.model_required_vram_gb.
48
48
  grpo_min_vram_gb: int = 0
49
+ # SFT hard VRAM floor (GB). 0 => SFT sizes purely from the param-based estimate and is free to
50
+ # down-route to a smaller validated card (the default — e.g. a 4B SFT estimates ~17 GB and rents
51
+ # a 48 GB card, NOT its ``min_vram_gb`` reference). Set it ONLY when a curated model must not be
52
+ # placed on the cheapest card the estimate would otherwise allow — e.g. a very large checkpoint
53
+ # whose ~param-est margin over the frozen-weights floor is too thin on the next card down.
54
+ # Consumed by engine.vram.model_required_vram_gb (the SFT analog of ``grpo_min_vram_gb``).
55
+ sft_min_vram_gb: int = 0
49
56
  notes: str = ""
50
57
  # Worker container disk this model needs (GB). 0 = the platform default (64 GB)
51
58
  # suffices. The runner raises gpu.disk_gb to at least this, so big-checkpoint
@@ -64,8 +71,14 @@ class ModelInfo:
64
71
  # completion cap. Curated per model below; defaults to the open-model fallback.
65
72
  vocab_size: int = _DEFAULT_VOCAB_SIZE
66
73
  # Total parameters in billions — the numeric model size the cost estimator reads directly
67
- # (no parsing of the ``params`` display string). Curated per catalog model below.
74
+ # (no parsing of the ``params`` display string). Drives the memory/size terms (VRAM, disk,
75
+ # download), which always size the FULL checkpoint. Curated per catalog model below.
68
76
  params_b: float = 0.0
77
+ # Parameters ACTIVE per token in billions — only meaningful for an MoE, where a token routes
78
+ # through a small subset of experts. The cost estimator's per-token FLOPs/step-time term reads
79
+ # this (a token exercises only the active params), while VRAM/disk/download keep using the total
80
+ # ``params_b``. 0.0 (the dense default) means "same as params_b" — every token hits every param.
81
+ active_params_b: float = 0.0
69
82
 
70
83
  def to_dict(self) -> dict[str, Any]:
71
84
  return asdict(self)
@@ -89,7 +102,7 @@ MODELS: dict[str, ModelInfo] = {
89
102
  thinking="hybrid",
90
103
  notes="On-device class SLM (131k ctx); standard Llama architecture.",
91
104
  ),
92
- # ---- Qwen3.5 dense family: validated on the modern worker stack ----
105
+ # Qwen3.5 dense family: validated on the modern worker stack
93
106
  # (trl 1.x / vllm 0.19 / transformers 5.x). Trained + served TEXT-ONLY: the
94
107
  # checkpoints are natively multimodal, so LoRA excludes the vision tower and vLLM
95
108
  # loads language_model_only (see flash.engine.worker). Each entry passed a real
@@ -153,6 +166,64 @@ MODELS: dict[str, ModelInfo] = {
153
166
  "(two bf16 copies + KV + the 248k-vocab fp32 logits) needs an 80 GB-class card "
154
167
  "(grpo_min_vram_gb floor).",
155
168
  ),
169
+ # ---- Qwen3.6 MoE: the big-checkpoint tier (H200 for SFT, B200 for GRPO) ----
170
+ # 35B-A3B is a Mixture-of-Experts checkpoint: ~3B parameters are ACTIVE per token, but all 35B
171
+ # are materialized on the GPU, so the MEMORY/disk/download terms size the FULL 35B (~70 GB bf16)
172
+ # while the COMPUTE terms (activations, KV pool, rank-linear LoRA) size the ~3B active backbone
173
+ # (engine.vram is MoE-aware via active_params_b). bf16 LoRA, NOT QLoRA — same reason as the 9B.
174
+ # Because the resident weights dominate and the active compute is tiny, the GPU tier is set by
175
+ # how many weight copies each algorithm holds, NOT by context length:
176
+ # * SFT — ONE ~70 GB copy + small active-compute (~82 GB peak, ~flat in context) -> fits the
177
+ # 141 GB H200 with wide margin (context ~unbounded by VRAM). Live-validated on a B200; the
178
+ # H200 down-tier is the MoE-aware win (cheaper, plentiful stock).
179
+ # * GRPO — colocates the vLLM rollout, so TWO ~70 GB copies (trainer + engine) are resident at
180
+ # the rollout peak (~167 GB) -> needs the 180 GB B200; the H200 can't hold both. The MoE
181
+ # rollout weight-sync needed a fused-expert name fix (engine.worker.lora._remap_vl_sync_weights
182
+ # passes the multimodal ``model.language_model.*`` names through to vLLM's own mapper). Both
183
+ # single- and multi-turn GRPO live-validated on a B200.
184
+ "Qwen/Qwen3.6-35B-A3B": ModelInfo(
185
+ id="Qwen/Qwen3.6-35B-A3B",
186
+ display_name="Qwen3.6 35B-A3B (MoE)",
187
+ params="35B total / ~3B active (MoE)",
188
+ # TOTAL parameters (billions) the SFT VRAM equation + cost projection read. For an MoE
189
+ # checkpoint the size term is the TOTAL count, not the ~3B active: download/VRAM/disk size the
190
+ # FULL checkpoint that lands on the GPU (all experts are materialized). 35.0 is the CALIBRATED
191
+ # total: the live-validated single-B200 SFT fit depends on it — the honest-peak equation lands
192
+ # at the 180 GB B200's usable budget, and the marketing "~35.95B" figure tips it over (186 GB,
193
+ # see test_sft_equation_covers_honest_peak_across_seq_boundary). Keep 35.0.
194
+ params_b=35.0,
195
+ # ~3B ACTIVE per token (the "A3B" in the name): a token routes through a small subset of
196
+ # experts, so cost/step-time FLOPs scale with ~3B, not the 35B total. Without this the
197
+ # estimator would price SFT as if every token exercised all 35B params — ~10x too slow/costly.
198
+ active_params_b=3.0,
199
+ vocab_size=248_320,
200
+ algos=("sft", "grpo"),
201
+ min_vram_gb=141,
202
+ # Hard SFT floor: with MoE-aware sizing the SFT estimate is ~82 GB (the 70 GB resident weights
203
+ # dominate; the active-3B activations/KV are tiny), which would otherwise down-route to the
204
+ # 96 GB RTX Pro 6000 (consumer Blackwell, thin margin over the 70 GB base) or the 80 GB H100
205
+ # (too tight). Floor to 100 GB so SFT lands on the 141 GB H200 — a datacenter card with wide
206
+ # margin, ~$1.50/hr cheaper than the B200 (which SFT does not need).
207
+ sft_min_vram_gb=100,
208
+ # GRPO floor = the 180 GB B200 (colocated GRPO holds two ~70 GB weight copies + a KV pool; the
209
+ # 141 GB H200 can't hold the trainer + vLLM rollout). The base ~167 GB two-copy estimate already
210
+ # routes GRPO to the B200, but setting the floor ALSO ENGAGES the long-context escalation —
211
+ # model_required_vram_gb only adds grpo_seq_escalation_gb when a grpo floor is set. The
212
+ # escalation keys on the ~3B ACTIVE params, so default/moderate GRPO still fits the B200 but a
213
+ # long (>~16k-token, e.g. 32k) rollout is sized PAST 180 GB and rejected at parse time, instead
214
+ # of booting a B200 and OOMing in vLLM's KV allocation.
215
+ grpo_min_vram_gb=180,
216
+ quant="bf16",
217
+ recommended_gpu="H200",
218
+ thinking="hybrid",
219
+ # ~70 GB bf16 checkpoint. Peak disk = HF download (~70 GB) + Xet temp (~70 GB) + per-step
220
+ # deployable-checkpoint saves; floor to 200 GB so the rent doesn't hit "No space left on
221
+ # device" (the runner raises gpu.disk_gb to this out of the box).
222
+ min_disk_gb=200,
223
+ notes="MoE (35B total / ~3B active), bf16 LoRA. SFT runs on the 141 GB H200 (the ~70 GB "
224
+ "weights dominate; active-3B compute keeps activations/KV tiny, so context is ~unbounded by "
225
+ "VRAM); colocated GRPO needs the 180 GB B200 (trainer + vLLM rollout = two 70 GB copies).",
226
+ ),
156
227
  }
157
228
 
158
229
 
@@ -17,9 +17,9 @@ from flash._logging import configure_logging, get_logger
17
17
  from flash._update_check import emit_update_notice, maybe_start_update_check
18
18
 
19
19
  # Command handlers + the patched client surface live in submodules; re-export them so
20
- # `flash.cli.main` stays the single public import surface (and so monkeypatching
21
- # `flash.cli.main.commands` reaches the bare globals the handlers read).
22
- from flash.cli.main.commands import ( # noqa: F401
20
+ # `flash.cli` stays the single public import surface (and so monkeypatching
21
+ # `flash.cli.commands` reaches the bare globals the handlers read).
22
+ from flash.cli.commands import ( # noqa: F401
23
23
  _CLI_DONE_STATES,
24
24
  _OK_STATES,
25
25
  _STARTER_ENV_PY,
@@ -34,6 +34,7 @@ from flash.cli.main.commands import ( # noqa: F401
34
34
  cmd_deployments,
35
35
  cmd_env_list,
36
36
  cmd_env_setup,
37
+ cmd_export,
37
38
  cmd_gpus,
38
39
  cmd_login,
39
40
  cmd_models,
@@ -45,9 +46,9 @@ from flash.cli.main.commands import ( # noqa: F401
45
46
  cmd_whoami,
46
47
  verify_freesolo_key,
47
48
  )
48
- from flash.cli.main.envpush import cmd_env_install, cmd_env_push
49
+ from flash.cli.envpush import cmd_env_install, cmd_env_push
49
50
 
50
- logger = get_logger("flash.cli.main")
51
+ logger = get_logger("flash.cli")
51
52
 
52
53
 
53
54
  def main(argv: list[str] | None = None) -> int:
@@ -194,6 +195,39 @@ def main(argv: list[str] | None = None) -> int:
194
195
  undeploy.add_argument("run_id")
195
196
  undeploy.set_defaults(func=cmd_undeploy)
196
197
 
198
+ export = sub.add_parser(
199
+ "export", help="export a trained adapter to your own HuggingFace repo"
200
+ )
201
+ export.add_argument(
202
+ "--adapter-id",
203
+ dest="adapter_id",
204
+ required=True,
205
+ help="the Freesolo adapter id (the run id) to export",
206
+ )
207
+ export.add_argument(
208
+ "--repository",
209
+ required=True,
210
+ help="destination HuggingFace repo 'owner/name' (created if it doesn't exist)",
211
+ )
212
+ export.add_argument(
213
+ "--api-key",
214
+ help="HuggingFace token with write access to --repository "
215
+ "(default: HF_TOKEN from your shell or a local .env / .env.local)",
216
+ )
217
+ export.add_argument(
218
+ "--step",
219
+ type=int,
220
+ default=None,
221
+ help="export a specific intermediate checkpoint (see `flash checkpoints <adapter-id>`) "
222
+ "instead of the run's final adapter; works even for a run cancelled mid-RL",
223
+ )
224
+ export.add_argument(
225
+ "--public",
226
+ action="store_true",
227
+ help="create the destination repo as public (default: private)",
228
+ )
229
+ export.set_defaults(func=cmd_export)
230
+
197
231
  deployments = sub.add_parser("deployments", help="list active serving deployments")
198
232
  deployments.set_defaults(func=cmd_deployments)
199
233
 
@@ -1,6 +1,6 @@
1
1
  import sys
2
2
 
3
- from flash.cli.main import main
3
+ from flash.cli import main
4
4
 
5
5
  if __name__ == "__main__":
6
6
  sys.exit(main())