ob-metaflow-stubs 6.0.8.1__py2.py3-none-any.whl → 6.0.8.2__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (262)
  1. metaflow-stubs/__init__.pyi +1003 -1003
  2. metaflow-stubs/cards.pyi +1 -1
  3. metaflow-stubs/cli.pyi +1 -1
  4. metaflow-stubs/cli_components/__init__.pyi +1 -1
  5. metaflow-stubs/cli_components/utils.pyi +1 -1
  6. metaflow-stubs/client/__init__.pyi +1 -1
  7. metaflow-stubs/client/core.pyi +5 -5
  8. metaflow-stubs/client/filecache.pyi +1 -1
  9. metaflow-stubs/events.pyi +2 -2
  10. metaflow-stubs/exception.pyi +1 -1
  11. metaflow-stubs/flowspec.pyi +4 -4
  12. metaflow-stubs/generated_for.txt +1 -1
  13. metaflow-stubs/includefile.pyi +3 -3
  14. metaflow-stubs/meta_files.pyi +1 -1
  15. metaflow-stubs/metadata_provider/__init__.pyi +1 -1
  16. metaflow-stubs/metadata_provider/heartbeat.pyi +1 -1
  17. metaflow-stubs/metadata_provider/metadata.pyi +2 -2
  18. metaflow-stubs/metadata_provider/util.pyi +1 -1
  19. metaflow-stubs/metaflow_config.pyi +1 -1
  20. metaflow-stubs/metaflow_current.pyi +39 -39
  21. metaflow-stubs/metaflow_git.pyi +1 -1
  22. metaflow-stubs/mf_extensions/__init__.pyi +1 -1
  23. metaflow-stubs/mf_extensions/obcheckpoint/__init__.pyi +1 -1
  24. metaflow-stubs/mf_extensions/obcheckpoint/plugins/__init__.pyi +1 -1
  25. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/__init__.pyi +1 -1
  26. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/__init__.pyi +1 -1
  27. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/async_cards.pyi +1 -1
  28. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/deco_injection_mixin.pyi +1 -1
  29. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/extra_components.pyi +3 -3
  30. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/__init__.pyi +1 -1
  31. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/cards/__init__.pyi +1 -1
  32. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/cards/checkpoint_lister.pyi +2 -2
  33. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/cards/lineage_card.pyi +1 -1
  34. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/checkpoint_storage.pyi +3 -3
  35. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/constructors.pyi +1 -1
  36. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/core.pyi +2 -2
  37. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/decorator.pyi +4 -4
  38. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/exceptions.pyi +1 -1
  39. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/final_api.pyi +2 -2
  40. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/lineage.pyi +1 -1
  41. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/__init__.pyi +1 -1
  42. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/context.pyi +1 -1
  43. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/core.pyi +1 -1
  44. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/decorator.pyi +1 -1
  45. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/exceptions.pyi +1 -1
  46. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/task_utils.pyi +2 -2
  47. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/utils.pyi +1 -1
  48. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastructures.pyi +2 -2
  49. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/exceptions.pyi +1 -1
  50. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/hf_hub/__init__.pyi +1 -1
  51. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/hf_hub/decorator.pyi +2 -2
  52. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/__init__.pyi +1 -1
  53. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/core.pyi +2 -2
  54. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/exceptions.pyi +1 -1
  55. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/model_storage.pyi +2 -2
  56. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/__init__.pyi +1 -1
  57. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/flowspec_utils.pyi +1 -1
  58. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/general.pyi +1 -1
  59. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/identity_utils.pyi +1 -1
  60. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/serialization_handler/__init__.pyi +1 -1
  61. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/serialization_handler/base.pyi +1 -1
  62. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/serialization_handler/tar.pyi +2 -2
  63. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/tar_utils.pyi +2 -2
  64. metaflow-stubs/mf_extensions/outerbounds/__init__.pyi +1 -1
  65. metaflow-stubs/mf_extensions/outerbounds/plugins/__init__.pyi +1 -1
  66. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/__init__.pyi +1 -1
  67. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/__init__.pyi +1 -1
  68. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/_state_machine.pyi +1 -1
  69. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/_vendor/__init__.pyi +1 -1
  70. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/_vendor/spinner/__init__.pyi +1 -1
  71. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/_vendor/spinner/spinners.pyi +1 -1
  72. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/app_cli.pyi +2 -2
  73. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/app_config.pyi +1 -1
  74. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/capsule.pyi +2 -2
  75. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/click_importer.pyi +1 -1
  76. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/code_package/__init__.pyi +1 -1
  77. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/code_package/code_packager.pyi +2 -2
  78. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/__init__.pyi +1 -1
  79. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/cli_generator.pyi +1 -1
  80. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/config_utils.pyi +4 -4
  81. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/schema_export.pyi +1 -1
  82. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/typed_configs.pyi +2 -2
  83. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/unified_config.pyi +2 -2
  84. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/dependencies.pyi +3 -3
  85. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/deployer.pyi +4 -4
  86. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/experimental/__init__.pyi +1 -1
  87. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/perimeters.pyi +1 -1
  88. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/utils.pyi +1 -1
  89. metaflow-stubs/mf_extensions/outerbounds/plugins/aws/__init__.pyi +1 -1
  90. metaflow-stubs/mf_extensions/outerbounds/plugins/aws/assume_role_decorator.pyi +2 -2
  91. metaflow-stubs/mf_extensions/outerbounds/plugins/card_utilities/__init__.pyi +1 -1
  92. metaflow-stubs/mf_extensions/outerbounds/plugins/card_utilities/async_cards.pyi +1 -1
  93. metaflow-stubs/mf_extensions/outerbounds/plugins/card_utilities/injector.pyi +1 -1
  94. metaflow-stubs/mf_extensions/outerbounds/plugins/checkpoint_datastores/__init__.pyi +1 -1
  95. metaflow-stubs/mf_extensions/outerbounds/plugins/checkpoint_datastores/coreweave.pyi +2 -2
  96. metaflow-stubs/mf_extensions/outerbounds/plugins/checkpoint_datastores/nebius.pyi +2 -2
  97. metaflow-stubs/mf_extensions/outerbounds/plugins/fast_bakery/__init__.pyi +1 -1
  98. metaflow-stubs/mf_extensions/outerbounds/plugins/fast_bakery/baker.pyi +3 -3
  99. metaflow-stubs/mf_extensions/outerbounds/plugins/fast_bakery/docker_environment.pyi +1 -1
  100. metaflow-stubs/mf_extensions/outerbounds/plugins/fast_bakery/fast_bakery.pyi +1 -1
  101. metaflow-stubs/mf_extensions/outerbounds/plugins/kubernetes/__init__.pyi +1 -1
  102. metaflow-stubs/mf_extensions/outerbounds/plugins/kubernetes/pod_killer.pyi +1 -1
  103. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/__init__.pyi +1 -1
  104. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/constants.pyi +1 -1
  105. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/exceptions.pyi +1 -1
  106. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/ollama.pyi +1 -1
  107. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/status_card.pyi +1 -1
  108. metaflow-stubs/mf_extensions/outerbounds/plugins/snowflake/__init__.pyi +1 -1
  109. metaflow-stubs/mf_extensions/outerbounds/plugins/snowflake/snowflake.pyi +1 -1
  110. metaflow-stubs/mf_extensions/outerbounds/profilers/__init__.pyi +1 -1
  111. metaflow-stubs/mf_extensions/outerbounds/profilers/gpu.pyi +1 -1
  112. metaflow-stubs/mf_extensions/outerbounds/remote_config.pyi +3 -3
  113. metaflow-stubs/mf_extensions/outerbounds/toplevel/__init__.pyi +1 -1
  114. metaflow-stubs/mf_extensions/outerbounds/toplevel/global_aliases_for_metaflow_package.pyi +1 -1
  115. metaflow-stubs/mf_extensions/outerbounds/toplevel/s3_proxy.pyi +1 -1
  116. metaflow-stubs/multicore_utils.pyi +1 -1
  117. metaflow-stubs/ob_internal.pyi +1 -1
  118. metaflow-stubs/packaging_sys/__init__.pyi +6 -6
  119. metaflow-stubs/packaging_sys/backend.pyi +3 -3
  120. metaflow-stubs/packaging_sys/distribution_support.pyi +4 -4
  121. metaflow-stubs/packaging_sys/tar_backend.pyi +5 -5
  122. metaflow-stubs/packaging_sys/utils.pyi +1 -1
  123. metaflow-stubs/packaging_sys/v1.pyi +2 -2
  124. metaflow-stubs/parameters.pyi +3 -3
  125. metaflow-stubs/plugins/__init__.pyi +8 -8
  126. metaflow-stubs/plugins/airflow/__init__.pyi +1 -1
  127. metaflow-stubs/plugins/airflow/airflow_utils.pyi +1 -1
  128. metaflow-stubs/plugins/airflow/exception.pyi +1 -1
  129. metaflow-stubs/plugins/airflow/sensors/__init__.pyi +1 -1
  130. metaflow-stubs/plugins/airflow/sensors/base_sensor.pyi +1 -1
  131. metaflow-stubs/plugins/airflow/sensors/external_task_sensor.pyi +1 -1
  132. metaflow-stubs/plugins/airflow/sensors/s3_sensor.pyi +1 -1
  133. metaflow-stubs/plugins/argo/__init__.pyi +1 -1
  134. metaflow-stubs/plugins/argo/argo_client.pyi +1 -1
  135. metaflow-stubs/plugins/argo/argo_events.pyi +1 -1
  136. metaflow-stubs/plugins/argo/argo_workflows.pyi +3 -3
  137. metaflow-stubs/plugins/argo/argo_workflows_decorator.pyi +2 -2
  138. metaflow-stubs/plugins/argo/argo_workflows_deployer.pyi +3 -3
  139. metaflow-stubs/plugins/argo/argo_workflows_deployer_objects.pyi +2 -2
  140. metaflow-stubs/plugins/argo/exit_hooks.pyi +2 -2
  141. metaflow-stubs/plugins/aws/__init__.pyi +1 -1
  142. metaflow-stubs/plugins/aws/aws_client.pyi +1 -1
  143. metaflow-stubs/plugins/aws/aws_utils.pyi +1 -1
  144. metaflow-stubs/plugins/aws/batch/__init__.pyi +1 -1
  145. metaflow-stubs/plugins/aws/batch/batch.pyi +1 -1
  146. metaflow-stubs/plugins/aws/batch/batch_client.pyi +1 -1
  147. metaflow-stubs/plugins/aws/batch/batch_decorator.pyi +1 -1
  148. metaflow-stubs/plugins/aws/secrets_manager/__init__.pyi +1 -1
  149. metaflow-stubs/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.pyi +1 -1
  150. metaflow-stubs/plugins/aws/step_functions/__init__.pyi +1 -1
  151. metaflow-stubs/plugins/aws/step_functions/event_bridge_client.pyi +1 -1
  152. metaflow-stubs/plugins/aws/step_functions/schedule_decorator.pyi +1 -1
  153. metaflow-stubs/plugins/aws/step_functions/step_functions.pyi +1 -1
  154. metaflow-stubs/plugins/aws/step_functions/step_functions_client.pyi +1 -1
  155. metaflow-stubs/plugins/aws/step_functions/step_functions_deployer.pyi +3 -3
  156. metaflow-stubs/plugins/aws/step_functions/step_functions_deployer_objects.pyi +2 -2
  157. metaflow-stubs/plugins/azure/__init__.pyi +1 -1
  158. metaflow-stubs/plugins/azure/azure_credential.pyi +1 -1
  159. metaflow-stubs/plugins/azure/azure_exceptions.pyi +1 -1
  160. metaflow-stubs/plugins/azure/azure_secret_manager_secrets_provider.pyi +1 -1
  161. metaflow-stubs/plugins/azure/azure_utils.pyi +1 -1
  162. metaflow-stubs/plugins/azure/blob_service_client_factory.pyi +1 -1
  163. metaflow-stubs/plugins/azure/includefile_support.pyi +1 -1
  164. metaflow-stubs/plugins/cards/__init__.pyi +1 -1
  165. metaflow-stubs/plugins/cards/card_client.pyi +2 -2
  166. metaflow-stubs/plugins/cards/card_creator.pyi +1 -1
  167. metaflow-stubs/plugins/cards/card_datastore.pyi +1 -1
  168. metaflow-stubs/plugins/cards/card_decorator.pyi +2 -2
  169. metaflow-stubs/plugins/cards/card_modules/__init__.pyi +1 -1
  170. metaflow-stubs/plugins/cards/card_modules/basic.pyi +2 -2
  171. metaflow-stubs/plugins/cards/card_modules/card.pyi +1 -1
  172. metaflow-stubs/plugins/cards/card_modules/components.pyi +3 -3
  173. metaflow-stubs/plugins/cards/card_modules/convert_to_native_type.pyi +1 -1
  174. metaflow-stubs/plugins/cards/card_modules/renderer_tools.pyi +1 -1
  175. metaflow-stubs/plugins/cards/card_modules/test_cards.pyi +1 -1
  176. metaflow-stubs/plugins/cards/card_resolver.pyi +1 -1
  177. metaflow-stubs/plugins/cards/component_serializer.pyi +1 -1
  178. metaflow-stubs/plugins/cards/exception.pyi +1 -1
  179. metaflow-stubs/plugins/catch_decorator.pyi +2 -2
  180. metaflow-stubs/plugins/datatools/__init__.pyi +1 -1
  181. metaflow-stubs/plugins/datatools/local.pyi +1 -1
  182. metaflow-stubs/plugins/datatools/s3/__init__.pyi +1 -1
  183. metaflow-stubs/plugins/datatools/s3/s3.pyi +3 -3
  184. metaflow-stubs/plugins/datatools/s3/s3tail.pyi +1 -1
  185. metaflow-stubs/plugins/datatools/s3/s3util.pyi +1 -1
  186. metaflow-stubs/plugins/debug_logger.pyi +1 -1
  187. metaflow-stubs/plugins/debug_monitor.pyi +1 -1
  188. metaflow-stubs/plugins/environment_decorator.pyi +1 -1
  189. metaflow-stubs/plugins/events_decorator.pyi +1 -1
  190. metaflow-stubs/plugins/exit_hook/__init__.pyi +1 -1
  191. metaflow-stubs/plugins/exit_hook/exit_hook_decorator.pyi +1 -1
  192. metaflow-stubs/plugins/frameworks/__init__.pyi +1 -1
  193. metaflow-stubs/plugins/frameworks/pytorch.pyi +1 -1
  194. metaflow-stubs/plugins/gcp/__init__.pyi +1 -1
  195. metaflow-stubs/plugins/gcp/gcp_secret_manager_secrets_provider.pyi +1 -1
  196. metaflow-stubs/plugins/gcp/gs_exceptions.pyi +1 -1
  197. metaflow-stubs/plugins/gcp/gs_storage_client_factory.pyi +1 -1
  198. metaflow-stubs/plugins/gcp/gs_utils.pyi +1 -1
  199. metaflow-stubs/plugins/gcp/includefile_support.pyi +1 -1
  200. metaflow-stubs/plugins/kubernetes/__init__.pyi +1 -1
  201. metaflow-stubs/plugins/kubernetes/kube_utils.pyi +2 -2
  202. metaflow-stubs/plugins/kubernetes/kubernetes.pyi +1 -1
  203. metaflow-stubs/plugins/kubernetes/kubernetes_client.pyi +1 -1
  204. metaflow-stubs/plugins/kubernetes/kubernetes_decorator.pyi +1 -1
  205. metaflow-stubs/plugins/kubernetes/kubernetes_jobsets.pyi +1 -1
  206. metaflow-stubs/plugins/kubernetes/spot_monitor_sidecar.pyi +1 -1
  207. metaflow-stubs/plugins/ollama/__init__.pyi +2 -2
  208. metaflow-stubs/plugins/optuna/__init__.pyi +1 -1
  209. metaflow-stubs/plugins/parallel_decorator.pyi +1 -1
  210. metaflow-stubs/plugins/perimeters.pyi +1 -1
  211. metaflow-stubs/plugins/project_decorator.pyi +1 -1
  212. metaflow-stubs/plugins/pypi/__init__.pyi +1 -1
  213. metaflow-stubs/plugins/pypi/conda_decorator.pyi +1 -1
  214. metaflow-stubs/plugins/pypi/conda_environment.pyi +2 -2
  215. metaflow-stubs/plugins/pypi/parsers.pyi +1 -1
  216. metaflow-stubs/plugins/pypi/pypi_decorator.pyi +1 -1
  217. metaflow-stubs/plugins/pypi/pypi_environment.pyi +1 -1
  218. metaflow-stubs/plugins/pypi/utils.pyi +1 -1
  219. metaflow-stubs/plugins/resources_decorator.pyi +1 -1
  220. metaflow-stubs/plugins/retry_decorator.pyi +1 -1
  221. metaflow-stubs/plugins/secrets/__init__.pyi +2 -2
  222. metaflow-stubs/plugins/secrets/inline_secrets_provider.pyi +2 -2
  223. metaflow-stubs/plugins/secrets/secrets_decorator.pyi +1 -1
  224. metaflow-stubs/plugins/secrets/secrets_func.pyi +1 -1
  225. metaflow-stubs/plugins/secrets/secrets_spec.pyi +1 -1
  226. metaflow-stubs/plugins/secrets/utils.pyi +1 -1
  227. metaflow-stubs/plugins/snowflake/__init__.pyi +1 -1
  228. metaflow-stubs/plugins/storage_executor.pyi +1 -1
  229. metaflow-stubs/plugins/test_unbounded_foreach_decorator.pyi +1 -1
  230. metaflow-stubs/plugins/timeout_decorator.pyi +2 -2
  231. metaflow-stubs/plugins/torchtune/__init__.pyi +1 -1
  232. metaflow-stubs/plugins/uv/__init__.pyi +1 -1
  233. metaflow-stubs/plugins/uv/uv_environment.pyi +1 -1
  234. metaflow-stubs/profilers/__init__.pyi +1 -1
  235. metaflow-stubs/pylint_wrapper.pyi +1 -1
  236. metaflow-stubs/runner/__init__.pyi +1 -1
  237. metaflow-stubs/runner/deployer.pyi +6 -6
  238. metaflow-stubs/runner/deployer_impl.pyi +2 -2
  239. metaflow-stubs/runner/metaflow_runner.pyi +2 -2
  240. metaflow-stubs/runner/nbdeploy.pyi +1 -1
  241. metaflow-stubs/runner/nbrun.pyi +1 -1
  242. metaflow-stubs/runner/subprocess_manager.pyi +1 -1
  243. metaflow-stubs/runner/utils.pyi +1 -1
  244. metaflow-stubs/system/__init__.pyi +1 -1
  245. metaflow-stubs/system/system_logger.pyi +1 -1
  246. metaflow-stubs/system/system_monitor.pyi +1 -1
  247. metaflow-stubs/tagging_util.pyi +1 -1
  248. metaflow-stubs/tuple_util.pyi +1 -1
  249. metaflow-stubs/user_configs/__init__.pyi +1 -1
  250. metaflow-stubs/user_configs/config_options.pyi +3 -3
  251. metaflow-stubs/user_configs/config_parameters.pyi +6 -6
  252. metaflow-stubs/user_decorators/__init__.pyi +1 -1
  253. metaflow-stubs/user_decorators/common.pyi +1 -1
  254. metaflow-stubs/user_decorators/mutable_flow.pyi +5 -5
  255. metaflow-stubs/user_decorators/mutable_step.pyi +3 -3
  256. metaflow-stubs/user_decorators/user_flow_decorator.pyi +4 -4
  257. metaflow-stubs/user_decorators/user_step_decorator.pyi +5 -5
  258. {ob_metaflow_stubs-6.0.8.1.dist-info → ob_metaflow_stubs-6.0.8.2.dist-info}/METADATA +1 -1
  259. ob_metaflow_stubs-6.0.8.2.dist-info/RECORD +262 -0
  260. ob_metaflow_stubs-6.0.8.1.dist-info/RECORD +0 -262
  261. {ob_metaflow_stubs-6.0.8.1.dist-info → ob_metaflow_stubs-6.0.8.2.dist-info}/WHEEL +0 -0
  262. {ob_metaflow_stubs-6.0.8.1.dist-info → ob_metaflow_stubs-6.0.8.2.dist-info}/top_level.txt +0 -0
@@ -1,15 +1,15 @@
1
1
  ######################################################################################################
2
2
  # Auto-generated Metaflow stub file #
3
3
  # MF version: 2.17.1.0+obcheckpoint(0.2.4);ob(v1) #
4
- # Generated on 2025-08-21T23:31:59.916830 #
4
+ # Generated on 2025-08-22T06:36:31.648602 #
5
5
  ######################################################################################################
6
6
 
7
7
  from __future__ import annotations
8
8
 
9
9
  import typing
10
10
  if typing.TYPE_CHECKING:
11
- import datetime
12
11
  import typing
12
+ import datetime
13
13
  FlowSpecDerived = typing.TypeVar("FlowSpecDerived", bound="FlowSpec", contravariant=False, covariant=False)
14
14
  StepFlag = typing.NewType("StepFlag", bool)
15
15
 
@@ -48,9 +48,9 @@ from . import plugins as plugins
48
48
  from .mf_extensions.outerbounds.toplevel.global_aliases_for_metaflow_package import S3 as S3
49
49
  from . import includefile as includefile
50
50
  from .includefile import IncludeFile as IncludeFile
51
- from .plugins.pypi.parsers import conda_environment_yml_parser as conda_environment_yml_parser
52
51
  from .plugins.pypi.parsers import pyproject_toml_parser as pyproject_toml_parser
53
52
  from .plugins.pypi.parsers import requirements_txt_parser as requirements_txt_parser
53
+ from .plugins.pypi.parsers import conda_environment_yml_parser as conda_environment_yml_parser
54
54
  from . import client as client
55
55
  from .client.core import namespace as namespace
56
56
  from .client.core import get_namespace as get_namespace
@@ -167,177 +167,294 @@ def step(f: typing.Union[typing.Callable[[FlowSpecDerived], None], typing.Callab
167
167
  """
168
168
  ...
169
169
 
170
- def ollama(*, models: list, backend: str, force_pull: bool, cache_update_policy: str, force_cache_update: bool, debug: bool, circuit_breaker_config: dict, timeout_config: dict) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
170
+ def vllm(*, model: str, backend: str, openai_api_server: bool, debug: bool, card_refresh_interval: int, max_retries: int, retry_alert_frequency: int, engine_args: dict) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
171
171
  """
172
- This decorator is used to run Ollama APIs as Metaflow task sidecars.
172
+ This decorator is used to run vllm APIs as Metaflow task sidecars.
173
173
 
174
174
  User code call
175
175
  --------------
176
- @ollama(
177
- models=[...],
176
+ @vllm(
177
+ model="...",
178
178
  ...
179
179
  )
180
180
 
181
181
  Valid backend options
182
182
  ---------------------
183
183
  - 'local': Run as a separate process on the local task machine.
184
- - (TODO) 'managed': Outerbounds hosts and selects compute provider.
185
- - (TODO) 'remote': Spin up separate instance to serve Ollama models.
186
184
 
187
185
  Valid model options
188
186
  -------------------
189
- Any model here https://ollama.com/search, e.g. 'llama3.2', 'llama3.3'
187
+ Any HuggingFace model identifier, e.g. 'meta-llama/Llama-3.2-1B'
188
+
189
+ NOTE: vLLM's OpenAI-compatible server serves ONE model per server instance.
190
+ If you need multiple models, you must create multiple @vllm decorators.
190
191
 
191
192
 
192
193
  Parameters
193
194
  ----------
194
- models: list[str]
195
- List of Ollama containers running models in sidecars.
195
+ model: str
196
+ HuggingFace model identifier to be served by vLLM.
196
197
  backend: str
197
- Determines where and how to run the Ollama process.
198
- force_pull: bool
199
- Whether to run `ollama pull` no matter what, or first check the remote cache in Metaflow datastore for this model key.
200
- cache_update_policy: str
201
- Cache update policy: "auto", "force", or "never".
202
- force_cache_update: bool
203
- Simple override for "force" cache update policy.
198
+ Determines where and how to run the vLLM process.
199
+ openai_api_server: bool
200
+ Whether to use OpenAI-compatible API server mode (subprocess) instead of native engine.
201
+ Default is False (uses native engine).
202
+ Set to True for backward compatibility with existing code.
204
203
  debug: bool
205
204
  Whether to turn on verbose debugging logs.
206
- circuit_breaker_config: dict
207
- Configuration for circuit breaker protection. Keys: failure_threshold, recovery_timeout, reset_timeout.
208
- timeout_config: dict
209
- Configuration for various operation timeouts. Keys: pull, stop, health_check, install, server_startup.
205
+ card_refresh_interval: int
206
+ Interval in seconds for refreshing the vLLM status card.
207
+ Only used when openai_api_server=True.
208
+ max_retries: int
209
+ Maximum number of retries checking for vLLM server startup.
210
+ Only used when openai_api_server=True.
211
+ retry_alert_frequency: int
212
+ Frequency of alert logs for vLLM server startup retries.
213
+ Only used when openai_api_server=True.
214
+ engine_args : dict
215
+ Additional keyword arguments to pass to the vLLM engine.
216
+ For example, `tensor_parallel_size=2`.
210
217
  """
211
218
  ...
212
219
 
213
- @typing.overload
214
- def retry(*, times: int = 3, minutes_between_retries: int = 2) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
220
+ def huggingface_hub(*, temp_dir_root: typing.Optional[str] = None, load: typing.Union[typing.List[str], typing.List[typing.Tuple[typing.Dict, str]], typing.List[typing.Tuple[str, str]], typing.List[typing.Dict], None]) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
215
221
  """
216
- Specifies the number of times the task corresponding
217
- to a step needs to be retried.
222
+ Decorator that helps cache, version and store models/datasets from huggingface hub.
218
223
 
219
- This decorator is useful for handling transient errors, such as networking issues.
220
- If your task contains operations that can't be retried safely, e.g. database updates,
221
- it is advisable to annotate it with `@retry(times=0)`.
224
+ > Examples
222
225
 
223
- This can be used in conjunction with the `@catch` decorator. The `@catch`
224
- decorator will execute a no-op task after all retries have been exhausted,
225
- ensuring that the flow execution can continue.
226
+ **Usage: creating references of models from huggingface that may be loaded in downstream steps**
227
+ ```python
228
+ @huggingface_hub
229
+ @step
230
+ def pull_model_from_huggingface(self):
231
+ # `current.huggingface_hub.snapshot_download` downloads the model from the Hugging Face Hub
232
+ # and saves it in the backend storage based on the model's `repo_id`. If there exists a model
233
+ # with the same `repo_id` in the backend storage, it will not download the model again. The return
234
+ # value of the function is a reference to the model in the backend storage.
235
+ # This reference can be used to load the model in the subsequent steps via `@model(load=["llama_model"])`
236
+
237
+ self.model_id = "mistralai/Mistral-7B-Instruct-v0.1"
238
+ self.llama_model = current.huggingface_hub.snapshot_download(
239
+ repo_id=self.model_id,
240
+ allow_patterns=["*.safetensors", "*.json", "tokenizer.*"],
241
+ )
242
+ self.next(self.train)
243
+ ```
244
+
245
+ **Usage: loading models directly from huggingface hub or from cache (from metaflow's datastore)**
246
+ ```python
247
+ @huggingface_hub(load=["mistralai/Mistral-7B-Instruct-v0.1"])
248
+ @step
249
+ def pull_model_from_huggingface(self):
250
+ path_to_model = current.huggingface_hub.loaded["mistralai/Mistral-7B-Instruct-v0.1"]
251
+ ```
252
+
253
+ ```python
254
+ @huggingface_hub(load=[("mistralai/Mistral-7B-Instruct-v0.1", "/my-directory"), ("myorg/mistral-lora", "/my-lora-directory")])
255
+ @step
256
+ def finetune_model(self):
257
+ path_to_model = current.huggingface_hub.loaded["mistralai/Mistral-7B-Instruct-v0.1"]
258
+ # path_to_model will be /my-directory
259
+ ```
260
+
261
+ ```python
262
+ # Takes all the arguments passed to `snapshot_download`
263
+ # except for `local_dir`
264
+ @huggingface_hub(load=[
265
+ {
266
+ "repo_id": "mistralai/Mistral-7B-Instruct-v0.1",
267
+ },
268
+ {
269
+ "repo_id": "myorg/mistral-lora",
270
+ "repo_type": "model",
271
+ },
272
+ ])
273
+ @step
274
+ def finetune_model(self):
275
+ path_to_model = current.huggingface_hub.loaded["mistralai/Mistral-7B-Instruct-v0.1"]
276
+ # path_to_model will be a temporary directory, since no local path was specified
277
+ ```
226
278
 
227
279
 
228
280
  Parameters
229
281
  ----------
230
- times : int, default 3
231
- Number of times to retry this task.
232
- minutes_between_retries : int, default 2
233
- Number of minutes between retries.
282
+ temp_dir_root : str, optional
283
+ The root directory that will hold the temporary directory where objects will be downloaded.
284
+
285
+ load: Union[List[str], List[Tuple[Dict, str]], List[Tuple[str, str]], List[Dict], None]
286
+ The list of repos (models/datasets) to load.
287
+
288
+ Loaded repos can be accessed via `current.huggingface_hub.loaded`. If load is set, then the following happens:
289
+
290
+ - If repo (model/dataset) is not found in the datastore:
291
+ - Downloads the repo from Hugging Face Hub to a temporary directory (or uses specified path) for local access
292
+ - Stores it in Metaflow's datastore (s3/gcs/azure etc.) with a unique name based on repo_type/repo_id
293
+ - All HF models loaded for a `@step` will be cached separately under flow/step/namespace.
294
+
295
+ - If repo is found in the datastore:
296
+ - Loads it directly from datastore to local path (can be temporary directory or specified path)
234
297
  """
235
298
  ...
236
299
 
237
300
  @typing.overload
238
- def retry(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
301
+ def app_deploy(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
302
+ """
303
+ Decorator prototype for all step decorators. This function gets specialized
304
+ and imported for all decorator types by _import_plugin_decorators().
305
+ """
239
306
  ...
240
307
 
241
308
  @typing.overload
242
- def retry(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
309
+ def app_deploy(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
243
310
  ...
244
311
 
245
- def retry(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, times: int = 3, minutes_between_retries: int = 2):
312
+ def app_deploy(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
246
313
  """
247
- Specifies the number of times the task corresponding
248
- to a step needs to be retried.
249
-
250
- This decorator is useful for handling transient errors, such as networking issues.
251
- If your task contains operations that can't be retried safely, e.g. database updates,
252
- it is advisable to annotate it with `@retry(times=0)`.
253
-
254
- This can be used in conjunction with the `@catch` decorator. The `@catch`
255
- decorator will execute a no-op task after all retries have been exhausted,
256
- ensuring that the flow execution can continue.
257
-
258
-
259
- Parameters
260
- ----------
261
- times : int, default 3
262
- Number of times to retry this task.
263
- minutes_between_retries : int, default 2
264
- Number of minutes between retries.
314
+ Decorator prototype for all step decorators. This function gets specialized
315
+ and imported for all decorators types by _import_plugin_decorators().
265
316
  """
266
317
  ...
267
318
 
268
319
  @typing.overload
269
- def catch(*, var: typing.Optional[str] = None, print_exception: bool = True) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
320
+ def fast_bakery_internal(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
270
321
  """
271
- Specifies that the step will success under all circumstances.
272
-
273
- The decorator will create an optional artifact, specified by `var`, which
274
- contains the exception raised. You can use it to detect the presence
275
- of errors, indicating that all happy-path artifacts produced by the step
276
- are missing.
322
+ Internal decorator to support Fast bakery
323
+ """
324
+ ...
325
+
326
+ @typing.overload
327
+ def fast_bakery_internal(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
328
+ ...
329
+
330
+ def fast_bakery_internal(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
331
+ """
332
+ Internal decorator to support Fast bakery
333
+ """
334
+ ...
335
+
336
+ @typing.overload
337
+ def environment(*, vars: typing.Dict[str, str] = {}) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
338
+ """
339
+ Specifies environment variables to be set prior to the execution of a step.
277
340
 
278
341
 
279
342
  Parameters
280
343
  ----------
281
- var : str, optional, default None
282
- Name of the artifact in which to store the caught exception.
283
- If not specified, the exception is not stored.
284
- print_exception : bool, default True
285
- Determines whether or not the exception is printed to
286
- stdout when caught.
344
+ vars : Dict[str, str], default {}
345
+ Dictionary of environment variables to set.
287
346
  """
288
347
  ...
289
348
 
290
349
  @typing.overload
291
- def catch(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
350
+ def environment(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
292
351
  ...
293
352
 
294
353
  @typing.overload
295
- def catch(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
354
+ def environment(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
296
355
  ...
297
356
 
298
- def catch(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, var: typing.Optional[str] = None, print_exception: bool = True):
357
+ def environment(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, vars: typing.Dict[str, str] = {}):
299
358
  """
300
- Specifies that the step will success under all circumstances.
359
+ Specifies environment variables to be set prior to the execution of a step.
301
360
 
302
- The decorator will create an optional artifact, specified by `var`, which
303
- contains the exception raised. You can use it to detect the presence
304
- of errors, indicating that all happy-path artifacts produced by the step
305
- are missing.
361
+
362
+ Parameters
363
+ ----------
364
+ vars : Dict[str, str], default {}
365
+ Dictionary of environment variables to set.
366
+ """
367
+ ...
368
+
369
+ def nvidia(*, gpu: int, gpu_type: str, queue_timeout: int) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
370
+ """
371
+ Specifies that this step should execute on DGX cloud.
306
372
 
307
373
 
308
374
  Parameters
309
375
  ----------
310
- var : str, optional, default None
311
- Name of the artifact in which to store the caught exception.
312
- If not specified, the exception is not stored.
313
- print_exception : bool, default True
314
- Determines whether or not the exception is printed to
315
- stdout when caught.
376
+ gpu : int
377
+ Number of GPUs to use.
378
+ gpu_type : str
379
+ Type of Nvidia GPU to use.
380
+ queue_timeout : int
381
+ Time to keep the job in NVCF's queue.
316
382
  """
317
383
  ...
318
384
 
319
385
  @typing.overload
320
- def model(*, load: typing.Union[typing.List[str], str, typing.List[typing.Tuple[str, typing.Optional[str]]]] = None, temp_dir_root: str = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
386
+ def pypi(*, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
321
387
  """
322
- Enables loading / saving of models within a step.
323
-
324
- > Examples
325
- - Saving Models
326
- ```python
327
- @model
328
- @step
329
- def train(self):
330
- # current.model.save returns a dictionary reference to the model saved
331
- self.my_model = current.model.save(
332
- path_to_my_model,
333
- label="my_model",
334
- metadata={
335
- "epochs": 10,
336
- "batch-size": 32,
337
- "learning-rate": 0.001,
338
- }
339
- )
340
- self.next(self.test)
388
+ Specifies the PyPI packages for the step.
389
+
390
+ Information in this decorator will augment any
391
+ attributes set in the `@pypi_base` flow-level decorator. Hence,
392
+ you can use `@pypi_base` to set packages required by all
393
+ steps and use `@pypi` to specify step-specific overrides.
394
+
395
+
396
+ Parameters
397
+ ----------
398
+ packages : Dict[str, str], default: {}
399
+ Packages to use for this step. The key is the name of the package
400
+ and the value is the version to use.
401
+ python : str, optional, default: None
402
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
403
+ that the version used will correspond to the version of the Python interpreter used to start the run.
404
+ """
405
+ ...
406
+
407
+ @typing.overload
408
+ def pypi(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
409
+ ...
410
+
411
+ @typing.overload
412
+ def pypi(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
413
+ ...
414
+
415
+ def pypi(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None):
416
+ """
417
+ Specifies the PyPI packages for the step.
418
+
419
+ Information in this decorator will augment any
420
+ attributes set in the `@pypi_base` flow-level decorator. Hence,
421
+ you can use `@pypi_base` to set packages required by all
422
+ steps and use `@pypi` to specify step-specific overrides.
423
+
424
+
425
+ Parameters
426
+ ----------
427
+ packages : Dict[str, str], default: {}
428
+ Packages to use for this step. The key is the name of the package
429
+ and the value is the version to use.
430
+ python : str, optional, default: None
431
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
432
+ that the version used will correspond to the version of the Python interpreter used to start the run.
433
+ """
434
+ ...
435
+
436
+ @typing.overload
437
+ def model(*, load: typing.Union[typing.List[str], str, typing.List[typing.Tuple[str, typing.Optional[str]]]] = None, temp_dir_root: str = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
438
+ """
439
+ Enables loading / saving of models within a step.
440
+
441
+ > Examples
442
+ - Saving Models
443
+ ```python
444
+ @model
445
+ @step
446
+ def train(self):
447
+ # current.model.save returns a dictionary reference to the model saved
448
+ self.my_model = current.model.save(
449
+ path_to_my_model,
450
+ label="my_model",
451
+ metadata={
452
+ "epochs": 10,
453
+ "batch-size": 32,
454
+ "learning-rate": 0.001,
455
+ }
456
+ )
457
+ self.next(self.test)
341
458
 
342
459
  @model(load="my_model")
343
460
  @step
@@ -445,53 +562,149 @@ def model(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], ty
445
562
  """
446
563
  ...
447
564
 
448
- def vllm(*, model: str, backend: str, openai_api_server: bool, debug: bool, card_refresh_interval: int, max_retries: int, retry_alert_frequency: int, engine_args: dict) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
565
+ def nvct(*, gpu: int, gpu_type: str) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
449
566
  """
450
- This decorator is used to run vllm APIs as Metaflow task sidecars.
567
+ Specifies that this step should execute on DGX cloud.
451
568
 
452
- User code call
453
- --------------
454
- @vllm(
455
- model="...",
456
- ...
457
- )
458
569
 
459
- Valid backend options
460
- ---------------------
461
- - 'local': Run as a separate process on the local task machine.
570
+ Parameters
571
+ ----------
572
+ gpu : int
573
+ Number of GPUs to use.
574
+ gpu_type : str
575
+ Type of Nvidia GPU to use.
576
+ """
577
+ ...
578
+
579
+ def s3_proxy(*, integration_name: typing.Optional[str] = None, write_mode: typing.Optional[str] = None, debug: typing.Optional[bool] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
580
+ """
581
+ S3 Proxy decorator for routing S3 requests through a local proxy service.
462
582
 
463
- Valid model options
464
- -------------------
465
- Any HuggingFace model identifier, e.g. 'meta-llama/Llama-3.2-1B'
466
583
 
467
- NOTE: vLLM's OpenAI-compatible server serves ONE model per server instance.
468
- If you need multiple models, you must create multiple @vllm decorators.
584
+ Parameters
585
+ ----------
586
+ integration_name : str, optional
587
+ Name of the S3 proxy integration. If not specified, will use the only
588
+ available S3 proxy integration in the namespace (fails if multiple exist).
589
+ write_mode : str, optional
590
+ The desired behavior during write operations to target (origin) S3 bucket.
591
+ allowed options are:
592
+ "origin-and-cache" -> write to both the target S3 bucket and local object
593
+ storage
594
+ "origin" -> only write to the target S3 bucket
595
+ "cache" -> only write to the object storage service used for caching
596
+ debug : bool, optional
597
+ Enable debug logging for proxy operations.
598
+ """
599
+ ...
600
+
601
+ def kubernetes(*, cpu: int = 1, memory: int = 4096, disk: int = 10240, image: typing.Optional[str] = None, image_pull_policy: str = 'KUBERNETES_IMAGE_PULL_POLICY', image_pull_secrets: typing.List[str] = [], service_account: str = 'METAFLOW_KUBERNETES_SERVICE_ACCOUNT', secrets: typing.Optional[typing.List[str]] = None, node_selector: typing.Union[typing.Dict[str, str], str, None] = None, namespace: str = 'METAFLOW_KUBERNETES_NAMESPACE', gpu: typing.Optional[int] = None, gpu_vendor: str = 'KUBERNETES_GPU_VENDOR', tolerations: typing.List[typing.Dict[str, str]] = [], labels: typing.Dict[str, str] = 'METAFLOW_KUBERNETES_LABELS', annotations: typing.Dict[str, str] = 'METAFLOW_KUBERNETES_ANNOTATIONS', use_tmpfs: bool = False, tmpfs_tempdir: bool = True, tmpfs_size: typing.Optional[int] = None, tmpfs_path: typing.Optional[str] = '/metaflow_temp', persistent_volume_claims: typing.Optional[typing.Dict[str, str]] = None, shared_memory: typing.Optional[int] = None, port: typing.Optional[int] = None, compute_pool: typing.Optional[str] = None, hostname_resolution_timeout: int = 600, qos: str = 'Burstable', security_context: typing.Optional[typing.Dict[str, typing.Any]] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
602
+ """
603
+ Specifies that this step should execute on Kubernetes.
469
604
 
470
605
 
471
606
  Parameters
472
607
  ----------
473
- model: str
474
- HuggingFace model identifier to be served by vLLM.
475
- backend: str
476
- Determines where and how to run the vLLM process.
477
- openai_api_server: bool
478
- Whether to use OpenAI-compatible API server mode (subprocess) instead of native engine.
479
- Default is False (uses native engine).
480
- Set to True for backward compatibility with existing code.
481
- debug: bool
482
- Whether to turn on verbose debugging logs.
483
- card_refresh_interval: int
484
- Interval in seconds for refreshing the vLLM status card.
485
- Only used when openai_api_server=True.
486
- max_retries: int
487
- Maximum number of retries checking for vLLM server startup.
488
- Only used when openai_api_server=True.
489
- retry_alert_frequency: int
490
- Frequency of alert logs for vLLM server startup retries.
491
- Only used when openai_api_server=True.
492
- engine_args : dict
493
- Additional keyword arguments to pass to the vLLM engine.
494
- For example, `tensor_parallel_size=2`.
608
+ cpu : int, default 1
609
+ Number of CPUs required for this step. If `@resources` is
610
+ also present, the maximum value from all decorators is used.
611
+ memory : int, default 4096
612
+ Memory size (in MB) required for this step. If
613
+ `@resources` is also present, the maximum value from all decorators is
614
+ used.
615
+ disk : int, default 10240
616
+ Disk size (in MB) required for this step. If
617
+ `@resources` is also present, the maximum value from all decorators is
618
+ used.
619
+ image : str, optional, default None
620
+ Docker image to use when launching on Kubernetes. If not specified, and
621
+ METAFLOW_KUBERNETES_CONTAINER_IMAGE is specified, that image is used. If
622
+ not, a default Docker image mapping to the current version of Python is used.
623
+ image_pull_policy: str, default KUBERNETES_IMAGE_PULL_POLICY
624
+ If given, the imagePullPolicy to be applied to the Docker image of the step.
625
+ image_pull_secrets: List[str], default []
626
+ The default is extracted from METAFLOW_KUBERNETES_IMAGE_PULL_SECRETS.
627
+ Kubernetes image pull secrets to use when pulling container images
628
+ in Kubernetes.
629
+ service_account : str, default METAFLOW_KUBERNETES_SERVICE_ACCOUNT
630
+ Kubernetes service account to use when launching pod in Kubernetes.
631
+ secrets : List[str], optional, default None
632
+ Kubernetes secrets to use when launching pod in Kubernetes. These
633
+ secrets are in addition to the ones defined in `METAFLOW_KUBERNETES_SECRETS`
634
+ in Metaflow configuration.
635
+ node_selector: Union[Dict[str,str], str], optional, default None
636
+ Kubernetes node selector(s) to apply to the pod running the task.
637
+ Can be passed in as a comma separated string of values e.g.
638
+ 'kubernetes.io/os=linux,kubernetes.io/arch=amd64' or as a dictionary
639
+ {'kubernetes.io/os': 'linux', 'kubernetes.io/arch': 'amd64'}
640
+ namespace : str, default METAFLOW_KUBERNETES_NAMESPACE
641
+ Kubernetes namespace to use when launching pod in Kubernetes.
642
+ gpu : int, optional, default None
643
+ Number of GPUs required for this step. A value of zero implies that
644
+ the scheduled node should not have GPUs.
645
+ gpu_vendor : str, default KUBERNETES_GPU_VENDOR
646
+ The vendor of the GPUs to be used for this step.
647
+ tolerations : List[Dict[str,str]], default []
648
+ The default is extracted from METAFLOW_KUBERNETES_TOLERATIONS.
649
+ Kubernetes tolerations to use when launching pod in Kubernetes.
650
+ labels: Dict[str, str], default: METAFLOW_KUBERNETES_LABELS
651
+ Kubernetes labels to use when launching pod in Kubernetes.
652
+ annotations: Dict[str, str], default: METAFLOW_KUBERNETES_ANNOTATIONS
653
+ Kubernetes annotations to use when launching pod in Kubernetes.
654
+ use_tmpfs : bool, default False
655
+ This enables an explicit tmpfs mount for this step.
656
+ tmpfs_tempdir : bool, default True
657
+ sets METAFLOW_TEMPDIR to tmpfs_path if set for this step.
658
+ tmpfs_size : int, optional, default: None
659
+ The value for the size (in MiB) of the tmpfs mount for this step.
660
+ This parameter maps to the `--tmpfs` option in Docker. Defaults to 50% of the
661
+ memory allocated for this step.
662
+ tmpfs_path : str, optional, default /metaflow_temp
663
+ Path to tmpfs mount for this step.
664
+ persistent_volume_claims : Dict[str, str], optional, default None
665
+ A map (dictionary) of persistent volumes to be mounted to the pod for this step. The map is from persistent
666
+ volumes to the path to which the volume is to be mounted, e.g., `{'pvc-name': '/path/to/mount/on'}`.
667
+ shared_memory: int, optional
668
+ Shared memory size (in MiB) required for this step
669
+ port: int, optional
670
+ Port number to specify in the Kubernetes job object
671
+ compute_pool : str, optional, default None
672
+ Compute pool to be used for this step.
673
+ If not specified, any accessible compute pool within the perimeter is used.
674
+ hostname_resolution_timeout: int, default 10 * 60
675
+ Timeout in seconds for the worker tasks in the gang scheduled cluster to resolve the hostname of the control task.
676
+ Only applicable when @parallel is used.
677
+ qos: str, default: Burstable
678
+ Quality of Service class to assign to the pod. Supported values are: Guaranteed, Burstable, BestEffort
679
+
680
+ security_context: Dict[str, Any], optional, default None
681
+ Container security context. Applies to the task container. Allows the following keys:
682
+ - privileged: bool, optional, default None
683
+ - allow_privilege_escalation: bool, optional, default None
684
+ - run_as_user: int, optional, default None
685
+ - run_as_group: int, optional, default None
686
+ - run_as_non_root: bool, optional, default None
687
+ """
688
+ ...
689
+
690
+ @typing.overload
691
+ def nebius_s3_proxy(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
692
+ """
693
+ Nebius-specific S3 Proxy decorator for routing S3 requests through a local proxy service.
694
+ It exists to make it easier for users to know that this decorator should only be used with
695
+ a Neo Cloud like Nebius.
696
+ """
697
+ ...
698
+
699
+ @typing.overload
700
+ def nebius_s3_proxy(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
701
+ ...
702
+
703
+ def nebius_s3_proxy(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
704
+ """
705
+ Nebius-specific S3 Proxy decorator for routing S3 requests through a local proxy service.
706
+ It exists to make it easier for users to know that this decorator should only be used with
707
+ a Neo Cloud like Nebius.
495
708
  """
496
709
  ...
497
710
 
@@ -555,648 +768,96 @@ def timeout(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None],
555
768
  ...
556
769
 
557
770
  @typing.overload
558
- def checkpoint(*, load_policy: str = 'fresh', temp_dir_root: str = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
771
+ def catch(*, var: typing.Optional[str] = None, print_exception: bool = True) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
559
772
  """
560
- Enables checkpointing for a step.
773
+ Specifies that the step will succeed under all circumstances.
561
774
 
562
- > Examples
563
-
564
- - Saving Checkpoints
565
-
566
- ```python
567
- @checkpoint
568
- @step
569
- def train(self):
570
- model = create_model(self.parameters, checkpoint_path = None)
571
- for i in range(self.epochs):
572
- # some training logic
573
- loss = model.train(self.dataset)
574
- if i % 10 == 0:
575
- model.save(
576
- current.checkpoint.directory,
577
- )
578
- # saves the contents of the `current.checkpoint.directory` as a checkpoint
579
- # and returns a reference dictionary to the checkpoint saved in the datastore
580
- self.latest_checkpoint = current.checkpoint.save(
581
- name="epoch_checkpoint",
582
- metadata={
583
- "epoch": i,
584
- "loss": loss,
585
- }
586
- )
587
- ```
588
-
589
- - Using Loaded Checkpoints
590
-
591
- ```python
592
- @retry(times=3)
593
- @checkpoint
594
- @step
595
- def train(self):
596
- # Assume that the task has restarted and the previous attempt of the task
597
- # saved a checkpoint
598
- checkpoint_path = None
599
- if current.checkpoint.is_loaded: # Check if a checkpoint is loaded
600
- print("Loaded checkpoint from the previous attempt")
601
- checkpoint_path = current.checkpoint.directory
602
-
603
- model = create_model(self.parameters, checkpoint_path = checkpoint_path)
604
- for i in range(self.epochs):
605
- ...
606
- ```
607
-
608
-
609
- Parameters
610
- ----------
611
- load_policy : str, default: "fresh"
612
- The policy for loading the checkpoint. The following policies are supported:
613
- - "eager": Loads the the latest available checkpoint within the namespace.
614
- With this mode, the latest checkpoint written by any previous task (can be even a different run) of the step
615
- will be loaded at the start of the task.
616
- - "none": Do not load any checkpoint
617
- - "fresh": Loads the lastest checkpoint created within the running Task.
618
- This mode helps loading checkpoints across various retry attempts of the same task.
619
- With this mode, no checkpoint will be loaded at the start of a task but any checkpoints
620
- created within the task will be loaded when the task is retries execution on failure.
621
-
622
- temp_dir_root : str, default: None
623
- The root directory under which `current.checkpoint.directory` will be created.
624
- """
625
- ...
626
-
627
- @typing.overload
628
- def checkpoint(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
629
- ...
630
-
631
- @typing.overload
632
- def checkpoint(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
633
- ...
634
-
635
- def checkpoint(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, load_policy: str = 'fresh', temp_dir_root: str = None):
636
- """
637
- Enables checkpointing for a step.
638
-
639
- > Examples
640
-
641
- - Saving Checkpoints
642
-
643
- ```python
644
- @checkpoint
645
- @step
646
- def train(self):
647
- model = create_model(self.parameters, checkpoint_path = None)
648
- for i in range(self.epochs):
649
- # some training logic
650
- loss = model.train(self.dataset)
651
- if i % 10 == 0:
652
- model.save(
653
- current.checkpoint.directory,
654
- )
655
- # saves the contents of the `current.checkpoint.directory` as a checkpoint
656
- # and returns a reference dictionary to the checkpoint saved in the datastore
657
- self.latest_checkpoint = current.checkpoint.save(
658
- name="epoch_checkpoint",
659
- metadata={
660
- "epoch": i,
661
- "loss": loss,
662
- }
663
- )
664
- ```
665
-
666
- - Using Loaded Checkpoints
667
-
668
- ```python
669
- @retry(times=3)
670
- @checkpoint
671
- @step
672
- def train(self):
673
- # Assume that the task has restarted and the previous attempt of the task
674
- # saved a checkpoint
675
- checkpoint_path = None
676
- if current.checkpoint.is_loaded: # Check if a checkpoint is loaded
677
- print("Loaded checkpoint from the previous attempt")
678
- checkpoint_path = current.checkpoint.directory
679
-
680
- model = create_model(self.parameters, checkpoint_path = checkpoint_path)
681
- for i in range(self.epochs):
682
- ...
683
- ```
684
-
685
-
686
- Parameters
687
- ----------
688
- load_policy : str, default: "fresh"
689
- The policy for loading the checkpoint. The following policies are supported:
690
- - "eager": Loads the the latest available checkpoint within the namespace.
691
- With this mode, the latest checkpoint written by any previous task (can be even a different run) of the step
692
- will be loaded at the start of the task.
693
- - "none": Do not load any checkpoint
694
- - "fresh": Loads the lastest checkpoint created within the running Task.
695
- This mode helps loading checkpoints across various retry attempts of the same task.
696
- With this mode, no checkpoint will be loaded at the start of a task but any checkpoints
697
- created within the task will be loaded when the task is retries execution on failure.
698
-
699
- temp_dir_root : str, default: None
700
- The root directory under which `current.checkpoint.directory` will be created.
701
- """
702
- ...
703
-
704
- def huggingface_hub(*, temp_dir_root: typing.Optional[str] = None, load: typing.Union[typing.List[str], typing.List[typing.Tuple[typing.Dict, str]], typing.List[typing.Tuple[str, str]], typing.List[typing.Dict], None]) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
705
- """
706
- Decorator that helps cache, version and store models/datasets from huggingface hub.
707
-
708
- > Examples
709
-
710
- **Usage: creating references of models from huggingface that may be loaded in downstream steps**
711
- ```python
712
- @huggingface_hub
713
- @step
714
- def pull_model_from_huggingface(self):
715
- # `current.huggingface_hub.snapshot_download` downloads the model from the Hugging Face Hub
716
- # and saves it in the backend storage based on the model's `repo_id`. If there exists a model
717
- # with the same `repo_id` in the backend storage, it will not download the model again. The return
718
- # value of the function is a reference to the model in the backend storage.
719
- # This reference can be used to load the model in the subsequent steps via `@model(load=["llama_model"])`
720
-
721
- self.model_id = "mistralai/Mistral-7B-Instruct-v0.1"
722
- self.llama_model = current.huggingface_hub.snapshot_download(
723
- repo_id=self.model_id,
724
- allow_patterns=["*.safetensors", "*.json", "tokenizer.*"],
725
- )
726
- self.next(self.train)
727
- ```
728
-
729
- **Usage: loading models directly from huggingface hub or from cache (from metaflow's datastore)**
730
- ```python
731
- @huggingface_hub(load=["mistralai/Mistral-7B-Instruct-v0.1"])
732
- @step
733
- def pull_model_from_huggingface(self):
734
- path_to_model = current.huggingface_hub.loaded["mistralai/Mistral-7B-Instruct-v0.1"]
735
- ```
736
-
737
- ```python
738
- @huggingface_hub(load=[("mistralai/Mistral-7B-Instruct-v0.1", "/my-directory"), ("myorg/mistral-lora, "/my-lora-directory")])
739
- @step
740
- def finetune_model(self):
741
- path_to_model = current.huggingface_hub.loaded["mistralai/Mistral-7B-Instruct-v0.1"]
742
- # path_to_model will be /my-directory
743
- ```
744
-
745
- ```python
746
- # Takes all the arguments passed to `snapshot_download`
747
- # except for `local_dir`
748
- @huggingface_hub(load=[
749
- {
750
- "repo_id": "mistralai/Mistral-7B-Instruct-v0.1",
751
- },
752
- {
753
- "repo_id": "myorg/mistral-lora",
754
- "repo_type": "model",
755
- },
756
- ])
757
- @step
758
- def finetune_model(self):
759
- path_to_model = current.huggingface_hub.loaded["mistralai/Mistral-7B-Instruct-v0.1"]
760
- # path_to_model will be /my-directory
761
- ```
762
-
763
-
764
- Parameters
765
- ----------
766
- temp_dir_root : str, optional
767
- The root directory that will hold the temporary directory where objects will be downloaded.
768
-
769
- load: Union[List[str], List[Tuple[Dict, str]], List[Tuple[str, str]], List[Dict], None]
770
- The list of repos (models/datasets) to load.
771
-
772
- Loaded repos can be accessed via `current.huggingface_hub.loaded`. If load is set, then the following happens:
773
-
774
- - If repo (model/dataset) is not found in the datastore:
775
- - Downloads the repo from Hugging Face Hub to a temporary directory (or uses specified path) for local access
776
- - Stores it in Metaflow's datastore (s3/gcs/azure etc.) with a unique name based on repo_type/repo_id
777
- - All HF models loaded for a `@step` will be cached separately under flow/step/namespace.
778
-
779
- - If repo is found in the datastore:
780
- - Loads it directly from datastore to local path (can be temporary directory or specified path)
781
- """
782
- ...
783
-
784
- def kubernetes(*, cpu: int = 1, memory: int = 4096, disk: int = 10240, image: typing.Optional[str] = None, image_pull_policy: str = 'KUBERNETES_IMAGE_PULL_POLICY', image_pull_secrets: typing.List[str] = [], service_account: str = 'METAFLOW_KUBERNETES_SERVICE_ACCOUNT', secrets: typing.Optional[typing.List[str]] = None, node_selector: typing.Union[typing.Dict[str, str], str, None] = None, namespace: str = 'METAFLOW_KUBERNETES_NAMESPACE', gpu: typing.Optional[int] = None, gpu_vendor: str = 'KUBERNETES_GPU_VENDOR', tolerations: typing.List[typing.Dict[str, str]] = [], labels: typing.Dict[str, str] = 'METAFLOW_KUBERNETES_LABELS', annotations: typing.Dict[str, str] = 'METAFLOW_KUBERNETES_ANNOTATIONS', use_tmpfs: bool = False, tmpfs_tempdir: bool = True, tmpfs_size: typing.Optional[int] = None, tmpfs_path: typing.Optional[str] = '/metaflow_temp', persistent_volume_claims: typing.Optional[typing.Dict[str, str]] = None, shared_memory: typing.Optional[int] = None, port: typing.Optional[int] = None, compute_pool: typing.Optional[str] = None, hostname_resolution_timeout: int = 600, qos: str = 'Burstable', security_context: typing.Optional[typing.Dict[str, typing.Any]] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
785
- """
786
- Specifies that this step should execute on Kubernetes.
787
-
788
-
789
- Parameters
790
- ----------
791
- cpu : int, default 1
792
- Number of CPUs required for this step. If `@resources` is
793
- also present, the maximum value from all decorators is used.
794
- memory : int, default 4096
795
- Memory size (in MB) required for this step. If
796
- `@resources` is also present, the maximum value from all decorators is
797
- used.
798
- disk : int, default 10240
799
- Disk size (in MB) required for this step. If
800
- `@resources` is also present, the maximum value from all decorators is
801
- used.
802
- image : str, optional, default None
803
- Docker image to use when launching on Kubernetes. If not specified, and
804
- METAFLOW_KUBERNETES_CONTAINER_IMAGE is specified, that image is used. If
805
- not, a default Docker image mapping to the current version of Python is used.
806
- image_pull_policy: str, default KUBERNETES_IMAGE_PULL_POLICY
807
- If given, the imagePullPolicy to be applied to the Docker image of the step.
808
- image_pull_secrets: List[str], default []
809
- The default is extracted from METAFLOW_KUBERNETES_IMAGE_PULL_SECRETS.
810
- Kubernetes image pull secrets to use when pulling container images
811
- in Kubernetes.
812
- service_account : str, default METAFLOW_KUBERNETES_SERVICE_ACCOUNT
813
- Kubernetes service account to use when launching pod in Kubernetes.
814
- secrets : List[str], optional, default None
815
- Kubernetes secrets to use when launching pod in Kubernetes. These
816
- secrets are in addition to the ones defined in `METAFLOW_KUBERNETES_SECRETS`
817
- in Metaflow configuration.
818
- node_selector: Union[Dict[str,str], str], optional, default None
819
- Kubernetes node selector(s) to apply to the pod running the task.
820
- Can be passed in as a comma separated string of values e.g.
821
- 'kubernetes.io/os=linux,kubernetes.io/arch=amd64' or as a dictionary
822
- {'kubernetes.io/os': 'linux', 'kubernetes.io/arch': 'amd64'}
823
- namespace : str, default METAFLOW_KUBERNETES_NAMESPACE
824
- Kubernetes namespace to use when launching pod in Kubernetes.
825
- gpu : int, optional, default None
826
- Number of GPUs required for this step. A value of zero implies that
827
- the scheduled node should not have GPUs.
828
- gpu_vendor : str, default KUBERNETES_GPU_VENDOR
829
- The vendor of the GPUs to be used for this step.
830
- tolerations : List[Dict[str,str]], default []
831
- The default is extracted from METAFLOW_KUBERNETES_TOLERATIONS.
832
- Kubernetes tolerations to use when launching pod in Kubernetes.
833
- labels: Dict[str, str], default: METAFLOW_KUBERNETES_LABELS
834
- Kubernetes labels to use when launching pod in Kubernetes.
835
- annotations: Dict[str, str], default: METAFLOW_KUBERNETES_ANNOTATIONS
836
- Kubernetes annotations to use when launching pod in Kubernetes.
837
- use_tmpfs : bool, default False
838
- This enables an explicit tmpfs mount for this step.
839
- tmpfs_tempdir : bool, default True
840
- sets METAFLOW_TEMPDIR to tmpfs_path if set for this step.
841
- tmpfs_size : int, optional, default: None
842
- The value for the size (in MiB) of the tmpfs mount for this step.
843
- This parameter maps to the `--tmpfs` option in Docker. Defaults to 50% of the
844
- memory allocated for this step.
845
- tmpfs_path : str, optional, default /metaflow_temp
846
- Path to tmpfs mount for this step.
847
- persistent_volume_claims : Dict[str, str], optional, default None
848
- A map (dictionary) of persistent volumes to be mounted to the pod for this step. The map is from persistent
849
- volumes to the path to which the volume is to be mounted, e.g., `{'pvc-name': '/path/to/mount/on'}`.
850
- shared_memory: int, optional
851
- Shared memory size (in MiB) required for this step
852
- port: int, optional
853
- Port number to specify in the Kubernetes job object
854
- compute_pool : str, optional, default None
855
- Compute pool to be used for this step.
856
- If not specified, any accessible compute pool within the perimeter is used.
857
- hostname_resolution_timeout: int, default 10 * 60
858
- Timeout in seconds for the worker tasks in the gang-scheduled cluster to resolve the hostname of the control task.
859
- Only applicable when @parallel is used.
860
- qos: str, default: Burstable
861
- Quality of Service class to assign to the pod. Supported values are: Guaranteed, Burstable, BestEffort
862
-
863
- security_context: Dict[str, Any], optional, default None
864
- Container security context. Applies to the task container. Allows the following keys:
865
- - privileged: bool, optional, default None
866
- - allow_privilege_escalation: bool, optional, default None
867
- - run_as_user: int, optional, default None
868
- - run_as_group: int, optional, default None
869
- - run_as_non_root: bool, optional, default None
870
- """
871
- ...
872
-
873
- @typing.overload
874
- def environment(*, vars: typing.Dict[str, str] = {}) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
875
- """
876
- Specifies environment variables to be set prior to the execution of a step.
877
-
878
-
879
- Parameters
880
- ----------
881
- vars : Dict[str, str], default {}
882
- Dictionary of environment variables to set.
883
- """
884
- ...
885
-
886
- @typing.overload
887
- def environment(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
888
- ...
889
-
890
- @typing.overload
891
- def environment(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
892
- ...
893
-
894
- def environment(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, vars: typing.Dict[str, str] = {}):
895
- """
896
- Specifies environment variables to be set prior to the execution of a step.
897
-
898
-
899
- Parameters
900
- ----------
901
- vars : Dict[str, str], default {}
902
- Dictionary of environment variables to set.
903
- """
904
- ...
905
-
906
- @typing.overload
907
- def pypi(*, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
908
- """
909
- Specifies the PyPI packages for the step.
910
-
911
- Information in this decorator will augment any
912
- attributes set in the `@pypi_base` flow-level decorator. Hence,
913
- you can use `@pypi_base` to set packages required by all
914
- steps and use `@pypi` to specify step-specific overrides.
915
-
916
-
917
- Parameters
918
- ----------
919
- packages : Dict[str, str], default: {}
920
- Packages to use for this step. The key is the name of the package
921
- and the value is the version to use.
922
- python : str, optional, default: None
923
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
924
- that the version used will correspond to the version of the Python interpreter used to start the run.
925
- """
926
- ...
927
-
928
- @typing.overload
929
- def pypi(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
930
- ...
931
-
932
- @typing.overload
933
- def pypi(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
934
- ...
935
-
936
- def pypi(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None):
937
- """
938
- Specifies the PyPI packages for the step.
939
-
940
- Information in this decorator will augment any
941
- attributes set in the `@pypi_base` flow-level decorator. Hence,
942
- you can use `@pypi_base` to set packages required by all
943
- steps and use `@pypi` to specify step-specific overrides.
775
+ The decorator will create an optional artifact, specified by `var`, which
776
+ contains the exception raised. You can use it to detect the presence
777
+ of errors, indicating that all happy-path artifacts produced by the step
778
+ are missing.
944
779
 
945
780
 
946
781
  Parameters
947
782
  ----------
948
- packages : Dict[str, str], default: {}
949
- Packages to use for this step. The key is the name of the package
950
- and the value is the version to use.
951
- python : str, optional, default: None
952
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
953
- that the version used will correspond to the version of the Python interpreter used to start the run.
954
- """
955
- ...
956
-
957
- def nvidia(*, gpu: int, gpu_type: str, queue_timeout: int) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
958
- """
959
- Specifies that this step should execute on DGX cloud.
960
-
961
-
962
- Parameters
963
- ----------
964
- gpu : int
965
- Number of GPUs to use.
966
- gpu_type : str
967
- Type of Nvidia GPU to use.
968
- queue_timeout : int
969
- Time to keep the job in NVCF's queue.
970
- """
971
- ...
972
-
973
- @typing.overload
974
- def test_append_card(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
975
- """
976
- A simple decorator that demonstrates using CardDecoratorInjector
977
- to inject a card and render simple markdown content.
978
- """
979
- ...
980
-
981
- @typing.overload
982
- def test_append_card(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
983
- ...
984
-
985
- def test_append_card(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
986
- """
987
- A simple decorator that demonstrates using CardDecoratorInjector
988
- to inject a card and render simple markdown content.
989
- """
990
- ...
991
-
992
- @typing.overload
993
- def nebius_s3_proxy(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
994
- """
995
- Nebius-specific S3 Proxy decorator for routing S3 requests through a local proxy service.
996
- It exists to make it easier for users to know that this decorator should only be used with
997
- a Neo Cloud like Nebius.
998
- """
999
- ...
1000
-
1001
- @typing.overload
1002
- def nebius_s3_proxy(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1003
- ...
1004
-
1005
- def nebius_s3_proxy(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
1006
- """
1007
- Nebius-specific S3 Proxy decorator for routing S3 requests through a local proxy service.
1008
- It exists to make it easier for users to know that this decorator should only be used with
1009
- a Neo Cloud like Nebius.
1010
- """
1011
- ...
1012
-
1013
- def nvct(*, gpu: int, gpu_type: str) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1014
- """
1015
- Specifies that this step should execute on DGX cloud.
1016
-
1017
-
1018
- Parameters
1019
- ----------
1020
- gpu : int
1021
- Number of GPUs to use.
1022
- gpu_type : str
1023
- Type of Nvidia GPU to use.
1024
- """
1025
- ...
1026
-
1027
- @typing.overload
1028
- def secrets(*, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = [], role: typing.Optional[str] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1029
- """
1030
- Specifies secrets to be retrieved and injected as environment variables prior to
1031
- the execution of a step.
1032
-
1033
-
1034
- Parameters
1035
- ----------
1036
- sources : List[Union[str, Dict[str, Any]]], default: []
1037
- List of secret specs, defining how the secrets are to be retrieved
1038
- role : str, optional, default: None
1039
- Role to use for fetching secrets
783
+ var : str, optional, default None
784
+ Name of the artifact in which to store the caught exception.
785
+ If not specified, the exception is not stored.
786
+ print_exception : bool, default True
787
+ Determines whether or not the exception is printed to
788
+ stdout when caught.
1040
789
  """
1041
790
  ...
1042
791
 
1043
792
  @typing.overload
1044
- def secrets(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
793
+ def catch(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1045
794
  ...
1046
795
 
1047
796
  @typing.overload
1048
- def secrets(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1049
- ...
1050
-
1051
- def secrets(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = [], role: typing.Optional[str] = None):
1052
- """
1053
- Specifies secrets to be retrieved and injected as environment variables prior to
1054
- the execution of a step.
1055
-
1056
-
1057
- Parameters
1058
- ----------
1059
- sources : List[Union[str, Dict[str, Any]]], default: []
1060
- List of secret specs, defining how the secrets are to be retrieved
1061
- role : str, optional, default: None
1062
- Role to use for fetching secrets
1063
- """
797
+ def catch(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1064
798
  ...
1065
799
 
1066
- @typing.overload
1067
- def resources(*, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
800
+ def catch(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, var: typing.Optional[str] = None, print_exception: bool = True):
1068
801
  """
1069
- Specifies the resources needed when executing this step.
1070
-
1071
- Use `@resources` to specify the resource requirements
1072
- independently of the specific compute layer (`@batch`, `@kubernetes`).
802
+ Specifies that the step will succeed under all circumstances.
1073
803
 
1074
- You can choose the compute layer on the command line by executing e.g.
1075
- ```
1076
- python myflow.py run --with batch
1077
- ```
1078
- or
1079
- ```
1080
- python myflow.py run --with kubernetes
1081
- ```
1082
- which executes the flow on the desired system using the
1083
- requirements specified in `@resources`.
804
+ The decorator will create an optional artifact, specified by `var`, which
805
+ contains the exception raised. You can use it to detect the presence
806
+ of errors, indicating that all happy-path artifacts produced by the step
807
+ are missing.
1084
808
 
1085
809
 
1086
810
  Parameters
1087
811
  ----------
1088
- cpu : int, default 1
1089
- Number of CPUs required for this step.
1090
- gpu : int, optional, default None
1091
- Number of GPUs required for this step.
1092
- disk : int, optional, default None
1093
- Disk size (in MB) required for this step. Only applies on Kubernetes.
1094
- memory : int, default 4096
1095
- Memory size (in MB) required for this step.
1096
- shared_memory : int, optional, default None
1097
- The value for the size (in MiB) of the /dev/shm volume for this step.
1098
- This parameter maps to the `--shm-size` option in Docker.
812
+ var : str, optional, default None
813
+ Name of the artifact in which to store the caught exception.
814
+ If not specified, the exception is not stored.
815
+ print_exception : bool, default True
816
+ Determines whether or not the exception is printed to
817
+ stdout when caught.
1099
818
  """
1100
819
  ...
1101
820
 
1102
- @typing.overload
1103
- def resources(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1104
- ...
1105
-
1106
- @typing.overload
1107
- def resources(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1108
- ...
1109
-
1110
- def resources(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None):
821
+ def ollama(*, models: list, backend: str, force_pull: bool, cache_update_policy: str, force_cache_update: bool, debug: bool, circuit_breaker_config: dict, timeout_config: dict) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1111
822
  """
1112
- Specifies the resources needed when executing this step.
1113
-
1114
- Use `@resources` to specify the resource requirements
1115
- independently of the specific compute layer (`@batch`, `@kubernetes`).
823
+ This decorator is used to run Ollama APIs as Metaflow task sidecars.
1116
824
 
1117
- You can choose the compute layer on the command line by executing e.g.
1118
- ```
1119
- python myflow.py run --with batch
1120
- ```
1121
- or
1122
- ```
1123
- python myflow.py run --with kubernetes
1124
- ```
1125
- which executes the flow on the desired system using the
1126
- requirements specified in `@resources`.
825
+ User code call
826
+ --------------
827
+ @ollama(
828
+ models=[...],
829
+ ...
830
+ )
1127
831
 
832
+ Valid backend options
833
+ ---------------------
834
+ - 'local': Run as a separate process on the local task machine.
835
+ - (TODO) 'managed': Outerbounds hosts and selects compute provider.
836
+ - (TODO) 'remote': Spin up separate instance to serve Ollama models.
1128
837
 
1129
- Parameters
1130
- ----------
1131
- cpu : int, default 1
1132
- Number of CPUs required for this step.
1133
- gpu : int, optional, default None
1134
- Number of GPUs required for this step.
1135
- disk : int, optional, default None
1136
- Disk size (in MB) required for this step. Only applies on Kubernetes.
1137
- memory : int, default 4096
1138
- Memory size (in MB) required for this step.
1139
- shared_memory : int, optional, default None
1140
- The value for the size (in MiB) of the /dev/shm volume for this step.
1141
- This parameter maps to the `--shm-size` option in Docker.
1142
- """
1143
- ...
1144
-
1145
- @typing.overload
1146
- def app_deploy(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1147
- """
1148
- Decorator prototype for all step decorators. This function gets specialized
1149
- and imported for all decorators types by _import_plugin_decorators().
1150
- """
1151
- ...
1152
-
1153
- @typing.overload
1154
- def app_deploy(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1155
- ...
1156
-
1157
- def app_deploy(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
1158
- """
1159
- Decorator prototype for all step decorators. This function gets specialized
1160
- and imported for all decorators types by _import_plugin_decorators().
1161
- """
1162
- ...
1163
-
1164
- def s3_proxy(*, integration_name: typing.Optional[str] = None, write_mode: typing.Optional[str] = None, debug: typing.Optional[bool] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1165
- """
1166
- S3 Proxy decorator for routing S3 requests through a local proxy service.
838
+ Valid model options
839
+ -------------------
840
+ Any model here https://ollama.com/search, e.g. 'llama3.2', 'llama3.3'
1167
841
 
1168
842
 
1169
843
  Parameters
1170
844
  ----------
1171
- integration_name : str, optional
1172
- Name of the S3 proxy integration. If not specified, will use the only
1173
- available S3 proxy integration in the namespace (fails if multiple exist).
1174
- write_mode : str, optional
1175
- The desired behavior during write operations to target (origin) S3 bucket.
1176
- allowed options are:
1177
- "origin-and-cache" -> write to both the target S3 bucket and local object
1178
- storage
1179
- "origin" -> only write to the target S3 bucket
1180
- "cache" -> only write to the object storage service used for caching
1181
- debug : bool, optional
1182
- Enable debug logging for proxy operations.
1183
- """
1184
- ...
1185
-
1186
- @typing.overload
1187
- def fast_bakery_internal(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1188
- """
1189
- Internal decorator to support Fast bakery
1190
- """
1191
- ...
1192
-
1193
- @typing.overload
1194
- def fast_bakery_internal(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1195
- ...
1196
-
1197
- def fast_bakery_internal(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
1198
- """
1199
- Internal decorator to support Fast bakery
845
+ models: list[str]
846
+ List of Ollama containers running models in sidecars.
847
+ backend: str
848
+ Determines where and how to run the Ollama process.
849
+ force_pull: bool
850
+ Whether to run `ollama pull` no matter what, or first check the remote cache in Metaflow datastore for this model key.
851
+ cache_update_policy: str
852
+ Cache update policy: "auto", "force", or "never".
853
+ force_cache_update: bool
854
+ Simple override for "force" cache update policy.
855
+ debug: bool
856
+ Whether to turn on verbose debugging logs.
857
+ circuit_breaker_config: dict
858
+ Configuration for circuit breaker protection. Keys: failure_threshold, recovery_timeout, reset_timeout.
859
+ timeout_config: dict
860
+ Configuration for various operation timeouts. Keys: pull, stop, health_check, install, server_startup.
1200
861
  """
1201
862
  ...
1202
863
 
@@ -1270,6 +931,80 @@ def coreweave_s3_proxy(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFla
1270
931
  """
1271
932
  ...
1272
933
 
934
+ @typing.overload
935
+ def parallel(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
936
+ """
937
+ Decorator prototype for all step decorators. This function gets specialized
938
+ and imported for all decorators types by _import_plugin_decorators().
939
+ """
940
+ ...
941
+
942
+ @typing.overload
943
+ def parallel(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
944
+ ...
945
+
946
+ def parallel(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
947
+ """
948
+ Decorator prototype for all step decorators. This function gets specialized
949
+ and imported for all decorators types by _import_plugin_decorators().
950
+ """
951
+ ...
952
+
953
+ @typing.overload
954
+ def retry(*, times: int = 3, minutes_between_retries: int = 2) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
955
+ """
956
+ Specifies the number of times the task corresponding
957
+ to a step needs to be retried.
958
+
959
+ This decorator is useful for handling transient errors, such as networking issues.
960
+ If your task contains operations that can't be retried safely, e.g. database updates,
961
+ it is advisable to annotate it with `@retry(times=0)`.
962
+
963
+ This can be used in conjunction with the `@catch` decorator. The `@catch`
964
+ decorator will execute a no-op task after all retries have been exhausted,
965
+ ensuring that the flow execution can continue.
966
+
967
+
968
+ Parameters
969
+ ----------
970
+ times : int, default 3
971
+ Number of times to retry this task.
972
+ minutes_between_retries : int, default 2
973
+ Number of minutes between retries.
974
+ """
975
+ ...
976
+
977
+ @typing.overload
978
+ def retry(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
979
+ ...
980
+
981
+ @typing.overload
982
+ def retry(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
983
+ ...
984
+
985
+ def retry(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, times: int = 3, minutes_between_retries: int = 2):
986
+ """
987
+ Specifies the number of times the task corresponding
988
+ to a step needs to be retried.
989
+
990
+ This decorator is useful for handling transient errors, such as networking issues.
991
+ If your task contains operations that can't be retried safely, e.g. database updates,
992
+ it is advisable to annotate it with `@retry(times=0)`.
993
+
994
+ This can be used in conjunction with the `@catch` decorator. The `@catch`
995
+ decorator will execute a no-op task after all retries have been exhausted,
996
+ ensuring that the flow execution can continue.
997
+
998
+
999
+ Parameters
1000
+ ----------
1001
+ times : int, default 3
1002
+ Number of times to retry this task.
1003
+ minutes_between_retries : int, default 2
1004
+ Number of minutes between retries.
1005
+ """
1006
+ ...
1007
+
1273
1008
  @typing.overload
1274
1009
  def conda(*, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1275
1010
  """
@@ -1330,135 +1065,286 @@ def conda(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], ty
1330
1065
  ...
1331
1066
 
1332
1067
  @typing.overload
1333
- def parallel(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1068
+ def secrets(*, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = [], role: typing.Optional[str] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1334
1069
  """
1335
- Decorator prototype for all step decorators. This function gets specialized
1336
- and imported for all decorators types by _import_plugin_decorators().
1070
+ Specifies secrets to be retrieved and injected as environment variables prior to
1071
+ the execution of a step.
1072
+
1073
+
1074
+ Parameters
1075
+ ----------
1076
+ sources : List[Union[str, Dict[str, Any]]], default: []
1077
+ List of secret specs, defining how the secrets are to be retrieved
1078
+ role : str, optional, default: None
1079
+ Role to use for fetching secrets
1337
1080
  """
1338
1081
  ...
1339
1082
 
1340
1083
  @typing.overload
1341
- def parallel(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1084
+ def secrets(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1342
1085
  ...
1343
1086
 
1344
- def parallel(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
1087
+ @typing.overload
1088
+ def secrets(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1089
+ ...
1090
+
1091
+ def secrets(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = [], role: typing.Optional[str] = None):
1345
1092
  """
1346
- Decorator prototype for all step decorators. This function gets specialized
1347
- and imported for all decorators types by _import_plugin_decorators().
1093
+ Specifies secrets to be retrieved and injected as environment variables prior to
1094
+ the execution of a step.
1095
+
1096
+
1097
+ Parameters
1098
+ ----------
1099
+ sources : List[Union[str, Dict[str, Any]]], default: []
1100
+ List of secret specs, defining how the secrets are to be retrieved
1101
+ role : str, optional, default: None
1102
+ Role to use for fetching secrets
1348
1103
  """
1349
1104
  ...
1350
1105
 
1351
- def with_artifact_store(f: typing.Optional[typing.Type[FlowSpecDerived]] = None):
1106
+ @typing.overload
1107
+ def test_append_card(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1352
1108
  """
1353
- Allows setting external datastores to save data for the
1354
- `@checkpoint`/`@model`/`@huggingface_hub` decorators.
1109
+ A simple decorator that demonstrates using CardDecoratorInjector
1110
+ to inject a card and render simple markdown content.
1111
+ """
1112
+ ...
1113
+
1114
+ @typing.overload
1115
+ def test_append_card(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1116
+ ...
1117
+
1118
+ def test_append_card(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
1119
+ """
1120
+ A simple decorator that demonstrates using CardDecoratorInjector
1121
+ to inject a card and render simple markdown content.
1122
+ """
1123
+ ...
1124
+
1125
+ @typing.overload
1126
+ def checkpoint(*, load_policy: str = 'fresh', temp_dir_root: str = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1127
+ """
1128
+ Enables checkpointing for a step.
1355
1129
 
1356
- This decorator is useful when users wish to save data to a different datastore
1357
- than what is configured in Metaflow. This can be for variety of reasons:
1130
+ > Examples
1358
1131
 
1359
- 1. Data security: The objects needs to be stored in a bucket (object storage) that is not accessible by other flows.
1360
- 2. Data Locality: The location where the task is executing is not located in the same region as the datastore.
1361
- - Example: Metaflow datastore lives in US East, but the task is executing in Finland datacenters.
1362
- 3. Data Lifecycle Policies: The objects need to be archived / managed separately from the Metaflow managed objects.
1363
- - Example: Flow is training very large models that need to be stored separately and will be deleted more aggressively than the Metaflow managed objects.
1132
+ - Saving Checkpoints
1364
1133
 
1365
- Usage:
1134
+ ```python
1135
+ @checkpoint
1136
+ @step
1137
+ def train(self):
1138
+ model = create_model(self.parameters, checkpoint_path = None)
1139
+ for i in range(self.epochs):
1140
+ # some training logic
1141
+ loss = model.train(self.dataset)
1142
+ if i % 10 == 0:
1143
+ model.save(
1144
+ current.checkpoint.directory,
1145
+ )
1146
+ # saves the contents of the `current.checkpoint.directory` as a checkpoint
1147
+ # and returns a reference dictionary to the checkpoint saved in the datastore
1148
+ self.latest_checkpoint = current.checkpoint.save(
1149
+ name="epoch_checkpoint",
1150
+ metadata={
1151
+ "epoch": i,
1152
+ "loss": loss,
1153
+ }
1154
+ )
1155
+ ```
1156
+
1157
+ - Using Loaded Checkpoints
1158
+
1159
+ ```python
1160
+ @retry(times=3)
1161
+ @checkpoint
1162
+ @step
1163
+ def train(self):
1164
+ # Assume that the task has restarted and the previous attempt of the task
1165
+ # saved a checkpoint
1166
+ checkpoint_path = None
1167
+ if current.checkpoint.is_loaded: # Check if a checkpoint is loaded
1168
+ print("Loaded checkpoint from the previous attempt")
1169
+ checkpoint_path = current.checkpoint.directory
1170
+
1171
+ model = create_model(self.parameters, checkpoint_path = checkpoint_path)
1172
+ for i in range(self.epochs):
1173
+ ...
1174
+ ```
1175
+
1176
+
1177
+ Parameters
1366
1178
  ----------
1179
+ load_policy : str, default: "fresh"
1180
+ The policy for loading the checkpoint. The following policies are supported:
1181
+ - "eager": Loads the latest available checkpoint within the namespace.
1182
+ With this mode, the latest checkpoint written by any previous task (can be even a different run) of the step
1183
+ will be loaded at the start of the task.
1184
+ - "none": Do not load any checkpoint
1185
+ - "fresh": Loads the latest checkpoint created within the running Task.
1186
+ This mode helps loading checkpoints across various retry attempts of the same task.
1187
+ With this mode, no checkpoint will be loaded at the start of a task but any checkpoints
1188
+ created within the task will be loaded when the task retries execution on failure.
1367
1189
 
1368
- - Using a custom IAM role to access the datastore.
1190
+ temp_dir_root : str, default: None
1191
+ The root directory under which `current.checkpoint.directory` will be created.
1192
+ """
1193
+ ...
1194
+
1195
+ @typing.overload
1196
+ def checkpoint(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1197
+ ...
1198
+
1199
+ @typing.overload
1200
+ def checkpoint(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1201
+ ...
1202
+
1203
+ def checkpoint(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, load_policy: str = 'fresh', temp_dir_root: str = None):
1204
+ """
1205
+ Enables checkpointing for a step.
1369
1206
 
1370
- ```python
1371
- @with_artifact_store(
1372
- type="s3",
1373
- config=lambda: {
1374
- "root": "s3://my-bucket-foo/path/to/root",
1375
- "role_arn": ROLE,
1376
- },
1377
- )
1378
- class MyFlow(FlowSpec):
1207
+ > Examples
1379
1208
 
1380
- @checkpoint
1381
- @step
1382
- def start(self):
1383
- with open("my_file.txt", "w") as f:
1384
- f.write("Hello, World!")
1385
- self.external_bucket_checkpoint = current.checkpoint.save("my_file.txt")
1386
- self.next(self.end)
1209
+ - Saving Checkpoints
1387
1210
 
1388
- ```
1211
+ ```python
1212
+ @checkpoint
1213
+ @step
1214
+ def train(self):
1215
+ model = create_model(self.parameters, checkpoint_path = None)
1216
+ for i in range(self.epochs):
1217
+ # some training logic
1218
+ loss = model.train(self.dataset)
1219
+ if i % 10 == 0:
1220
+ model.save(
1221
+ current.checkpoint.directory,
1222
+ )
1223
+ # saves the contents of the `current.checkpoint.directory` as a checkpoint
1224
+ # and returns a reference dictionary to the checkpoint saved in the datastore
1225
+ self.latest_checkpoint = current.checkpoint.save(
1226
+ name="epoch_checkpoint",
1227
+ metadata={
1228
+ "epoch": i,
1229
+ "loss": loss,
1230
+ }
1231
+ )
1232
+ ```
1389
1233
 
1390
- - Using credentials to access the s3-compatible datastore.
1234
+ - Using Loaded Checkpoints
1391
1235
 
1392
- ```python
1393
- @with_artifact_store(
1394
- type="s3",
1395
- config=lambda: {
1396
- "root": "s3://my-bucket-foo/path/to/root",
1397
- "client_params": {
1398
- "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1399
- "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1400
- },
1401
- },
1402
- )
1403
- class MyFlow(FlowSpec):
1236
+ ```python
1237
+ @retry(times=3)
1238
+ @checkpoint
1239
+ @step
1240
+ def train(self):
1241
+ # Assume that the task has restarted and the previous attempt of the task
1242
+ # saved a checkpoint
1243
+ checkpoint_path = None
1244
+ if current.checkpoint.is_loaded: # Check if a checkpoint is loaded
1245
+ print("Loaded checkpoint from the previous attempt")
1246
+ checkpoint_path = current.checkpoint.directory
1404
1247
 
1405
- @checkpoint
1406
- @step
1407
- def start(self):
1408
- with open("my_file.txt", "w") as f:
1409
- f.write("Hello, World!")
1410
- self.external_bucket_checkpoint = current.checkpoint.save("my_file.txt")
1411
- self.next(self.end)
1248
+ model = create_model(self.parameters, checkpoint_path = checkpoint_path)
1249
+ for i in range(self.epochs):
1250
+ ...
1251
+ ```
1252
+
1253
+
1254
+ Parameters
1255
+ ----------
1256
+ load_policy : str, default: "fresh"
1257
+ The policy for loading the checkpoint. The following policies are supported:
1258
+ - "eager": Loads the latest available checkpoint within the namespace.
1259
+ With this mode, the latest checkpoint written by any previous task (can be even a different run) of the step
1260
+ will be loaded at the start of the task.
1261
+ - "none": Do not load any checkpoint
1262
+ - "fresh": Loads the latest checkpoint created within the running Task.
1263
+ This mode helps loading checkpoints across various retry attempts of the same task.
1264
+ With this mode, no checkpoint will be loaded at the start of a task but any checkpoints
1265
+ created within the task will be loaded when the task retries execution on failure.
1266
+
1267
+ temp_dir_root : str, default: None
1268
+ The root directory under which `current.checkpoint.directory` will be created.
1269
+ """
1270
+ ...
1271
+
1272
+ @typing.overload
1273
+ def resources(*, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1274
+ """
1275
+ Specifies the resources needed when executing this step.
1276
+
1277
+ Use `@resources` to specify the resource requirements
1278
+ independently of the specific compute layer (`@batch`, `@kubernetes`).
1279
+
1280
+ You can choose the compute layer on the command line by executing e.g.
1281
+ ```
1282
+ python myflow.py run --with batch
1283
+ ```
1284
+ or
1285
+ ```
1286
+ python myflow.py run --with kubernetes
1287
+ ```
1288
+ which executes the flow on the desired system using the
1289
+ requirements specified in `@resources`.
1412
1290
 
1413
- ```
1414
1291
 
1415
- - Accessing objects stored in external datastores after task execution.
1292
+ Parameters
1293
+ ----------
1294
+ cpu : int, default 1
1295
+ Number of CPUs required for this step.
1296
+ gpu : int, optional, default None
1297
+ Number of GPUs required for this step.
1298
+ disk : int, optional, default None
1299
+ Disk size (in MB) required for this step. Only applies on Kubernetes.
1300
+ memory : int, default 4096
1301
+ Memory size (in MB) required for this step.
1302
+ shared_memory : int, optional, default None
1303
+ The value for the size (in MiB) of the /dev/shm volume for this step.
1304
+ This parameter maps to the `--shm-size` option in Docker.
1305
+ """
1306
+ ...
1307
+
1308
+ @typing.overload
1309
+ def resources(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1310
+ ...
1311
+
1312
+ @typing.overload
1313
+ def resources(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1314
+ ...
1315
+
1316
+ def resources(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None):
1317
+ """
1318
+ Specifies the resources needed when executing this step.
1416
1319
 
1417
- ```python
1418
- run = Run("CheckpointsTestsFlow/8992")
1419
- with artifact_store_from(run=run, config={
1420
- "client_params": {
1421
- "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1422
- "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1423
- },
1424
- }):
1425
- with Checkpoint() as cp:
1426
- latest = cp.list(
1427
- task=run["start"].task
1428
- )[0]
1429
- print(latest)
1430
- cp.load(
1431
- latest,
1432
- "test-checkpoints"
1433
- )
1320
+ Use `@resources` to specify the resource requirements
1321
+ independently of the specific compute layer (`@batch`, `@kubernetes`).
1434
1322
 
1435
- task = Task("TorchTuneFlow/8484/train/53673")
1436
- with artifact_store_from(run=run, config={
1437
- "client_params": {
1438
- "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1439
- "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1440
- },
1441
- }):
1442
- load_model(
1443
- task.data.model_ref,
1444
- "test-models"
1445
- )
1446
- ```
1447
- Parameters:
1448
- ----------
1323
+ You can choose the compute layer on the command line by executing e.g.
1324
+ ```
1325
+ python myflow.py run --with batch
1326
+ ```
1327
+ or
1328
+ ```
1329
+ python myflow.py run --with kubernetes
1330
+ ```
1331
+ which executes the flow on the desired system using the
1332
+ requirements specified in `@resources`.
1449
1333
 
1450
- type: str
1451
- The type of the datastore. Can be one of 's3', 'gcs', 'azure' or any other supported metaflow Datastore.
1452
1334
 
1453
- config: dict or Callable
1454
- Dictionary of configuration options for the datastore. The following keys are required:
1455
- - root: The root path in the datastore where the data will be saved. (needs to be in the format expected by the datastore)
1456
- - example: 's3://bucket-name/path/to/root'
1457
- - example: 'gs://bucket-name/path/to/root'
1458
- - example: 'https://myblockacc.blob.core.windows.net/metaflow/'
1459
- - role_arn (optional): AWS IAM role to access s3 bucket (only when `type` is 's3')
1460
- - session_vars (optional): AWS session variables to access s3 bucket (only when `type` is 's3')
1461
- - client_params (optional): AWS client parameters to access s3 bucket (only when `type` is 's3')
1335
+ Parameters
1336
+ ----------
1337
+ cpu : int, default 1
1338
+ Number of CPUs required for this step.
1339
+ gpu : int, optional, default None
1340
+ Number of GPUs required for this step.
1341
+ disk : int, optional, default None
1342
+ Disk size (in MB) required for this step. Only applies on Kubernetes.
1343
+ memory : int, default 4096
1344
+ Memory size (in MB) required for this step.
1345
+ shared_memory : int, optional, default None
1346
+ The value for the size (in MiB) of the /dev/shm volume for this step.
1347
+ This parameter maps to the `--shm-size` option in Docker.
1462
1348
  """
1463
1349
  ...
1464
1350
 
@@ -1563,38 +1449,210 @@ def trigger_on_finish(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *
1563
1449
  """
1564
1450
  ...
1565
1451
 
1566
- def project(*, name: str, branch: typing.Optional[str] = None, production: bool = False) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1452
+ @typing.overload
1453
+ def trigger(*, event: typing.Union[str, typing.Dict[str, typing.Any], None] = None, events: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = [], options: typing.Dict[str, typing.Any] = {}) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1567
1454
  """
1568
- Specifies what flows belong to the same project.
1455
+ Specifies the event(s) that this flow depends on.
1569
1456
 
1570
- A project-specific namespace is created for all flows that
1571
- use the same `@project(name)`.
1457
+ ```
1458
+ @trigger(event='foo')
1459
+ ```
1460
+ or
1461
+ ```
1462
+ @trigger(events=['foo', 'bar'])
1463
+ ```
1464
+
1465
+ Additionally, you can specify the parameter mappings
1466
+ to map event payload to Metaflow parameters for the flow.
1467
+ ```
1468
+ @trigger(event={'name':'foo', 'parameters':{'flow_param': 'event_field'}})
1469
+ ```
1470
+ or
1471
+ ```
1472
+ @trigger(events=[{'name':'foo', 'parameters':{'flow_param_1': 'event_field_1'}},
1473
+ {'name':'bar', 'parameters':{'flow_param_2': 'event_field_2'}}])
1474
+ ```
1475
+
1476
+ 'parameters' can also be a list of strings and tuples like so:
1477
+ ```
1478
+ @trigger(event={'name':'foo', 'parameters':['common_name', ('flow_param', 'event_field')]})
1479
+ ```
1480
+ This is equivalent to:
1481
+ ```
1482
+ @trigger(event={'name':'foo', 'parameters':{'common_name': 'common_name', 'flow_param': 'event_field'}})
1483
+ ```
1484
+
1485
+
1486
+ Parameters
1487
+ ----------
1488
+ event : Union[str, Dict[str, Any]], optional, default None
1489
+ Event dependency for this flow.
1490
+ events : List[Union[str, Dict[str, Any]]], default []
1491
+ Events dependency for this flow.
1492
+ options : Dict[str, Any], default {}
1493
+ Backend-specific configuration for tuning eventing behavior.
1494
+ """
1495
+ ...
1496
+
1497
+ @typing.overload
1498
+ def trigger(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1499
+ ...
1500
+
1501
+ def trigger(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, event: typing.Union[str, typing.Dict[str, typing.Any], None] = None, events: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = [], options: typing.Dict[str, typing.Any] = {}):
1502
+ """
1503
+ Specifies the event(s) that this flow depends on.
1504
+
1505
+ ```
1506
+ @trigger(event='foo')
1507
+ ```
1508
+ or
1509
+ ```
1510
+ @trigger(events=['foo', 'bar'])
1511
+ ```
1512
+
1513
+ Additionally, you can specify the parameter mappings
1514
+ to map event payload to Metaflow parameters for the flow.
1515
+ ```
1516
+ @trigger(event={'name':'foo', 'parameters':{'flow_param': 'event_field'}})
1517
+ ```
1518
+ or
1519
+ ```
1520
+ @trigger(events=[{'name':'foo', 'parameters':{'flow_param_1': 'event_field_1'}},
1521
+ {'name':'bar', 'parameters':{'flow_param_2': 'event_field_2'}}])
1522
+ ```
1523
+
1524
+ 'parameters' can also be a list of strings and tuples like so:
1525
+ ```
1526
+ @trigger(event={'name':'foo', 'parameters':['common_name', ('flow_param', 'event_field')]})
1527
+ ```
1528
+ This is equivalent to:
1529
+ ```
1530
+ @trigger(event={'name':'foo', 'parameters':{'common_name': 'common_name', 'flow_param': 'event_field'}})
1531
+ ```
1532
+
1533
+
1534
+ Parameters
1535
+ ----------
1536
+ event : Union[str, Dict[str, Any]], optional, default None
1537
+ Event dependency for this flow.
1538
+ events : List[Union[str, Dict[str, Any]]], default []
1539
+ Events dependency for this flow.
1540
+ options : Dict[str, Any], default {}
1541
+ Backend-specific configuration for tuning eventing behavior.
1542
+ """
1543
+ ...
1544
+
1545
+ def with_artifact_store(f: typing.Optional[typing.Type[FlowSpecDerived]] = None):
1546
+ """
1547
+ Allows setting external datastores to save data for the
1548
+ `@checkpoint`/`@model`/`@huggingface_hub` decorators.
1549
+
1550
+ This decorator is useful when users wish to save data to a different datastore
1551
+ than what is configured in Metaflow. This can be for a variety of reasons:
1552
+
1553
+ 1. Data security: The objects need to be stored in a bucket (object storage) that is not accessible by other flows.
1554
+ 2. Data Locality: The location where the task is executing is not located in the same region as the datastore.
1555
+ - Example: Metaflow datastore lives in US East, but the task is executing in Finland datacenters.
1556
+ 3. Data Lifecycle Policies: The objects need to be archived / managed separately from the Metaflow managed objects.
1557
+ - Example: Flow is training very large models that need to be stored separately and will be deleted more aggressively than the Metaflow managed objects.
1558
+
1559
+ Usage:
1560
+ ----------
1561
+
1562
+ - Using a custom IAM role to access the datastore.
1563
+
1564
+ ```python
1565
+ @with_artifact_store(
1566
+ type="s3",
1567
+ config=lambda: {
1568
+ "root": "s3://my-bucket-foo/path/to/root",
1569
+ "role_arn": ROLE,
1570
+ },
1571
+ )
1572
+ class MyFlow(FlowSpec):
1573
+
1574
+ @checkpoint
1575
+ @step
1576
+ def start(self):
1577
+ with open("my_file.txt", "w") as f:
1578
+ f.write("Hello, World!")
1579
+ self.external_bucket_checkpoint = current.checkpoint.save("my_file.txt")
1580
+ self.next(self.end)
1581
+
1582
+ ```
1583
+
1584
+ - Using credentials to access the s3-compatible datastore.
1585
+
1586
+ ```python
1587
+ @with_artifact_store(
1588
+ type="s3",
1589
+ config=lambda: {
1590
+ "root": "s3://my-bucket-foo/path/to/root",
1591
+ "client_params": {
1592
+ "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1593
+ "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1594
+ },
1595
+ },
1596
+ )
1597
+ class MyFlow(FlowSpec):
1598
+
1599
+ @checkpoint
1600
+ @step
1601
+ def start(self):
1602
+ with open("my_file.txt", "w") as f:
1603
+ f.write("Hello, World!")
1604
+ self.external_bucket_checkpoint = current.checkpoint.save("my_file.txt")
1605
+ self.next(self.end)
1606
+
1607
+ ```
1572
1608
 
1609
+ - Accessing objects stored in external datastores after task execution.
1573
1610
 
1574
- Parameters
1611
+ ```python
1612
+ run = Run("CheckpointsTestsFlow/8992")
1613
+ with artifact_store_from(run=run, config={
1614
+ "client_params": {
1615
+ "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1616
+ "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1617
+ },
1618
+ }):
1619
+ with Checkpoint() as cp:
1620
+ latest = cp.list(
1621
+ task=run["start"].task
1622
+ )[0]
1623
+ print(latest)
1624
+ cp.load(
1625
+ latest,
1626
+ "test-checkpoints"
1627
+ )
1628
+
1629
+ task = Task("TorchTuneFlow/8484/train/53673")
1630
+ with artifact_store_from(run=run, config={
1631
+ "client_params": {
1632
+ "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1633
+ "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1634
+ },
1635
+ }):
1636
+ load_model(
1637
+ task.data.model_ref,
1638
+ "test-models"
1639
+ )
1640
+ ```
1641
+ Parameters:
1575
1642
  ----------
1576
- name : str
1577
- Project name. Make sure that the name is unique amongst all
1578
- projects that use the same production scheduler. The name may
1579
- contain only lowercase alphanumeric characters and underscores.
1580
1643
 
1581
- branch : Optional[str], default None
1582
- The branch to use. If not specified, the branch is set to
1583
- `user.<username>` unless `production` is set to `True`. This can
1584
- also be set on the command line using `--branch` as a top-level option.
1585
- It is an error to specify `branch` in the decorator and on the command line.
1644
+ type: str
1645
+ The type of the datastore. Can be one of 's3', 'gcs', 'azure' or any other supported metaflow Datastore.
1586
1646
 
1587
- production : bool, default False
1588
- Whether or not the branch is the production branch. This can also be set on the
1589
- command line using `--production` as a top-level option. It is an error to specify
1590
- `production` in the decorator and on the command line.
1591
- The project branch name will be:
1592
- - if `branch` is specified:
1593
- - if `production` is True: `prod.<branch>`
1594
- - if `production` is False: `test.<branch>`
1595
- - if `branch` is not specified:
1596
- - if `production` is True: `prod`
1597
- - if `production` is False: `user.<username>`
1647
+ config: dict or Callable
1648
+ Dictionary of configuration options for the datastore. The following keys are required:
1649
+ - root: The root path in the datastore where the data will be saved. (needs to be in the format expected by the datastore)
1650
+ - example: 's3://bucket-name/path/to/root'
1651
+ - example: 'gs://bucket-name/path/to/root'
1652
+ - example: 'https://myblockacc.blob.core.windows.net/metaflow/'
1653
+ - role_arn (optional): AWS IAM role to access s3 bucket (only when `type` is 's3')
1654
+ - session_vars (optional): AWS session variables to access s3 bucket (only when `type` is 's3')
1655
+ - client_params (optional): AWS client parameters to access s3 bucket (only when `type` is 's3')
1598
1656
  """
1599
1657
  ...
1600
1658
 
@@ -1690,49 +1748,6 @@ def schedule(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, hourly:
1690
1748
  """
1691
1749
  ...
1692
1750
 
1693
- def airflow_external_task_sensor(*, timeout: int, poke_interval: int, mode: str, exponential_backoff: bool, pool: str, soft_fail: bool, name: str, description: str, external_dag_id: str, external_task_ids: typing.List[str], allowed_states: typing.List[str], failed_states: typing.List[str], execution_delta: "datetime.timedelta", check_existence: bool) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1694
- """
1695
- The `@airflow_external_task_sensor` decorator attaches a Airflow [ExternalTaskSensor](https://airflow.apache.org/docs/apache-airflow/stable/_api/airflow/sensors/external_task/index.html#airflow.sensors.external_task.ExternalTaskSensor) before the start step of the flow.
1696
- This decorator only works when a flow is scheduled on Airflow and is compiled using `airflow create`. More than one `@airflow_external_task_sensor` can be added as a flow decorators. Adding more than one decorator will ensure that `start` step starts only after all sensors finish.
1697
-
1698
-
1699
- Parameters
1700
- ----------
1701
- timeout : int
1702
- Time, in seconds before the task times out and fails. (Default: 3600)
1703
- poke_interval : int
1704
- Time in seconds that the job should wait in between each try. (Default: 60)
1705
- mode : str
1706
- How the sensor operates. Options are: { poke | reschedule }. (Default: "poke")
1707
- exponential_backoff : bool
1708
- allow progressive longer waits between pokes by using exponential backoff algorithm. (Default: True)
1709
- pool : str
1710
- the slot pool this task should run in,
1711
- slot pools are a way to limit concurrency for certain tasks. (Default:None)
1712
- soft_fail : bool
1713
- Set to true to mark the task as SKIPPED on failure. (Default: False)
1714
- name : str
1715
- Name of the sensor on Airflow
1716
- description : str
1717
- Description of sensor in the Airflow UI
1718
- external_dag_id : str
1719
- The dag_id that contains the task you want to wait for.
1720
- external_task_ids : List[str]
1721
- The list of task_ids that you want to wait for.
1722
- If None (default value) the sensor waits for the DAG. (Default: None)
1723
- allowed_states : List[str]
1724
- Iterable of allowed states, (Default: ['success'])
1725
- failed_states : List[str]
1726
- Iterable of failed or dis-allowed states. (Default: None)
1727
- execution_delta : datetime.timedelta
1728
- time difference with the previous execution to look at,
1729
- the default is the same logical date as the current task or DAG. (Default: None)
1730
- check_existence: bool
1731
- Set to True to check if the external task exists or check if
1732
- the DAG to wait for exists. (Default: True)
1733
- """
1734
- ...
1735
-
1736
1751
  @typing.overload
1737
1752
  def conda_base(*, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1738
1753
  """
@@ -1784,6 +1799,49 @@ def conda_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packa
1784
1799
  """
1785
1800
  ...
1786
1801
 
1802
+ def airflow_external_task_sensor(*, timeout: int, poke_interval: int, mode: str, exponential_backoff: bool, pool: str, soft_fail: bool, name: str, description: str, external_dag_id: str, external_task_ids: typing.List[str], allowed_states: typing.List[str], failed_states: typing.List[str], execution_delta: "datetime.timedelta", check_existence: bool) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1803
+ """
1804
+ The `@airflow_external_task_sensor` decorator attaches an Airflow [ExternalTaskSensor](https://airflow.apache.org/docs/apache-airflow/stable/_api/airflow/sensors/external_task/index.html#airflow.sensors.external_task.ExternalTaskSensor) before the start step of the flow.
1805
+ This decorator only works when a flow is scheduled on Airflow and is compiled using `airflow create`. More than one `@airflow_external_task_sensor` can be added as flow decorators. Adding more than one decorator will ensure that the `start` step starts only after all sensors finish.
1806
+
1807
+
1808
+ Parameters
1809
+ ----------
1810
+ timeout : int
1811
+ Time, in seconds before the task times out and fails. (Default: 3600)
1812
+ poke_interval : int
1813
+ Time in seconds that the job should wait in between each try. (Default: 60)
1814
+ mode : str
1815
+ How the sensor operates. Options are: { poke | reschedule }. (Default: "poke")
1816
+ exponential_backoff : bool
1817
+ allow progressive longer waits between pokes by using exponential backoff algorithm. (Default: True)
1818
+ pool : str
1819
+ the slot pool this task should run in,
1820
+ slot pools are a way to limit concurrency for certain tasks. (Default:None)
1821
+ soft_fail : bool
1822
+ Set to true to mark the task as SKIPPED on failure. (Default: False)
1823
+ name : str
1824
+ Name of the sensor on Airflow
1825
+ description : str
1826
+ Description of sensor in the Airflow UI
1827
+ external_dag_id : str
1828
+ The dag_id that contains the task you want to wait for.
1829
+ external_task_ids : List[str]
1830
+ The list of task_ids that you want to wait for.
1831
+ If None (default value) the sensor waits for the DAG. (Default: None)
1832
+ allowed_states : List[str]
1833
+ Iterable of allowed states, (Default: ['success'])
1834
+ failed_states : List[str]
1835
+ Iterable of failed or dis-allowed states. (Default: None)
1836
+ execution_delta : datetime.timedelta
1837
+ time difference with the previous execution to look at,
1838
+ the default is the same logical date as the current task or DAG. (Default: None)
1839
+ check_existence: bool
1840
+ Set to True to check if the external task exists or check if
1841
+ the DAG to wait for exists. (Default: True)
1842
+ """
1843
+ ...
1844
+
1787
1845
  def airflow_s3_key_sensor(*, timeout: int, poke_interval: int, mode: str, exponential_backoff: bool, pool: str, soft_fail: bool, name: str, description: str, bucket_key: typing.Union[str, typing.List[str]], bucket_name: str, wildcard_match: bool, aws_conn_id: str, verify: bool) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1788
1846
  """
1789
1847
  The `@airflow_s3_key_sensor` decorator attaches a Airflow [S3KeySensor](https://airflow.apache.org/docs/apache-airflow-providers-amazon/stable/_api/airflow/providers/amazon/aws/sensors/s3/index.html#airflow.providers.amazon.aws.sensors.s3.S3KeySensor)
@@ -1827,96 +1885,38 @@ def airflow_s3_key_sensor(*, timeout: int, poke_interval: int, mode: str, expone
1827
1885
  """
1828
1886
  ...
1829
1887
 
1830
- @typing.overload
1831
- def trigger(*, event: typing.Union[str, typing.Dict[str, typing.Any], None] = None, events: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = [], options: typing.Dict[str, typing.Any] = {}) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1888
+ def project(*, name: str, branch: typing.Optional[str] = None, production: bool = False) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1832
1889
  """
1833
- Specifies the event(s) that this flow depends on.
1834
-
1835
- ```
1836
- @trigger(event='foo')
1837
- ```
1838
- or
1839
- ```
1840
- @trigger(events=['foo', 'bar'])
1841
- ```
1842
-
1843
- Additionally, you can specify the parameter mappings
1844
- to map event payload to Metaflow parameters for the flow.
1845
- ```
1846
- @trigger(event={'name':'foo', 'parameters':{'flow_param': 'event_field'}})
1847
- ```
1848
- or
1849
- ```
1850
- @trigger(events=[{'name':'foo', 'parameters':{'flow_param_1': 'event_field_1'},
1851
- {'name':'bar', 'parameters':{'flow_param_2': 'event_field_2'}])
1852
- ```
1890
+ Specifies what flows belong to the same project.
1853
1891
 
1854
- 'parameters' can also be a list of strings and tuples like so:
1855
- ```
1856
- @trigger(event={'name':'foo', 'parameters':['common_name', ('flow_param', 'event_field')]})
1857
- ```
1858
- This is equivalent to:
1859
- ```
1860
- @trigger(event={'name':'foo', 'parameters':{'common_name': 'common_name', 'flow_param': 'event_field'}})
1861
- ```
1892
+ A project-specific namespace is created for all flows that
1893
+ use the same `@project(name)`.
1862
1894
 
1863
1895
 
1864
1896
  Parameters
1865
1897
  ----------
1866
- event : Union[str, Dict[str, Any]], optional, default None
1867
- Event dependency for this flow.
1868
- events : List[Union[str, Dict[str, Any]]], default []
1869
- Events dependency for this flow.
1870
- options : Dict[str, Any], default {}
1871
- Backend-specific configuration for tuning eventing behavior.
1872
- """
1873
- ...
1874
-
1875
- @typing.overload
1876
- def trigger(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1877
- ...
1878
-
1879
- def trigger(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, event: typing.Union[str, typing.Dict[str, typing.Any], None] = None, events: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = [], options: typing.Dict[str, typing.Any] = {}):
1880
- """
1881
- Specifies the event(s) that this flow depends on.
1882
-
1883
- ```
1884
- @trigger(event='foo')
1885
- ```
1886
- or
1887
- ```
1888
- @trigger(events=['foo', 'bar'])
1889
- ```
1890
-
1891
- Additionally, you can specify the parameter mappings
1892
- to map event payload to Metaflow parameters for the flow.
1893
- ```
1894
- @trigger(event={'name':'foo', 'parameters':{'flow_param': 'event_field'}})
1895
- ```
1896
- or
1897
- ```
1898
- @trigger(events=[{'name':'foo', 'parameters':{'flow_param_1': 'event_field_1'},
1899
- {'name':'bar', 'parameters':{'flow_param_2': 'event_field_2'}])
1900
- ```
1901
-
1902
- 'parameters' can also be a list of strings and tuples like so:
1903
- ```
1904
- @trigger(event={'name':'foo', 'parameters':['common_name', ('flow_param', 'event_field')]})
1905
- ```
1906
- This is equivalent to:
1907
- ```
1908
- @trigger(event={'name':'foo', 'parameters':{'common_name': 'common_name', 'flow_param': 'event_field'}})
1909
- ```
1898
+ name : str
1899
+ Project name. Make sure that the name is unique amongst all
1900
+ projects that use the same production scheduler. The name may
1901
+ contain only lowercase alphanumeric characters and underscores.
1910
1902
 
1903
+ branch : Optional[str], default None
1904
+ The branch to use. If not specified, the branch is set to
1905
+ `user.<username>` unless `production` is set to `True`. This can
1906
+ also be set on the command line using `--branch` as a top-level option.
1907
+ It is an error to specify `branch` in the decorator and on the command line.
1911
1908
 
1912
- Parameters
1913
- ----------
1914
- event : Union[str, Dict[str, Any]], optional, default None
1915
- Event dependency for this flow.
1916
- events : List[Union[str, Dict[str, Any]]], default []
1917
- Events dependency for this flow.
1918
- options : Dict[str, Any], default {}
1919
- Backend-specific configuration for tuning eventing behavior.
1909
+ production : bool, default False
1910
+ Whether or not the branch is the production branch. This can also be set on the
1911
+ command line using `--production` as a top-level option. It is an error to specify
1912
+ `production` in the decorator and on the command line.
1913
+ The project branch name will be:
1914
+ - if `branch` is specified:
1915
+ - if `production` is True: `prod.<branch>`
1916
+ - if `production` is False: `test.<branch>`
1917
+ - if `branch` is not specified:
1918
+ - if `production` is True: `prod`
1919
+ - if `production` is False: `user.<username>`
1920
1920
  """
1921
1921
  ...
1922
1922