ob-metaflow-stubs 6.0.4.1rc1__py2.py3-none-any.whl → 6.0.4.2__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (260) hide show
  1. metaflow-stubs/__init__.pyi +982 -982
  2. metaflow-stubs/cards.pyi +1 -1
  3. metaflow-stubs/cli.pyi +1 -1
  4. metaflow-stubs/cli_components/__init__.pyi +1 -1
  5. metaflow-stubs/cli_components/utils.pyi +1 -1
  6. metaflow-stubs/client/__init__.pyi +1 -1
  7. metaflow-stubs/client/core.pyi +4 -4
  8. metaflow-stubs/client/filecache.pyi +1 -1
  9. metaflow-stubs/events.pyi +2 -2
  10. metaflow-stubs/exception.pyi +1 -1
  11. metaflow-stubs/flowspec.pyi +2 -2
  12. metaflow-stubs/generated_for.txt +1 -1
  13. metaflow-stubs/includefile.pyi +2 -2
  14. metaflow-stubs/meta_files.pyi +1 -1
  15. metaflow-stubs/metadata_provider/__init__.pyi +1 -1
  16. metaflow-stubs/metadata_provider/heartbeat.pyi +1 -1
  17. metaflow-stubs/metadata_provider/metadata.pyi +1 -1
  18. metaflow-stubs/metadata_provider/util.pyi +1 -1
  19. metaflow-stubs/metaflow_config.pyi +1 -1
  20. metaflow-stubs/metaflow_current.pyi +40 -40
  21. metaflow-stubs/metaflow_git.pyi +1 -1
  22. metaflow-stubs/mf_extensions/__init__.pyi +1 -1
  23. metaflow-stubs/mf_extensions/obcheckpoint/__init__.pyi +1 -1
  24. metaflow-stubs/mf_extensions/obcheckpoint/plugins/__init__.pyi +1 -1
  25. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/__init__.pyi +1 -1
  26. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/__init__.pyi +1 -1
  27. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/async_cards.pyi +1 -1
  28. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/deco_injection_mixin.pyi +1 -1
  29. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/extra_components.pyi +2 -2
  30. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/__init__.pyi +1 -1
  31. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/cards/__init__.pyi +1 -1
  32. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/cards/checkpoint_lister.pyi +3 -3
  33. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/cards/lineage_card.pyi +1 -1
  34. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/checkpoint_storage.pyi +2 -2
  35. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/constructors.pyi +1 -1
  36. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/core.pyi +2 -2
  37. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/decorator.pyi +1 -1
  38. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/exceptions.pyi +1 -1
  39. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/final_api.pyi +1 -1
  40. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/lineage.pyi +1 -1
  41. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/__init__.pyi +1 -1
  42. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/context.pyi +1 -1
  43. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/core.pyi +2 -2
  44. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/decorator.pyi +1 -1
  45. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/exceptions.pyi +1 -1
  46. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/task_utils.pyi +1 -1
  47. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/utils.pyi +1 -1
  48. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastructures.pyi +1 -1
  49. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/exceptions.pyi +1 -1
  50. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/hf_hub/__init__.pyi +1 -1
  51. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/hf_hub/decorator.pyi +2 -2
  52. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/__init__.pyi +1 -1
  53. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/core.pyi +2 -2
  54. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/exceptions.pyi +1 -1
  55. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/model_storage.pyi +2 -2
  56. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/__init__.pyi +1 -1
  57. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/flowspec_utils.pyi +1 -1
  58. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/general.pyi +1 -1
  59. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/identity_utils.pyi +1 -1
  60. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/serialization_handler/__init__.pyi +1 -1
  61. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/serialization_handler/base.pyi +1 -1
  62. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/serialization_handler/tar.pyi +1 -1
  63. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/tar_utils.pyi +1 -1
  64. metaflow-stubs/mf_extensions/outerbounds/__init__.pyi +1 -1
  65. metaflow-stubs/mf_extensions/outerbounds/plugins/__init__.pyi +1 -1
  66. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/__init__.pyi +1 -1
  67. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/__init__.pyi +1 -1
  68. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/_state_machine.pyi +4 -1
  69. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/_vendor/__init__.pyi +1 -1
  70. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/_vendor/spinner/__init__.pyi +1 -1
  71. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/_vendor/spinner/spinners.pyi +1 -1
  72. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/app_cli.pyi +1 -1
  73. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/app_config.pyi +2 -2
  74. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/capsule.pyi +6 -3
  75. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/click_importer.pyi +1 -1
  76. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/code_package/__init__.pyi +1 -1
  77. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/code_package/code_packager.pyi +1 -1
  78. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/__init__.pyi +1 -1
  79. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/cli_generator.pyi +1 -1
  80. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/config_utils.pyi +2 -2
  81. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/schema_export.pyi +1 -1
  82. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/typed_configs.pyi +2 -2
  83. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/unified_config.pyi +5 -2
  84. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/dependencies.pyi +1 -1
  85. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/deployer.pyi +63 -4
  86. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/experimental/__init__.pyi +1 -1
  87. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/perimeters.pyi +1 -1
  88. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/utils.pyi +3 -3
  89. metaflow-stubs/mf_extensions/outerbounds/plugins/aws/__init__.pyi +1 -1
  90. metaflow-stubs/mf_extensions/outerbounds/plugins/aws/assume_role_decorator.pyi +1 -1
  91. metaflow-stubs/mf_extensions/outerbounds/plugins/card_utilities/__init__.pyi +1 -1
  92. metaflow-stubs/mf_extensions/outerbounds/plugins/card_utilities/async_cards.pyi +1 -1
  93. metaflow-stubs/mf_extensions/outerbounds/plugins/card_utilities/injector.pyi +1 -1
  94. metaflow-stubs/mf_extensions/outerbounds/plugins/checkpoint_datastores/__init__.pyi +1 -1
  95. metaflow-stubs/mf_extensions/outerbounds/plugins/checkpoint_datastores/coreweave.pyi +1 -1
  96. metaflow-stubs/mf_extensions/outerbounds/plugins/checkpoint_datastores/nebius.pyi +1 -1
  97. metaflow-stubs/mf_extensions/outerbounds/plugins/fast_bakery/__init__.pyi +1 -1
  98. metaflow-stubs/mf_extensions/outerbounds/plugins/fast_bakery/baker.pyi +1 -1
  99. metaflow-stubs/mf_extensions/outerbounds/plugins/fast_bakery/docker_environment.pyi +1 -1
  100. metaflow-stubs/mf_extensions/outerbounds/plugins/fast_bakery/fast_bakery.pyi +1 -1
  101. metaflow-stubs/mf_extensions/outerbounds/plugins/kubernetes/__init__.pyi +1 -1
  102. metaflow-stubs/mf_extensions/outerbounds/plugins/kubernetes/pod_killer.pyi +1 -1
  103. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/__init__.pyi +1 -1
  104. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/constants.pyi +1 -1
  105. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/exceptions.pyi +1 -1
  106. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/ollama.pyi +1 -1
  107. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/status_card.pyi +1 -1
  108. metaflow-stubs/mf_extensions/outerbounds/plugins/snowflake/__init__.pyi +1 -1
  109. metaflow-stubs/mf_extensions/outerbounds/plugins/snowflake/snowflake.pyi +1 -1
  110. metaflow-stubs/mf_extensions/outerbounds/profilers/__init__.pyi +1 -1
  111. metaflow-stubs/mf_extensions/outerbounds/profilers/gpu.pyi +1 -1
  112. metaflow-stubs/mf_extensions/outerbounds/remote_config.pyi +1 -1
  113. metaflow-stubs/mf_extensions/outerbounds/toplevel/__init__.pyi +1 -1
  114. metaflow-stubs/mf_extensions/outerbounds/toplevel/global_aliases_for_metaflow_package.pyi +1 -1
  115. metaflow-stubs/multicore_utils.pyi +1 -1
  116. metaflow-stubs/ob_internal.pyi +1 -1
  117. metaflow-stubs/packaging_sys/__init__.pyi +4 -4
  118. metaflow-stubs/packaging_sys/backend.pyi +1 -1
  119. metaflow-stubs/packaging_sys/distribution_support.pyi +3 -3
  120. metaflow-stubs/packaging_sys/tar_backend.pyi +4 -4
  121. metaflow-stubs/packaging_sys/utils.pyi +1 -1
  122. metaflow-stubs/packaging_sys/v1.pyi +2 -2
  123. metaflow-stubs/parameters.pyi +2 -2
  124. metaflow-stubs/plugins/__init__.pyi +9 -9
  125. metaflow-stubs/plugins/airflow/__init__.pyi +1 -1
  126. metaflow-stubs/plugins/airflow/airflow_utils.pyi +1 -1
  127. metaflow-stubs/plugins/airflow/exception.pyi +1 -1
  128. metaflow-stubs/plugins/airflow/sensors/__init__.pyi +1 -1
  129. metaflow-stubs/plugins/airflow/sensors/base_sensor.pyi +1 -1
  130. metaflow-stubs/plugins/airflow/sensors/external_task_sensor.pyi +1 -1
  131. metaflow-stubs/plugins/airflow/sensors/s3_sensor.pyi +1 -1
  132. metaflow-stubs/plugins/argo/__init__.pyi +1 -1
  133. metaflow-stubs/plugins/argo/argo_client.pyi +1 -1
  134. metaflow-stubs/plugins/argo/argo_events.pyi +1 -1
  135. metaflow-stubs/plugins/argo/argo_workflows.pyi +1 -1
  136. metaflow-stubs/plugins/argo/argo_workflows_decorator.pyi +3 -3
  137. metaflow-stubs/plugins/argo/argo_workflows_deployer.pyi +3 -3
  138. metaflow-stubs/plugins/argo/argo_workflows_deployer_objects.pyi +1 -1
  139. metaflow-stubs/plugins/argo/exit_hooks.pyi +1 -1
  140. metaflow-stubs/plugins/aws/__init__.pyi +1 -1
  141. metaflow-stubs/plugins/aws/aws_client.pyi +1 -1
  142. metaflow-stubs/plugins/aws/aws_utils.pyi +1 -1
  143. metaflow-stubs/plugins/aws/batch/__init__.pyi +1 -1
  144. metaflow-stubs/plugins/aws/batch/batch.pyi +1 -1
  145. metaflow-stubs/plugins/aws/batch/batch_client.pyi +1 -1
  146. metaflow-stubs/plugins/aws/batch/batch_decorator.pyi +1 -1
  147. metaflow-stubs/plugins/aws/secrets_manager/__init__.pyi +1 -1
  148. metaflow-stubs/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.pyi +2 -2
  149. metaflow-stubs/plugins/aws/step_functions/__init__.pyi +1 -1
  150. metaflow-stubs/plugins/aws/step_functions/event_bridge_client.pyi +1 -1
  151. metaflow-stubs/plugins/aws/step_functions/schedule_decorator.pyi +1 -1
  152. metaflow-stubs/plugins/aws/step_functions/step_functions.pyi +1 -1
  153. metaflow-stubs/plugins/aws/step_functions/step_functions_client.pyi +1 -1
  154. metaflow-stubs/plugins/aws/step_functions/step_functions_deployer.pyi +3 -3
  155. metaflow-stubs/plugins/aws/step_functions/step_functions_deployer_objects.pyi +1 -1
  156. metaflow-stubs/plugins/azure/__init__.pyi +1 -1
  157. metaflow-stubs/plugins/azure/azure_credential.pyi +1 -1
  158. metaflow-stubs/plugins/azure/azure_exceptions.pyi +1 -1
  159. metaflow-stubs/plugins/azure/azure_secret_manager_secrets_provider.pyi +2 -2
  160. metaflow-stubs/plugins/azure/azure_utils.pyi +1 -1
  161. metaflow-stubs/plugins/azure/blob_service_client_factory.pyi +1 -1
  162. metaflow-stubs/plugins/azure/includefile_support.pyi +1 -1
  163. metaflow-stubs/plugins/cards/__init__.pyi +1 -1
  164. metaflow-stubs/plugins/cards/card_client.pyi +1 -1
  165. metaflow-stubs/plugins/cards/card_creator.pyi +1 -1
  166. metaflow-stubs/plugins/cards/card_datastore.pyi +1 -1
  167. metaflow-stubs/plugins/cards/card_decorator.pyi +1 -1
  168. metaflow-stubs/plugins/cards/card_modules/__init__.pyi +1 -1
  169. metaflow-stubs/plugins/cards/card_modules/basic.pyi +2 -2
  170. metaflow-stubs/plugins/cards/card_modules/card.pyi +1 -1
  171. metaflow-stubs/plugins/cards/card_modules/components.pyi +2 -2
  172. metaflow-stubs/plugins/cards/card_modules/convert_to_native_type.pyi +1 -1
  173. metaflow-stubs/plugins/cards/card_modules/renderer_tools.pyi +1 -1
  174. metaflow-stubs/plugins/cards/card_modules/test_cards.pyi +1 -1
  175. metaflow-stubs/plugins/cards/card_resolver.pyi +1 -1
  176. metaflow-stubs/plugins/cards/component_serializer.pyi +1 -1
  177. metaflow-stubs/plugins/cards/exception.pyi +1 -1
  178. metaflow-stubs/plugins/catch_decorator.pyi +1 -1
  179. metaflow-stubs/plugins/datatools/__init__.pyi +1 -1
  180. metaflow-stubs/plugins/datatools/local.pyi +1 -1
  181. metaflow-stubs/plugins/datatools/s3/__init__.pyi +1 -1
  182. metaflow-stubs/plugins/datatools/s3/s3.pyi +2 -2
  183. metaflow-stubs/plugins/datatools/s3/s3tail.pyi +1 -1
  184. metaflow-stubs/plugins/datatools/s3/s3util.pyi +1 -1
  185. metaflow-stubs/plugins/debug_logger.pyi +1 -1
  186. metaflow-stubs/plugins/debug_monitor.pyi +1 -1
  187. metaflow-stubs/plugins/environment_decorator.pyi +1 -1
  188. metaflow-stubs/plugins/events_decorator.pyi +1 -1
  189. metaflow-stubs/plugins/exit_hook/__init__.pyi +1 -1
  190. metaflow-stubs/plugins/exit_hook/exit_hook_decorator.pyi +1 -1
  191. metaflow-stubs/plugins/frameworks/__init__.pyi +1 -1
  192. metaflow-stubs/plugins/frameworks/pytorch.pyi +1 -1
  193. metaflow-stubs/plugins/gcp/__init__.pyi +1 -1
  194. metaflow-stubs/plugins/gcp/gcp_secret_manager_secrets_provider.pyi +2 -2
  195. metaflow-stubs/plugins/gcp/gs_exceptions.pyi +1 -1
  196. metaflow-stubs/plugins/gcp/gs_storage_client_factory.pyi +1 -1
  197. metaflow-stubs/plugins/gcp/gs_utils.pyi +1 -1
  198. metaflow-stubs/plugins/gcp/includefile_support.pyi +1 -1
  199. metaflow-stubs/plugins/kubernetes/__init__.pyi +1 -1
  200. metaflow-stubs/plugins/kubernetes/kube_utils.pyi +1 -1
  201. metaflow-stubs/plugins/kubernetes/kubernetes.pyi +1 -1
  202. metaflow-stubs/plugins/kubernetes/kubernetes_client.pyi +1 -1
  203. metaflow-stubs/plugins/kubernetes/kubernetes_decorator.pyi +1 -1
  204. metaflow-stubs/plugins/kubernetes/kubernetes_jobsets.pyi +1 -1
  205. metaflow-stubs/plugins/kubernetes/spot_monitor_sidecar.pyi +1 -1
  206. metaflow-stubs/plugins/ollama/__init__.pyi +2 -2
  207. metaflow-stubs/plugins/parallel_decorator.pyi +1 -1
  208. metaflow-stubs/plugins/perimeters.pyi +1 -1
  209. metaflow-stubs/plugins/project_decorator.pyi +1 -1
  210. metaflow-stubs/plugins/pypi/__init__.pyi +1 -1
  211. metaflow-stubs/plugins/pypi/conda_decorator.pyi +1 -1
  212. metaflow-stubs/plugins/pypi/conda_environment.pyi +3 -3
  213. metaflow-stubs/plugins/pypi/parsers.pyi +1 -1
  214. metaflow-stubs/plugins/pypi/pypi_decorator.pyi +1 -1
  215. metaflow-stubs/plugins/pypi/pypi_environment.pyi +1 -1
  216. metaflow-stubs/plugins/pypi/utils.pyi +1 -1
  217. metaflow-stubs/plugins/resources_decorator.pyi +1 -1
  218. metaflow-stubs/plugins/retry_decorator.pyi +1 -1
  219. metaflow-stubs/plugins/secrets/__init__.pyi +1 -1
  220. metaflow-stubs/plugins/secrets/inline_secrets_provider.pyi +2 -2
  221. metaflow-stubs/plugins/secrets/secrets_decorator.pyi +1 -1
  222. metaflow-stubs/plugins/secrets/secrets_func.pyi +1 -1
  223. metaflow-stubs/plugins/secrets/secrets_spec.pyi +1 -1
  224. metaflow-stubs/plugins/secrets/utils.pyi +1 -1
  225. metaflow-stubs/plugins/snowflake/__init__.pyi +1 -1
  226. metaflow-stubs/plugins/storage_executor.pyi +1 -1
  227. metaflow-stubs/plugins/test_unbounded_foreach_decorator.pyi +1 -1
  228. metaflow-stubs/plugins/timeout_decorator.pyi +1 -1
  229. metaflow-stubs/plugins/torchtune/__init__.pyi +1 -1
  230. metaflow-stubs/plugins/uv/__init__.pyi +1 -1
  231. metaflow-stubs/plugins/uv/uv_environment.pyi +1 -1
  232. metaflow-stubs/profilers/__init__.pyi +1 -1
  233. metaflow-stubs/pylint_wrapper.pyi +1 -1
  234. metaflow-stubs/runner/__init__.pyi +1 -1
  235. metaflow-stubs/runner/deployer.pyi +5 -5
  236. metaflow-stubs/runner/deployer_impl.pyi +1 -1
  237. metaflow-stubs/runner/metaflow_runner.pyi +1 -1
  238. metaflow-stubs/runner/nbdeploy.pyi +1 -1
  239. metaflow-stubs/runner/nbrun.pyi +1 -1
  240. metaflow-stubs/runner/subprocess_manager.pyi +1 -1
  241. metaflow-stubs/runner/utils.pyi +2 -2
  242. metaflow-stubs/system/__init__.pyi +1 -1
  243. metaflow-stubs/system/system_logger.pyi +2 -2
  244. metaflow-stubs/system/system_monitor.pyi +1 -1
  245. metaflow-stubs/tagging_util.pyi +1 -1
  246. metaflow-stubs/tuple_util.pyi +1 -1
  247. metaflow-stubs/user_configs/__init__.pyi +1 -1
  248. metaflow-stubs/user_configs/config_options.pyi +1 -1
  249. metaflow-stubs/user_configs/config_parameters.pyi +4 -4
  250. metaflow-stubs/user_decorators/__init__.pyi +1 -1
  251. metaflow-stubs/user_decorators/common.pyi +1 -1
  252. metaflow-stubs/user_decorators/mutable_flow.pyi +4 -4
  253. metaflow-stubs/user_decorators/mutable_step.pyi +4 -4
  254. metaflow-stubs/user_decorators/user_flow_decorator.pyi +3 -3
  255. metaflow-stubs/user_decorators/user_step_decorator.pyi +4 -4
  256. {ob_metaflow_stubs-6.0.4.1rc1.dist-info → ob_metaflow_stubs-6.0.4.2.dist-info}/METADATA +1 -1
  257. ob_metaflow_stubs-6.0.4.2.dist-info/RECORD +260 -0
  258. ob_metaflow_stubs-6.0.4.1rc1.dist-info/RECORD +0 -260
  259. {ob_metaflow_stubs-6.0.4.1rc1.dist-info → ob_metaflow_stubs-6.0.4.2.dist-info}/WHEEL +0 -0
  260. {ob_metaflow_stubs-6.0.4.1rc1.dist-info → ob_metaflow_stubs-6.0.4.2.dist-info}/top_level.txt +0 -0
@@ -1,7 +1,7 @@
1
1
  ######################################################################################################
2
2
  # Auto-generated Metaflow stub file #
3
3
  # MF version: 2.16.0.1+obcheckpoint(0.2.4);ob(v1) #
4
- # Generated on 2025-07-14T20:15:55.146353 #
4
+ # Generated on 2025-07-15T03:12:46.861592 #
5
5
  ######################################################################################################
6
6
 
7
7
  from __future__ import annotations
@@ -162,436 +162,380 @@ def step(f: typing.Union[typing.Callable[[FlowSpecDerived], None], typing.Callab
162
162
  """
163
163
  ...
164
164
 
165
- def huggingface_hub(*, temp_dir_root: typing.Optional[str] = None, load: typing.Union[typing.List[str], typing.List[typing.Tuple[typing.Dict, str]], typing.List[typing.Tuple[str, str]], typing.List[typing.Dict], None]) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
165
+ @typing.overload
166
+ def timeout(*, seconds: int = 0, minutes: int = 0, hours: int = 0) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
166
167
  """
167
- Decorator that helps cache, version and store models/datasets from huggingface hub.
168
-
169
- > Examples
170
-
171
- **Usage: creating references of models from huggingface that may be loaded in downstream steps**
172
- ```python
173
- @huggingface_hub
174
- @step
175
- def pull_model_from_huggingface(self):
176
- # `current.huggingface_hub.snapshot_download` downloads the model from the Hugging Face Hub
177
- # and saves it in the backend storage based on the model's `repo_id`. If there exists a model
178
- # with the same `repo_id` in the backend storage, it will not download the model again. The return
179
- # value of the function is a reference to the model in the backend storage.
180
- # This reference can be used to load the model in the subsequent steps via `@model(load=["llama_model"])`
181
-
182
- self.model_id = "mistralai/Mistral-7B-Instruct-v0.1"
183
- self.llama_model = current.huggingface_hub.snapshot_download(
184
- repo_id=self.model_id,
185
- allow_patterns=["*.safetensors", "*.json", "tokenizer.*"],
186
- )
187
- self.next(self.train)
188
- ```
189
-
190
- **Usage: loading models directly from huggingface hub or from cache (from metaflow's datastore)**
191
- ```python
192
- @huggingface_hub(load=["mistralai/Mistral-7B-Instruct-v0.1"])
193
- @step
194
- def pull_model_from_huggingface(self):
195
- path_to_model = current.huggingface_hub.loaded["mistralai/Mistral-7B-Instruct-v0.1"]
196
- ```
197
-
198
- ```python
199
- @huggingface_hub(load=[("mistralai/Mistral-7B-Instruct-v0.1", "/my-directory"), ("myorg/mistral-lora, "/my-lora-directory")])
200
- @step
201
- def finetune_model(self):
202
- path_to_model = current.huggingface_hub.loaded["mistralai/Mistral-7B-Instruct-v0.1"]
203
- # path_to_model will be /my-directory
204
- ```
205
-
206
- ```python
207
- # Takes all the arguments passed to `snapshot_download`
208
- # except for `local_dir`
209
- @huggingface_hub(load=[
210
- {
211
- "repo_id": "mistralai/Mistral-7B-Instruct-v0.1",
212
- },
213
- {
214
- "repo_id": "myorg/mistral-lora",
215
- "repo_type": "model",
216
- },
217
- ])
218
- @step
219
- def finetune_model(self):
220
- path_to_model = current.huggingface_hub.loaded["mistralai/Mistral-7B-Instruct-v0.1"]
221
- # path_to_model will be /my-directory
222
- ```
223
-
224
-
225
- Parameters
226
- ----------
227
- temp_dir_root : str, optional
228
- The root directory that will hold the temporary directory where objects will be downloaded.
229
-
230
- load: Union[List[str], List[Tuple[Dict, str]], List[Tuple[str, str]], List[Dict], None]
231
- The list of repos (models/datasets) to load.
168
+ Specifies a timeout for your step.
232
169
 
233
- Loaded repos can be accessed via `current.huggingface_hub.loaded`. If load is set, then the following happens:
170
+ This decorator is useful if this step may hang indefinitely.
234
171
 
235
- - If repo (model/dataset) is not found in the datastore:
236
- - Downloads the repo from Hugging Face Hub to a temporary directory (or uses specified path) for local access
237
- - Stores it in Metaflow's datastore (s3/gcs/azure etc.) with a unique name based on repo_type/repo_id
238
- - All HF models loaded for a `@step` will be cached separately under flow/step/namespace.
172
+ This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
173
+ A timeout is considered to be an exception thrown by the step. It will cause the step to be
174
+ retried if needed and the exception will be caught by the `@catch` decorator, if present.
239
175
 
240
- - If repo is found in the datastore:
241
- - Loads it directly from datastore to local path (can be temporary directory or specified path)
242
- """
243
- ...
244
-
245
- @typing.overload
246
- def environment(*, vars: typing.Dict[str, str] = {}) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
247
- """
248
- Specifies environment variables to be set prior to the execution of a step.
176
+ Note that all the values specified in parameters are added together so if you specify
177
+ 60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.
249
178
 
250
179
 
251
180
  Parameters
252
181
  ----------
253
- vars : Dict[str, str], default {}
254
- Dictionary of environment variables to set.
182
+ seconds : int, default 0
183
+ Number of seconds to wait prior to timing out.
184
+ minutes : int, default 0
185
+ Number of minutes to wait prior to timing out.
186
+ hours : int, default 0
187
+ Number of hours to wait prior to timing out.
255
188
  """
256
189
  ...
257
190
 
258
191
  @typing.overload
259
- def environment(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
192
+ def timeout(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
260
193
  ...
261
194
 
262
195
  @typing.overload
263
- def environment(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
196
+ def timeout(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
264
197
  ...
265
198
 
266
- def environment(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, vars: typing.Dict[str, str] = {}):
199
+ def timeout(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, seconds: int = 0, minutes: int = 0, hours: int = 0):
267
200
  """
268
- Specifies environment variables to be set prior to the execution of a step.
201
+ Specifies a timeout for your step.
202
+
203
+ This decorator is useful if this step may hang indefinitely.
204
+
205
+ This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
206
+ A timeout is considered to be an exception thrown by the step. It will cause the step to be
207
+ retried if needed and the exception will be caught by the `@catch` decorator, if present.
208
+
209
+ Note that all the values specified in parameters are added together so if you specify
210
+ 60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.
269
211
 
270
212
 
271
213
  Parameters
272
214
  ----------
273
- vars : Dict[str, str], default {}
274
- Dictionary of environment variables to set.
215
+ seconds : int, default 0
216
+ Number of seconds to wait prior to timing out.
217
+ minutes : int, default 0
218
+ Number of minutes to wait prior to timing out.
219
+ hours : int, default 0
220
+ Number of hours to wait prior to timing out.
275
221
  """
276
222
  ...
277
223
 
278
224
  @typing.overload
279
- def conda(*, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
225
+ def resources(*, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
280
226
  """
281
- Specifies the Conda environment for the step.
227
+ Specifies the resources needed when executing this step.
282
228
 
283
- Information in this decorator will augment any
284
- attributes set in the `@conda_base` flow-level decorator. Hence,
285
- you can use `@conda_base` to set packages required by all
286
- steps and use `@conda` to specify step-specific overrides.
229
+ Use `@resources` to specify the resource requirements
230
+ independently of the specific compute layer (`@batch`, `@kubernetes`).
231
+
232
+ You can choose the compute layer on the command line by executing e.g.
233
+ ```
234
+ python myflow.py run --with batch
235
+ ```
236
+ or
237
+ ```
238
+ python myflow.py run --with kubernetes
239
+ ```
240
+ which executes the flow on the desired system using the
241
+ requirements specified in `@resources`.
287
242
 
288
243
 
289
244
  Parameters
290
245
  ----------
291
- packages : Dict[str, str], default {}
292
- Packages to use for this step. The key is the name of the package
293
- and the value is the version to use.
294
- libraries : Dict[str, str], default {}
295
- Supported for backward compatibility. When used with packages, packages will take precedence.
296
- python : str, optional, default None
297
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
298
- that the version used will correspond to the version of the Python interpreter used to start the run.
299
- disabled : bool, default False
300
- If set to True, disables @conda.
246
+ cpu : int, default 1
247
+ Number of CPUs required for this step.
248
+ gpu : int, optional, default None
249
+ Number of GPUs required for this step.
250
+ disk : int, optional, default None
251
+ Disk size (in MB) required for this step. Only applies on Kubernetes.
252
+ memory : int, default 4096
253
+ Memory size (in MB) required for this step.
254
+ shared_memory : int, optional, default None
255
+ The value for the size (in MiB) of the /dev/shm volume for this step.
256
+ This parameter maps to the `--shm-size` option in Docker.
301
257
  """
302
258
  ...
303
259
 
304
260
  @typing.overload
305
- def conda(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
261
+ def resources(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
306
262
  ...
307
263
 
308
264
  @typing.overload
309
- def conda(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
265
+ def resources(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
310
266
  ...
311
267
 
312
- def conda(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False):
268
+ def resources(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None):
313
269
  """
314
- Specifies the Conda environment for the step.
270
+ Specifies the resources needed when executing this step.
315
271
 
316
- Information in this decorator will augment any
317
- attributes set in the `@conda_base` flow-level decorator. Hence,
318
- you can use `@conda_base` to set packages required by all
319
- steps and use `@conda` to specify step-specific overrides.
272
+ Use `@resources` to specify the resource requirements
273
+ independently of the specific compute layer (`@batch`, `@kubernetes`).
274
+
275
+ You can choose the compute layer on the command line by executing e.g.
276
+ ```
277
+ python myflow.py run --with batch
278
+ ```
279
+ or
280
+ ```
281
+ python myflow.py run --with kubernetes
282
+ ```
283
+ which executes the flow on the desired system using the
284
+ requirements specified in `@resources`.
320
285
 
321
286
 
322
287
  Parameters
323
288
  ----------
324
- packages : Dict[str, str], default {}
325
- Packages to use for this step. The key is the name of the package
326
- and the value is the version to use.
327
- libraries : Dict[str, str], default {}
328
- Supported for backward compatibility. When used with packages, packages will take precedence.
329
- python : str, optional, default None
330
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
331
- that the version used will correspond to the version of the Python interpreter used to start the run.
332
- disabled : bool, default False
333
- If set to True, disables @conda.
289
+ cpu : int, default 1
290
+ Number of CPUs required for this step.
291
+ gpu : int, optional, default None
292
+ Number of GPUs required for this step.
293
+ disk : int, optional, default None
294
+ Disk size (in MB) required for this step. Only applies on Kubernetes.
295
+ memory : int, default 4096
296
+ Memory size (in MB) required for this step.
297
+ shared_memory : int, optional, default None
298
+ The value for the size (in MiB) of the /dev/shm volume for this step.
299
+ This parameter maps to the `--shm-size` option in Docker.
334
300
  """
335
301
  ...
336
302
 
337
303
  @typing.overload
338
- def model(*, load: typing.Union[typing.List[str], str, typing.List[typing.Tuple[str, typing.Optional[str]]]] = None, temp_dir_root: str = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
304
+ def checkpoint(*, load_policy: str = 'fresh', temp_dir_root: str = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
339
305
  """
340
- Enables loading / saving of models within a step.
306
+ Enables checkpointing for a step.
341
307
 
342
308
  > Examples
343
- - Saving Models
344
- ```python
345
- @model
346
- @step
347
- def train(self):
348
- # current.model.save returns a dictionary reference to the model saved
349
- self.my_model = current.model.save(
350
- path_to_my_model,
351
- label="my_model",
352
- metadata={
353
- "epochs": 10,
354
- "batch-size": 32,
355
- "learning-rate": 0.001,
356
- }
357
- )
358
- self.next(self.test)
359
309
 
360
- @model(load="my_model")
361
- @step
362
- def test(self):
363
- # `current.model.loaded` returns a dictionary of the loaded models
364
- # where the key is the name of the artifact and the value is the path to the model
365
- print(os.listdir(current.model.loaded["my_model"]))
366
- self.next(self.end)
367
- ```
310
+ - Saving Checkpoints
368
311
 
369
- - Loading models
370
312
  ```python
313
+ @checkpoint
371
314
  @step
372
315
  def train(self):
373
- # current.model.load returns the path to the model loaded
374
- checkpoint_path = current.model.load(
375
- self.checkpoint_key,
376
- )
377
- model_path = current.model.load(
378
- self.model,
379
- )
380
- self.next(self.test)
381
- ```
382
-
383
-
384
- Parameters
385
- ----------
386
- load : Union[List[str],str,List[Tuple[str,Union[str,None]]]], default: None
387
- Artifact name/s referencing the models/checkpoints to load. Artifact names refer to the names of the instance variables set to `self`.
388
- These artifact names give to `load` be reference objects or reference `key` string's from objects created by `current.checkpoint` / `current.model` / `current.huggingface_hub`.
389
- If a list of tuples is provided, the first element is the artifact name and the second element is the path the artifact needs be unpacked on
390
- the local filesystem. If the second element is None, the artifact will be unpacked in the current working directory.
391
- If a string is provided, then the artifact corresponding to that name will be loaded in the current working directory.
316
+ model = create_model(self.parameters, checkpoint_path = None)
317
+ for i in range(self.epochs):
318
+ # some training logic
319
+ loss = model.train(self.dataset)
320
+ if i % 10 == 0:
321
+ model.save(
322
+ current.checkpoint.directory,
323
+ )
324
+ # saves the contents of the `current.checkpoint.directory` as a checkpoint
325
+ # and returns a reference dictionary to the checkpoint saved in the datastore
326
+ self.latest_checkpoint = current.checkpoint.save(
327
+ name="epoch_checkpoint",
328
+ metadata={
329
+ "epoch": i,
330
+ "loss": loss,
331
+ }
332
+ )
333
+ ```
334
+
335
+ - Using Loaded Checkpoints
336
+
337
+ ```python
338
+ @retry(times=3)
339
+ @checkpoint
340
+ @step
341
+ def train(self):
342
+ # Assume that the task has restarted and the previous attempt of the task
343
+ # saved a checkpoint
344
+ checkpoint_path = None
345
+ if current.checkpoint.is_loaded: # Check if a checkpoint is loaded
346
+ print("Loaded checkpoint from the previous attempt")
347
+ checkpoint_path = current.checkpoint.directory
348
+
349
+ model = create_model(self.parameters, checkpoint_path = checkpoint_path)
350
+ for i in range(self.epochs):
351
+ ...
352
+ ```
353
+
354
+
355
+ Parameters
356
+ ----------
357
+ load_policy : str, default: "fresh"
358
+ The policy for loading the checkpoint. The following policies are supported:
359
+ - "eager": Loads the latest available checkpoint within the namespace.
360
+ With this mode, the latest checkpoint written by any previous task (can be even a different run) of the step
361
+ will be loaded at the start of the task.
362
+ - "none": Do not load any checkpoint
363
+ - "fresh": Loads the latest checkpoint created within the running Task.
364
+ This mode helps loading checkpoints across various retry attempts of the same task.
365
+ With this mode, no checkpoint will be loaded at the start of a task but any checkpoints
366
+ created within the task will be loaded when the task retries execution on failure.
392
367
 
393
368
  temp_dir_root : str, default: None
394
- The root directory under which `current.model.loaded` will store loaded models
369
+ The root directory under which `current.checkpoint.directory` will be created.
395
370
  """
396
371
  ...
397
372
 
398
373
  @typing.overload
399
- def model(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
374
+ def checkpoint(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
400
375
  ...
401
376
 
402
377
  @typing.overload
403
- def model(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
378
+ def checkpoint(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
404
379
  ...
405
380
 
406
- def model(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, load: typing.Union[typing.List[str], str, typing.List[typing.Tuple[str, typing.Optional[str]]]] = None, temp_dir_root: str = None):
381
+ def checkpoint(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, load_policy: str = 'fresh', temp_dir_root: str = None):
407
382
  """
408
- Enables loading / saving of models within a step.
383
+ Enables checkpointing for a step.
409
384
 
410
385
  > Examples
411
- - Saving Models
386
+
387
+ - Saving Checkpoints
388
+
412
389
  ```python
413
- @model
390
+ @checkpoint
414
391
  @step
415
392
  def train(self):
416
- # current.model.save returns a dictionary reference to the model saved
417
- self.my_model = current.model.save(
418
- path_to_my_model,
419
- label="my_model",
420
- metadata={
421
- "epochs": 10,
422
- "batch-size": 32,
423
- "learning-rate": 0.001,
424
- }
425
- )
426
- self.next(self.test)
427
-
428
- @model(load="my_model")
429
- @step
430
- def test(self):
431
- # `current.model.loaded` returns a dictionary of the loaded models
432
- # where the key is the name of the artifact and the value is the path to the model
433
- print(os.listdir(current.model.loaded["my_model"]))
434
- self.next(self.end)
393
+ model = create_model(self.parameters, checkpoint_path = None)
394
+ for i in range(self.epochs):
395
+ # some training logic
396
+ loss = model.train(self.dataset)
397
+ if i % 10 == 0:
398
+ model.save(
399
+ current.checkpoint.directory,
400
+ )
401
+ # saves the contents of the `current.checkpoint.directory` as a checkpoint
402
+ # and returns a reference dictionary to the checkpoint saved in the datastore
403
+ self.latest_checkpoint = current.checkpoint.save(
404
+ name="epoch_checkpoint",
405
+ metadata={
406
+ "epoch": i,
407
+ "loss": loss,
408
+ }
409
+ )
435
410
  ```
436
411
 
437
- - Loading models
412
+ - Using Loaded Checkpoints
413
+
438
414
  ```python
415
+ @retry(times=3)
416
+ @checkpoint
439
417
  @step
440
418
  def train(self):
441
- # current.model.load returns the path to the model loaded
442
- checkpoint_path = current.model.load(
443
- self.checkpoint_key,
444
- )
445
- model_path = current.model.load(
446
- self.model,
447
- )
448
- self.next(self.test)
419
+ # Assume that the task has restarted and the previous attempt of the task
420
+ # saved a checkpoint
421
+ checkpoint_path = None
422
+ if current.checkpoint.is_loaded: # Check if a checkpoint is loaded
423
+ print("Loaded checkpoint from the previous attempt")
424
+ checkpoint_path = current.checkpoint.directory
425
+
426
+ model = create_model(self.parameters, checkpoint_path = checkpoint_path)
427
+ for i in range(self.epochs):
428
+ ...
449
429
  ```
450
430
 
451
431
 
452
432
  Parameters
453
433
  ----------
454
- load : Union[List[str],str,List[Tuple[str,Union[str,None]]]], default: None
455
- Artifact name/s referencing the models/checkpoints to load. Artifact names refer to the names of the instance variables set to `self`.
456
- These artifact names give to `load` be reference objects or reference `key` string's from objects created by `current.checkpoint` / `current.model` / `current.huggingface_hub`.
457
- If a list of tuples is provided, the first element is the artifact name and the second element is the path the artifact needs be unpacked on
458
- the local filesystem. If the second element is None, the artifact will be unpacked in the current working directory.
459
- If a string is provided, then the artifact corresponding to that name will be loaded in the current working directory.
434
+ load_policy : str, default: "fresh"
435
+ The policy for loading the checkpoint. The following policies are supported:
436
+ - "eager": Loads the latest available checkpoint within the namespace.
437
+ With this mode, the latest checkpoint written by any previous task (can be even a different run) of the step
438
+ will be loaded at the start of the task.
439
+ - "none": Do not load any checkpoint
440
+ - "fresh": Loads the latest checkpoint created within the running Task.
441
+ This mode helps loading checkpoints across various retry attempts of the same task.
442
+ With this mode, no checkpoint will be loaded at the start of a task but any checkpoints
443
+ created within the task will be loaded when the task retries execution on failure.
460
444
 
461
445
  temp_dir_root : str, default: None
462
- The root directory under which `current.model.loaded` will store loaded models
446
+ The root directory under which `current.checkpoint.directory` will be created.
463
447
  """
464
448
  ...
465
449
 
466
- def kubernetes(*, cpu: int = 1, memory: int = 4096, disk: int = 10240, image: typing.Optional[str] = None, image_pull_policy: str = 'KUBERNETES_IMAGE_PULL_POLICY', image_pull_secrets: typing.List[str] = [], service_account: str = 'METAFLOW_KUBERNETES_SERVICE_ACCOUNT', secrets: typing.Optional[typing.List[str]] = None, node_selector: typing.Union[typing.Dict[str, str], str, None] = None, namespace: str = 'METAFLOW_KUBERNETES_NAMESPACE', gpu: typing.Optional[int] = None, gpu_vendor: str = 'KUBERNETES_GPU_VENDOR', tolerations: typing.List[str] = [], labels: typing.Dict[str, str] = 'METAFLOW_KUBERNETES_LABELS', annotations: typing.Dict[str, str] = 'METAFLOW_KUBERNETES_ANNOTATIONS', use_tmpfs: bool = False, tmpfs_tempdir: bool = True, tmpfs_size: typing.Optional[int] = None, tmpfs_path: typing.Optional[str] = '/metaflow_temp', persistent_volume_claims: typing.Optional[typing.Dict[str, str]] = None, shared_memory: typing.Optional[int] = None, port: typing.Optional[int] = None, compute_pool: typing.Optional[str] = None, hostname_resolution_timeout: int = 600, qos: str = 'Burstable', security_context: typing.Optional[typing.Dict[str, typing.Any]] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
450
+ def nvidia(*, gpu: int, gpu_type: str, queue_timeout: int) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
467
451
  """
468
- Specifies that this step should execute on Kubernetes.
452
+ Specifies that this step should execute on DGX cloud.
469
453
 
470
454
 
471
455
  Parameters
472
456
  ----------
473
- cpu : int, default 1
474
- Number of CPUs required for this step. If `@resources` is
475
- also present, the maximum value from all decorators is used.
476
- memory : int, default 4096
477
- Memory size (in MB) required for this step. If
478
- `@resources` is also present, the maximum value from all decorators is
479
- used.
480
- disk : int, default 10240
481
- Disk size (in MB) required for this step. If
482
- `@resources` is also present, the maximum value from all decorators is
483
- used.
484
- image : str, optional, default None
485
- Docker image to use when launching on Kubernetes. If not specified, and
486
- METAFLOW_KUBERNETES_CONTAINER_IMAGE is specified, that image is used. If
487
- not, a default Docker image mapping to the current version of Python is used.
488
- image_pull_policy: str, default KUBERNETES_IMAGE_PULL_POLICY
489
- If given, the imagePullPolicy to be applied to the Docker image of the step.
490
- image_pull_secrets: List[str], default []
491
- The default is extracted from METAFLOW_KUBERNETES_IMAGE_PULL_SECRETS.
492
- Kubernetes image pull secrets to use when pulling container images
493
- in Kubernetes.
494
- service_account : str, default METAFLOW_KUBERNETES_SERVICE_ACCOUNT
495
- Kubernetes service account to use when launching pod in Kubernetes.
496
- secrets : List[str], optional, default None
497
- Kubernetes secrets to use when launching pod in Kubernetes. These
498
- secrets are in addition to the ones defined in `METAFLOW_KUBERNETES_SECRETS`
499
- in Metaflow configuration.
500
- node_selector: Union[Dict[str,str], str], optional, default None
501
- Kubernetes node selector(s) to apply to the pod running the task.
502
- Can be passed in as a comma separated string of values e.g.
503
- 'kubernetes.io/os=linux,kubernetes.io/arch=amd64' or as a dictionary
504
- {'kubernetes.io/os': 'linux', 'kubernetes.io/arch': 'amd64'}
505
- namespace : str, default METAFLOW_KUBERNETES_NAMESPACE
506
- Kubernetes namespace to use when launching pod in Kubernetes.
507
- gpu : int, optional, default None
508
- Number of GPUs required for this step. A value of zero implies that
509
- the scheduled node should not have GPUs.
510
- gpu_vendor : str, default KUBERNETES_GPU_VENDOR
511
- The vendor of the GPUs to be used for this step.
512
- tolerations : List[str], default []
513
- The default is extracted from METAFLOW_KUBERNETES_TOLERATIONS.
514
- Kubernetes tolerations to use when launching pod in Kubernetes.
515
- labels: Dict[str, str], default: METAFLOW_KUBERNETES_LABELS
516
- Kubernetes labels to use when launching pod in Kubernetes.
517
- annotations: Dict[str, str], default: METAFLOW_KUBERNETES_ANNOTATIONS
518
- Kubernetes annotations to use when launching pod in Kubernetes.
519
- use_tmpfs : bool, default False
520
- This enables an explicit tmpfs mount for this step.
521
- tmpfs_tempdir : bool, default True
522
- sets METAFLOW_TEMPDIR to tmpfs_path if set for this step.
523
- tmpfs_size : int, optional, default: None
524
- The value for the size (in MiB) of the tmpfs mount for this step.
525
- This parameter maps to the `--tmpfs` option in Docker. Defaults to 50% of the
526
- memory allocated for this step.
527
- tmpfs_path : str, optional, default /metaflow_temp
528
- Path to tmpfs mount for this step.
529
- persistent_volume_claims : Dict[str, str], optional, default None
530
- A map (dictionary) of persistent volumes to be mounted to the pod for this step. The map is from persistent
531
- volumes to the path to which the volume is to be mounted, e.g., `{'pvc-name': '/path/to/mount/on'}`.
532
- shared_memory: int, optional
533
- Shared memory size (in MiB) required for this step
534
- port: int, optional
535
- Port number to specify in the Kubernetes job object
536
- compute_pool : str, optional, default None
537
- Compute pool to be used for for this step.
538
- If not specified, any accessible compute pool within the perimeter is used.
539
- hostname_resolution_timeout: int, default 10 * 60
540
- Timeout in seconds for the workers tasks in the gang scheduled cluster to resolve the hostname of control task.
541
- Only applicable when @parallel is used.
542
- qos: str, default: Burstable
543
- Quality of Service class to assign to the pod. Supported values are: Guaranteed, Burstable, BestEffort
544
-
545
- security_context: Dict[str, Any], optional, default None
546
- Container security context. Applies to the task container. Allows the following keys:
547
- - privileged: bool, optional, default None
548
- - allow_privilege_escalation: bool, optional, default None
549
- - run_as_user: int, optional, default None
550
- - run_as_group: int, optional, default None
551
- - run_as_non_root: bool, optional, default None
457
+ gpu : int
458
+ Number of GPUs to use.
459
+ gpu_type : str
460
+ Type of Nvidia GPU to use.
461
+ queue_timeout : int
462
+ Time to keep the job in NVCF's queue.
552
463
  """
553
464
  ...
554
465
 
555
- def ollama(*, models: list, backend: str, force_pull: bool, cache_update_policy: str, force_cache_update: bool, debug: bool, circuit_breaker_config: dict, timeout_config: dict) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
466
+ @typing.overload
467
+ def fast_bakery_internal(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
556
468
  """
557
- This decorator is used to run Ollama APIs as Metaflow task sidecars.
558
-
559
- User code call
560
- --------------
561
- @ollama(
562
- models=[...],
563
- ...
564
- )
565
-
566
- Valid backend options
567
- ---------------------
568
- - 'local': Run as a separate process on the local task machine.
569
- - (TODO) 'managed': Outerbounds hosts and selects compute provider.
570
- - (TODO) 'remote': Spin up separate instance to serve Ollama models.
571
-
572
- Valid model options
573
- -------------------
574
- Any model here https://ollama.com/search, e.g. 'llama3.2', 'llama3.3'
469
+ Internal decorator to support Fast bakery
470
+ """
471
+ ...
472
+
473
+ @typing.overload
474
+ def fast_bakery_internal(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
475
+ ...
476
+
477
+ def fast_bakery_internal(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
478
+ """
479
+ Internal decorator to support Fast bakery
480
+ """
481
+ ...
482
+
483
+ @typing.overload
484
+ def conda(*, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
485
+ """
486
+ Specifies the Conda environment for the step.
487
+
488
+ Information in this decorator will augment any
489
+ attributes set in the `@conda_base` flow-level decorator. Hence,
490
+ you can use `@conda_base` to set packages required by all
491
+ steps and use `@conda` to specify step-specific overrides.
575
492
 
576
493
 
577
494
  Parameters
578
495
  ----------
579
- models: list[str]
580
- List of Ollama containers running models in sidecars.
581
- backend: str
582
- Determines where and how to run the Ollama process.
583
- force_pull: bool
584
- Whether to run `ollama pull` no matter what, or first check the remote cache in Metaflow datastore for this model key.
585
- cache_update_policy: str
586
- Cache update policy: "auto", "force", or "never".
587
- force_cache_update: bool
588
- Simple override for "force" cache update policy.
589
- debug: bool
590
- Whether to turn on verbose debugging logs.
591
- circuit_breaker_config: dict
592
- Configuration for circuit breaker protection. Keys: failure_threshold, recovery_timeout, reset_timeout.
593
- timeout_config: dict
594
- Configuration for various operation timeouts. Keys: pull, stop, health_check, install, server_startup.
496
+ packages : Dict[str, str], default {}
497
+ Packages to use for this step. The key is the name of the package
498
+ and the value is the version to use.
499
+ libraries : Dict[str, str], default {}
500
+ Supported for backward compatibility. When used with packages, packages will take precedence.
501
+ python : str, optional, default None
502
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
503
+ that the version used will correspond to the version of the Python interpreter used to start the run.
504
+ disabled : bool, default False
505
+ If set to True, disables @conda.
506
+ """
507
+ ...
508
+
509
+ @typing.overload
510
+ def conda(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
511
+ ...
512
+
513
+ @typing.overload
514
+ def conda(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
515
+ ...
516
+
517
+ def conda(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False):
518
+ """
519
+ Specifies the Conda environment for the step.
520
+
521
+ Information in this decorator will augment any
522
+ attributes set in the `@conda_base` flow-level decorator. Hence,
523
+ you can use `@conda_base` to set packages required by all
524
+ steps and use `@conda` to specify step-specific overrides.
525
+
526
+
527
+ Parameters
528
+ ----------
529
+ packages : Dict[str, str], default {}
530
+ Packages to use for this step. The key is the name of the package
531
+ and the value is the version to use.
532
+ libraries : Dict[str, str], default {}
533
+ Supported for backward compatibility. When used with packages, packages will take precedence.
534
+ python : str, optional, default None
535
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
536
+ that the version used will correspond to the version of the Python interpreter used to start the run.
537
+ disabled : bool, default False
538
+ If set to True, disables @conda.
595
539
  """
596
540
  ...
597
541
 
@@ -635,61 +579,124 @@ def secrets(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None],
635
579
  ...
636
580
 
637
581
  @typing.overload
638
- def timeout(*, seconds: int = 0, minutes: int = 0, hours: int = 0) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
582
+ def environment(*, vars: typing.Dict[str, str] = {}) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
639
583
  """
640
- Specifies a timeout for your step.
641
-
642
- This decorator is useful if this step may hang indefinitely.
643
-
644
- This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
645
- A timeout is considered to be an exception thrown by the step. It will cause the step to be
646
- retried if needed and the exception will be caught by the `@catch` decorator, if present.
647
-
648
- Note that all the values specified in parameters are added together so if you specify
649
- 60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.
584
+ Specifies environment variables to be set prior to the execution of a step.
650
585
 
651
586
 
652
587
  Parameters
653
588
  ----------
654
- seconds : int, default 0
655
- Number of seconds to wait prior to timing out.
656
- minutes : int, default 0
657
- Number of minutes to wait prior to timing out.
658
- hours : int, default 0
659
- Number of hours to wait prior to timing out.
589
+ vars : Dict[str, str], default {}
590
+ Dictionary of environment variables to set.
660
591
  """
661
592
  ...
662
593
 
663
594
  @typing.overload
664
- def timeout(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
595
+ def environment(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
665
596
  ...
666
597
 
667
598
  @typing.overload
668
- def timeout(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
599
+ def environment(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
669
600
  ...
670
601
 
671
- def timeout(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, seconds: int = 0, minutes: int = 0, hours: int = 0):
602
+ def environment(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, vars: typing.Dict[str, str] = {}):
672
603
  """
673
- Specifies a timeout for your step.
674
-
675
- This decorator is useful if this step may hang indefinitely.
604
+ Specifies environment variables to be set prior to the execution of a step.
676
605
 
677
- This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
678
- A timeout is considered to be an exception thrown by the step. It will cause the step to be
679
- retried if needed and the exception will be caught by the `@catch` decorator, if present.
680
606
 
681
- Note that all the values specified in parameters are added together so if you specify
682
- 60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.
607
+ Parameters
608
+ ----------
609
+ vars : Dict[str, str], default {}
610
+ Dictionary of environment variables to set.
611
+ """
612
+ ...
613
+
614
+ def kubernetes(*, cpu: int = 1, memory: int = 4096, disk: int = 10240, image: typing.Optional[str] = None, image_pull_policy: str = 'KUBERNETES_IMAGE_PULL_POLICY', image_pull_secrets: typing.List[str] = [], service_account: str = 'METAFLOW_KUBERNETES_SERVICE_ACCOUNT', secrets: typing.Optional[typing.List[str]] = None, node_selector: typing.Union[typing.Dict[str, str], str, None] = None, namespace: str = 'METAFLOW_KUBERNETES_NAMESPACE', gpu: typing.Optional[int] = None, gpu_vendor: str = 'KUBERNETES_GPU_VENDOR', tolerations: typing.List[str] = [], labels: typing.Dict[str, str] = 'METAFLOW_KUBERNETES_LABELS', annotations: typing.Dict[str, str] = 'METAFLOW_KUBERNETES_ANNOTATIONS', use_tmpfs: bool = False, tmpfs_tempdir: bool = True, tmpfs_size: typing.Optional[int] = None, tmpfs_path: typing.Optional[str] = '/metaflow_temp', persistent_volume_claims: typing.Optional[typing.Dict[str, str]] = None, shared_memory: typing.Optional[int] = None, port: typing.Optional[int] = None, compute_pool: typing.Optional[str] = None, hostname_resolution_timeout: int = 600, qos: str = 'Burstable', security_context: typing.Optional[typing.Dict[str, typing.Any]] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
615
+ """
616
+ Specifies that this step should execute on Kubernetes.
683
617
 
684
618
 
685
619
  Parameters
686
620
  ----------
687
- seconds : int, default 0
688
- Number of seconds to wait prior to timing out.
689
- minutes : int, default 0
690
- Number of minutes to wait prior to timing out.
691
- hours : int, default 0
692
- Number of hours to wait prior to timing out.
621
+ cpu : int, default 1
622
+ Number of CPUs required for this step. If `@resources` is
623
+ also present, the maximum value from all decorators is used.
624
+ memory : int, default 4096
625
+ Memory size (in MB) required for this step. If
626
+ `@resources` is also present, the maximum value from all decorators is
627
+ used.
628
+ disk : int, default 10240
629
+ Disk size (in MB) required for this step. If
630
+ `@resources` is also present, the maximum value from all decorators is
631
+ used.
632
+ image : str, optional, default None
633
+ Docker image to use when launching on Kubernetes. If not specified, and
634
+ METAFLOW_KUBERNETES_CONTAINER_IMAGE is specified, that image is used. If
635
+ not, a default Docker image mapping to the current version of Python is used.
636
+ image_pull_policy: str, default KUBERNETES_IMAGE_PULL_POLICY
637
+ If given, the imagePullPolicy to be applied to the Docker image of the step.
638
+ image_pull_secrets: List[str], default []
639
+ The default is extracted from METAFLOW_KUBERNETES_IMAGE_PULL_SECRETS.
640
+ Kubernetes image pull secrets to use when pulling container images
641
+ in Kubernetes.
642
+ service_account : str, default METAFLOW_KUBERNETES_SERVICE_ACCOUNT
643
+ Kubernetes service account to use when launching pod in Kubernetes.
644
+ secrets : List[str], optional, default None
645
+ Kubernetes secrets to use when launching pod in Kubernetes. These
646
+ secrets are in addition to the ones defined in `METAFLOW_KUBERNETES_SECRETS`
647
+ in Metaflow configuration.
648
+ node_selector: Union[Dict[str,str], str], optional, default None
649
+ Kubernetes node selector(s) to apply to the pod running the task.
650
+ Can be passed in as a comma separated string of values e.g.
651
+ 'kubernetes.io/os=linux,kubernetes.io/arch=amd64' or as a dictionary
652
+ {'kubernetes.io/os': 'linux', 'kubernetes.io/arch': 'amd64'}
653
+ namespace : str, default METAFLOW_KUBERNETES_NAMESPACE
654
+ Kubernetes namespace to use when launching pod in Kubernetes.
655
+ gpu : int, optional, default None
656
+ Number of GPUs required for this step. A value of zero implies that
657
+ the scheduled node should not have GPUs.
658
+ gpu_vendor : str, default KUBERNETES_GPU_VENDOR
659
+ The vendor of the GPUs to be used for this step.
660
+ tolerations : List[str], default []
661
+ The default is extracted from METAFLOW_KUBERNETES_TOLERATIONS.
662
+ Kubernetes tolerations to use when launching pod in Kubernetes.
663
+ labels: Dict[str, str], default: METAFLOW_KUBERNETES_LABELS
664
+ Kubernetes labels to use when launching pod in Kubernetes.
665
+ annotations: Dict[str, str], default: METAFLOW_KUBERNETES_ANNOTATIONS
666
+ Kubernetes annotations to use when launching pod in Kubernetes.
667
+ use_tmpfs : bool, default False
668
+ This enables an explicit tmpfs mount for this step.
669
+ tmpfs_tempdir : bool, default True
670
+ sets METAFLOW_TEMPDIR to tmpfs_path if set for this step.
671
+ tmpfs_size : int, optional, default: None
672
+ The value for the size (in MiB) of the tmpfs mount for this step.
673
+ This parameter maps to the `--tmpfs` option in Docker. Defaults to 50% of the
674
+ memory allocated for this step.
675
+ tmpfs_path : str, optional, default /metaflow_temp
676
+ Path to tmpfs mount for this step.
677
+ persistent_volume_claims : Dict[str, str], optional, default None
678
+ A map (dictionary) of persistent volumes to be mounted to the pod for this step. The map is from persistent
679
+ volumes to the path to which the volume is to be mounted, e.g., `{'pvc-name': '/path/to/mount/on'}`.
680
+ shared_memory: int, optional
681
+ Shared memory size (in MiB) required for this step
682
+ port: int, optional
683
+ Port number to specify in the Kubernetes job object
684
+ compute_pool : str, optional, default None
685
+ Compute pool to be used for this step.
686
+ If not specified, any accessible compute pool within the perimeter is used.
687
+ hostname_resolution_timeout: int, default 10 * 60
688
+ Timeout in seconds for the worker tasks in the gang-scheduled cluster to resolve the hostname of the control task.
689
+ Only applicable when @parallel is used.
690
+ qos: str, default: Burstable
691
+ Quality of Service class to assign to the pod. Supported values are: Guaranteed, Burstable, BestEffort
692
+
693
+ security_context: Dict[str, Any], optional, default None
694
+ Container security context. Applies to the task container. Allows the following keys:
695
+ - privileged: bool, optional, default None
696
+ - allow_privilege_escalation: bool, optional, default None
697
+ - run_as_user: int, optional, default None
698
+ - run_as_group: int, optional, default None
699
+ - run_as_non_root: bool, optional, default None
693
700
  """
694
701
  ...
695
702
 
@@ -712,106 +719,267 @@ def app_deploy(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None
712
719
  """
713
720
  ...
714
721
 
715
- def vllm(*, model: str, backend: str, openai_api_server: bool, debug: bool, card_refresh_interval: int, max_retries: int, retry_alert_frequency: int, engine_args: dict) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
722
+ def huggingface_hub(*, temp_dir_root: typing.Optional[str] = None, load: typing.Union[typing.List[str], typing.List[typing.Tuple[typing.Dict, str]], typing.List[typing.Tuple[str, str]], typing.List[typing.Dict], None]) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
716
723
  """
717
- This decorator is used to run vllm APIs as Metaflow task sidecars.
724
+ Decorator that helps cache, version and store models/datasets from huggingface hub.
718
725
 
719
- User code call
720
- --------------
721
- @vllm(
722
- model="...",
723
- ...
724
- )
726
+ > Examples
725
727
 
726
- Valid backend options
727
- ---------------------
728
- - 'local': Run as a separate process on the local task machine.
728
+ **Usage: creating references of models from huggingface that may be loaded in downstream steps**
729
+ ```python
730
+ @huggingface_hub
731
+ @step
732
+ def pull_model_from_huggingface(self):
733
+ # `current.huggingface_hub.snapshot_download` downloads the model from the Hugging Face Hub
734
+ # and saves it in the backend storage based on the model's `repo_id`. If there exists a model
735
+ # with the same `repo_id` in the backend storage, it will not download the model again. The return
736
+ # value of the function is a reference to the model in the backend storage.
737
+ # This reference can be used to load the model in the subsequent steps via `@model(load=["llama_model"])`
729
738
 
730
- Valid model options
731
- -------------------
732
- Any HuggingFace model identifier, e.g. 'meta-llama/Llama-3.2-1B'
739
+ self.model_id = "mistralai/Mistral-7B-Instruct-v0.1"
740
+ self.llama_model = current.huggingface_hub.snapshot_download(
741
+ repo_id=self.model_id,
742
+ allow_patterns=["*.safetensors", "*.json", "tokenizer.*"],
743
+ )
744
+ self.next(self.train)
745
+ ```
733
746
 
734
- NOTE: vLLM's OpenAI-compatible server serves ONE model per server instance.
735
- If you need multiple models, you must create multiple @vllm decorators.
747
+ **Usage: loading models directly from huggingface hub or from cache (from metaflow's datastore)**
748
+ ```python
749
+ @huggingface_hub(load=["mistralai/Mistral-7B-Instruct-v0.1"])
750
+ @step
751
+ def pull_model_from_huggingface(self):
752
+ path_to_model = current.huggingface_hub.loaded["mistralai/Mistral-7B-Instruct-v0.1"]
753
+ ```
754
+
755
+ ```python
756
+ @huggingface_hub(load=[("mistralai/Mistral-7B-Instruct-v0.1", "/my-directory"), ("myorg/mistral-lora", "/my-lora-directory")])
757
+ @step
758
+ def finetune_model(self):
759
+ path_to_model = current.huggingface_hub.loaded["mistralai/Mistral-7B-Instruct-v0.1"]
760
+ # path_to_model will be /my-directory
761
+ ```
762
+
763
+ ```python
764
+ # Takes all the arguments passed to `snapshot_download`
765
+ # except for `local_dir`
766
+ @huggingface_hub(load=[
767
+ {
768
+ "repo_id": "mistralai/Mistral-7B-Instruct-v0.1",
769
+ },
770
+ {
771
+ "repo_id": "myorg/mistral-lora",
772
+ "repo_type": "model",
773
+ },
774
+ ])
775
+ @step
776
+ def finetune_model(self):
777
+ path_to_model = current.huggingface_hub.loaded["mistralai/Mistral-7B-Instruct-v0.1"]
778
+ # path_to_model will be /my-directory
779
+ ```
736
780
 
737
781
 
738
782
  Parameters
739
783
  ----------
740
- model: str
741
- HuggingFace model identifier to be served by vLLM.
742
- backend: str
743
- Determines where and how to run the vLLM process.
744
- openai_api_server: bool
745
- Whether to use OpenAI-compatible API server mode (subprocess) instead of native engine.
746
- Default is False (uses native engine).
747
- Set to True for backward compatibility with existing code.
748
- debug: bool
749
- Whether to turn on verbose debugging logs.
750
- card_refresh_interval: int
751
- Interval in seconds for refreshing the vLLM status card.
752
- Only used when openai_api_server=True.
753
- max_retries: int
754
- Maximum number of retries checking for vLLM server startup.
755
- Only used when openai_api_server=True.
756
- retry_alert_frequency: int
757
- Frequency of alert logs for vLLM server startup retries.
758
- Only used when openai_api_server=True.
759
- engine_args : dict
760
- Additional keyword arguments to pass to the vLLM engine.
761
- For example, `tensor_parallel_size=2`.
784
+ temp_dir_root : str, optional
785
+ The root directory that will hold the temporary directory where objects will be downloaded.
786
+
787
+ load: Union[List[str], List[Tuple[Dict, str]], List[Tuple[str, str]], List[Dict], None]
788
+ The list of repos (models/datasets) to load.
789
+
790
+ Loaded repos can be accessed via `current.huggingface_hub.loaded`. If load is set, then the following happens:
791
+
792
+ - If repo (model/dataset) is not found in the datastore:
793
+ - Downloads the repo from Hugging Face Hub to a temporary directory (or uses specified path) for local access
794
+ - Stores it in Metaflow's datastore (s3/gcs/azure etc.) with a unique name based on repo_type/repo_id
795
+ - All HF models loaded for a `@step` will be cached separately under flow/step/namespace.
796
+
797
+ - If repo is found in the datastore:
798
+ - Loads it directly from datastore to local path (can be temporary directory or specified path)
799
+ """
800
+ ...
801
+
802
+ @typing.overload
803
+ def model(*, load: typing.Union[typing.List[str], str, typing.List[typing.Tuple[str, typing.Optional[str]]]] = None, temp_dir_root: str = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
804
+ """
805
+ Enables loading / saving of models within a step.
806
+
807
+ > Examples
808
+ - Saving Models
809
+ ```python
810
+ @model
811
+ @step
812
+ def train(self):
813
+ # current.model.save returns a dictionary reference to the model saved
814
+ self.my_model = current.model.save(
815
+ path_to_my_model,
816
+ label="my_model",
817
+ metadata={
818
+ "epochs": 10,
819
+ "batch-size": 32,
820
+ "learning-rate": 0.001,
821
+ }
822
+ )
823
+ self.next(self.test)
824
+
825
+ @model(load="my_model")
826
+ @step
827
+ def test(self):
828
+ # `current.model.loaded` returns a dictionary of the loaded models
829
+ # where the key is the name of the artifact and the value is the path to the model
830
+ print(os.listdir(current.model.loaded["my_model"]))
831
+ self.next(self.end)
832
+ ```
833
+
834
+ - Loading models
835
+ ```python
836
+ @step
837
+ def train(self):
838
+ # current.model.load returns the path to the model loaded
839
+ checkpoint_path = current.model.load(
840
+ self.checkpoint_key,
841
+ )
842
+ model_path = current.model.load(
843
+ self.model,
844
+ )
845
+ self.next(self.test)
846
+ ```
847
+
848
+
849
+ Parameters
850
+ ----------
851
+ load : Union[List[str],str,List[Tuple[str,Union[str,None]]]], default: None
852
+ Artifact name/s referencing the models/checkpoints to load. Artifact names refer to the names of the instance variables set to `self`.
853
+ The artifacts named in `load` should be reference objects or reference `key` strings from objects created by `current.checkpoint` / `current.model` / `current.huggingface_hub`.
854
+ If a list of tuples is provided, the first element is the artifact name and the second element is the path the artifact needs to be unpacked on
855
+ the local filesystem. If the second element is None, the artifact will be unpacked in the current working directory.
856
+ If a string is provided, then the artifact corresponding to that name will be loaded in the current working directory.
857
+
858
+ temp_dir_root : str, default: None
859
+ The root directory under which `current.model.loaded` will store loaded models
860
+ """
861
+ ...
862
+
863
+ @typing.overload
864
+ def model(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
865
+ ...
866
+
867
+ @typing.overload
868
+ def model(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
869
+ ...
870
+
871
+ def model(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, load: typing.Union[typing.List[str], str, typing.List[typing.Tuple[str, typing.Optional[str]]]] = None, temp_dir_root: str = None):
872
+ """
873
+ Enables loading / saving of models within a step.
874
+
875
+ > Examples
876
+ - Saving Models
877
+ ```python
878
+ @model
879
+ @step
880
+ def train(self):
881
+ # current.model.save returns a dictionary reference to the model saved
882
+ self.my_model = current.model.save(
883
+ path_to_my_model,
884
+ label="my_model",
885
+ metadata={
886
+ "epochs": 10,
887
+ "batch-size": 32,
888
+ "learning-rate": 0.001,
889
+ }
890
+ )
891
+ self.next(self.test)
892
+
893
+ @model(load="my_model")
894
+ @step
895
+ def test(self):
896
+ # `current.model.loaded` returns a dictionary of the loaded models
897
+ # where the key is the name of the artifact and the value is the path to the model
898
+ print(os.listdir(current.model.loaded["my_model"]))
899
+ self.next(self.end)
900
+ ```
901
+
902
+ - Loading models
903
+ ```python
904
+ @step
905
+ def train(self):
906
+ # current.model.load returns the path to the model loaded
907
+ checkpoint_path = current.model.load(
908
+ self.checkpoint_key,
909
+ )
910
+ model_path = current.model.load(
911
+ self.model,
912
+ )
913
+ self.next(self.test)
914
+ ```
915
+
916
+
917
+ Parameters
918
+ ----------
919
+ load : Union[List[str],str,List[Tuple[str,Union[str,None]]]], default: None
920
+ Artifact name/s referencing the models/checkpoints to load. Artifact names refer to the names of the instance variables set to `self`.
921
+ The artifacts named in `load` should be reference objects or reference `key` strings from objects created by `current.checkpoint` / `current.model` / `current.huggingface_hub`.
922
+ If a list of tuples is provided, the first element is the artifact name and the second element is the path the artifact needs to be unpacked on
923
+ the local filesystem. If the second element is None, the artifact will be unpacked in the current working directory.
924
+ If a string is provided, then the artifact corresponding to that name will be loaded in the current working directory.
925
+
926
+ temp_dir_root : str, default: None
927
+ The root directory under which `current.model.loaded` will store loaded models
762
928
  """
763
929
  ...
764
930
 
765
931
  @typing.overload
766
- def card(*, type: str = 'default', id: typing.Optional[str] = None, options: typing.Dict[str, typing.Any] = {}, timeout: int = 45) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
932
+ def catch(*, var: typing.Optional[str] = None, print_exception: bool = True) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
767
933
  """
768
- Creates a human-readable report, a Metaflow Card, after this step completes.
934
+ Specifies that the step will succeed under all circumstances.
769
935
 
770
- Note that you may add multiple `@card` decorators in a step with different parameters.
936
+ The decorator will create an optional artifact, specified by `var`, which
937
+ contains the exception raised. You can use it to detect the presence
938
+ of errors, indicating that all happy-path artifacts produced by the step
939
+ are missing.
771
940
 
772
941
 
773
942
  Parameters
774
943
  ----------
775
- type : str, default 'default'
776
- Card type.
777
- id : str, optional, default None
778
- If multiple cards are present, use this id to identify this card.
779
- options : Dict[str, Any], default {}
780
- Options passed to the card. The contents depend on the card type.
781
- timeout : int, default 45
782
- Interrupt reporting if it takes more than this many seconds.
944
+ var : str, optional, default None
945
+ Name of the artifact in which to store the caught exception.
946
+ If not specified, the exception is not stored.
947
+ print_exception : bool, default True
948
+ Determines whether or not the exception is printed to
949
+ stdout when caught.
783
950
  """
784
951
  ...
785
952
 
786
953
  @typing.overload
787
- def card(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
954
+ def catch(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
788
955
  ...
789
956
 
790
957
  @typing.overload
791
- def card(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
958
+ def catch(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
792
959
  ...
793
960
 
794
- def card(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, type: str = 'default', id: typing.Optional[str] = None, options: typing.Dict[str, typing.Any] = {}, timeout: int = 45):
961
+ def catch(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, var: typing.Optional[str] = None, print_exception: bool = True):
795
962
  """
796
- Creates a human-readable report, a Metaflow Card, after this step completes.
963
+ Specifies that the step will succeed under all circumstances.
797
964
 
798
- Note that you may add multiple `@card` decorators in a step with different parameters.
965
+ The decorator will create an optional artifact, specified by `var`, which
966
+ contains the exception raised. You can use it to detect the presence
967
+ of errors, indicating that all happy-path artifacts produced by the step
968
+ are missing.
799
969
 
800
970
 
801
971
  Parameters
802
972
  ----------
803
- type : str, default 'default'
804
- Card type.
805
- id : str, optional, default None
806
- If multiple cards are present, use this id to identify this card.
807
- options : Dict[str, Any], default {}
808
- Options passed to the card. The contents depend on the card type.
809
- timeout : int, default 45
810
- Interrupt reporting if it takes more than this many seconds.
973
+ var : str, optional, default None
974
+ Name of the artifact in which to store the caught exception.
975
+ If not specified, the exception is not stored.
976
+ print_exception : bool, default True
977
+ Determines whether or not the exception is printed to
978
+ stdout when caught.
811
979
  """
812
980
  ...
813
981
 
814
- def nvidia(*, gpu: int, gpu_type: str, queue_timeout: int) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
982
+ def nvct(*, gpu: int, gpu_type: str) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
815
983
  """
816
984
  Specifies that this step should execute on DGX cloud.
817
985
 
@@ -822,44 +990,54 @@ def nvidia(*, gpu: int, gpu_type: str, queue_timeout: int) -> typing.Callable[[t
822
990
  Number of GPUs to use.
823
991
  gpu_type : str
824
992
  Type of Nvidia GPU to use.
825
- queue_timeout : int
826
- Time to keep the job in NVCF's queue.
827
993
  """
828
994
  ...
829
995
 
830
- @typing.overload
831
- def retry(*, times: int = 3, minutes_between_retries: int = 2) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
996
+ def ollama(*, models: list, backend: str, force_pull: bool, cache_update_policy: str, force_cache_update: bool, debug: bool, circuit_breaker_config: dict, timeout_config: dict) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
832
997
  """
833
- Specifies the number of times the task corresponding
834
- to a step needs to be retried.
998
+ This decorator is used to run Ollama APIs as Metaflow task sidecars.
835
999
 
836
- This decorator is useful for handling transient errors, such as networking issues.
837
- If your task contains operations that can't be retried safely, e.g. database updates,
838
- it is advisable to annotate it with `@retry(times=0)`.
1000
+ User code call
1001
+ --------------
1002
+ @ollama(
1003
+ models=[...],
1004
+ ...
1005
+ )
839
1006
 
840
- This can be used in conjunction with the `@catch` decorator. The `@catch`
841
- decorator will execute a no-op task after all retries have been exhausted,
842
- ensuring that the flow execution can continue.
1007
+ Valid backend options
1008
+ ---------------------
1009
+ - 'local': Run as a separate process on the local task machine.
1010
+ - (TODO) 'managed': Outerbounds hosts and selects compute provider.
1011
+ - (TODO) 'remote': Spin up separate instance to serve Ollama models.
1012
+
1013
+ Valid model options
1014
+ -------------------
1015
+ Any model here https://ollama.com/search, e.g. 'llama3.2', 'llama3.3'
843
1016
 
844
1017
 
845
1018
  Parameters
846
1019
  ----------
847
- times : int, default 3
848
- Number of times to retry this task.
849
- minutes_between_retries : int, default 2
850
- Number of minutes between retries.
1020
+ models: list[str]
1021
+ List of Ollama containers running models in sidecars.
1022
+ backend: str
1023
+ Determines where and how to run the Ollama process.
1024
+ force_pull: bool
1025
+ Whether to run `ollama pull` no matter what, or first check the remote cache in Metaflow datastore for this model key.
1026
+ cache_update_policy: str
1027
+ Cache update policy: "auto", "force", or "never".
1028
+ force_cache_update: bool
1029
+ Simple override for "force" cache update policy.
1030
+ debug: bool
1031
+ Whether to turn on verbose debugging logs.
1032
+ circuit_breaker_config: dict
1033
+ Configuration for circuit breaker protection. Keys: failure_threshold, recovery_timeout, reset_timeout.
1034
+ timeout_config: dict
1035
+ Configuration for various operation timeouts. Keys: pull, stop, health_check, install, server_startup.
851
1036
  """
852
1037
  ...
853
1038
 
854
1039
  @typing.overload
855
- def retry(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
856
- ...
857
-
858
- @typing.overload
859
- def retry(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
860
- ...
861
-
862
- def retry(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, times: int = 3, minutes_between_retries: int = 2):
1040
+ def retry(*, times: int = 3, minutes_between_retries: int = 2) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
863
1041
  """
864
1042
  Specifies the number of times the task corresponding
865
1043
  to a step needs to be retried.
@@ -877,171 +1055,58 @@ def retry(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], ty
877
1055
  ----------
878
1056
  times : int, default 3
879
1057
  Number of times to retry this task.
880
- minutes_between_retries : int, default 2
881
- Number of minutes between retries.
882
- """
883
- ...
884
-
885
- @typing.overload
886
- def parallel(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
887
- """
888
- Decorator prototype for all step decorators. This function gets specialized
889
- and imported for all decorators types by _import_plugin_decorators().
890
- """
891
- ...
892
-
893
- @typing.overload
894
- def parallel(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
895
- ...
896
-
897
- def parallel(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
898
- """
899
- Decorator prototype for all step decorators. This function gets specialized
900
- and imported for all decorators types by _import_plugin_decorators().
901
- """
902
- ...
903
-
904
- @typing.overload
905
- def resources(*, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
906
- """
907
- Specifies the resources needed when executing this step.
908
-
909
- Use `@resources` to specify the resource requirements
910
- independently of the specific compute layer (`@batch`, `@kubernetes`).
911
-
912
- You can choose the compute layer on the command line by executing e.g.
913
- ```
914
- python myflow.py run --with batch
915
- ```
916
- or
917
- ```
918
- python myflow.py run --with kubernetes
919
- ```
920
- which executes the flow on the desired system using the
921
- requirements specified in `@resources`.
922
-
923
-
924
- Parameters
925
- ----------
926
- cpu : int, default 1
927
- Number of CPUs required for this step.
928
- gpu : int, optional, default None
929
- Number of GPUs required for this step.
930
- disk : int, optional, default None
931
- Disk size (in MB) required for this step. Only applies on Kubernetes.
932
- memory : int, default 4096
933
- Memory size (in MB) required for this step.
934
- shared_memory : int, optional, default None
935
- The value for the size (in MiB) of the /dev/shm volume for this step.
936
- This parameter maps to the `--shm-size` option in Docker.
937
- """
938
- ...
939
-
940
- @typing.overload
941
- def resources(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
942
- ...
943
-
944
- @typing.overload
945
- def resources(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
946
- ...
947
-
948
- def resources(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None):
949
- """
950
- Specifies the resources needed when executing this step.
951
-
952
- Use `@resources` to specify the resource requirements
953
- independently of the specific compute layer (`@batch`, `@kubernetes`).
954
-
955
- You can choose the compute layer on the command line by executing e.g.
956
- ```
957
- python myflow.py run --with batch
958
- ```
959
- or
960
- ```
961
- python myflow.py run --with kubernetes
962
- ```
963
- which executes the flow on the desired system using the
964
- requirements specified in `@resources`.
965
-
966
-
967
- Parameters
968
- ----------
969
- cpu : int, default 1
970
- Number of CPUs required for this step.
971
- gpu : int, optional, default None
972
- Number of GPUs required for this step.
973
- disk : int, optional, default None
974
- Disk size (in MB) required for this step. Only applies on Kubernetes.
975
- memory : int, default 4096
976
- Memory size (in MB) required for this step.
977
- shared_memory : int, optional, default None
978
- The value for the size (in MiB) of the /dev/shm volume for this step.
979
- This parameter maps to the `--shm-size` option in Docker.
980
- """
981
- ...
982
-
983
- @typing.overload
984
- def catch(*, var: typing.Optional[str] = None, print_exception: bool = True) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
985
- """
986
- Specifies that the step will success under all circumstances.
987
-
988
- The decorator will create an optional artifact, specified by `var`, which
989
- contains the exception raised. You can use it to detect the presence
990
- of errors, indicating that all happy-path artifacts produced by the step
991
- are missing.
992
-
993
-
994
- Parameters
995
- ----------
996
- var : str, optional, default None
997
- Name of the artifact in which to store the caught exception.
998
- If not specified, the exception is not stored.
999
- print_exception : bool, default True
1000
- Determines whether or not the exception is printed to
1001
- stdout when caught.
1058
+ minutes_between_retries : int, default 2
1059
+ Number of minutes between retries.
1002
1060
  """
1003
1061
  ...
1004
1062
 
1005
1063
  @typing.overload
1006
- def catch(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1064
+ def retry(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1007
1065
  ...
1008
1066
 
1009
1067
  @typing.overload
1010
- def catch(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1068
+ def retry(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1011
1069
  ...
1012
1070
 
1013
- def catch(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, var: typing.Optional[str] = None, print_exception: bool = True):
1071
+ def retry(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, times: int = 3, minutes_between_retries: int = 2):
1014
1072
  """
1015
- Specifies that the step will success under all circumstances.
1073
+ Specifies the number of times the task corresponding
1074
+ to a step needs to be retried.
1016
1075
 
1017
- The decorator will create an optional artifact, specified by `var`, which
1018
- contains the exception raised. You can use it to detect the presence
1019
- of errors, indicating that all happy-path artifacts produced by the step
1020
- are missing.
1076
+ This decorator is useful for handling transient errors, such as networking issues.
1077
+ If your task contains operations that can't be retried safely, e.g. database updates,
1078
+ it is advisable to annotate it with `@retry(times=0)`.
1079
+
1080
+ This can be used in conjunction with the `@catch` decorator. The `@catch`
1081
+ decorator will execute a no-op task after all retries have been exhausted,
1082
+ ensuring that the flow execution can continue.
1021
1083
 
1022
1084
 
1023
1085
  Parameters
1024
1086
  ----------
1025
- var : str, optional, default None
1026
- Name of the artifact in which to store the caught exception.
1027
- If not specified, the exception is not stored.
1028
- print_exception : bool, default True
1029
- Determines whether or not the exception is printed to
1030
- stdout when caught.
1087
+ times : int, default 3
1088
+ Number of times to retry this task.
1089
+ minutes_between_retries : int, default 2
1090
+ Number of minutes between retries.
1031
1091
  """
1032
1092
  ...
1033
1093
 
1034
- def nvct(*, gpu: int, gpu_type: str) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1094
+ @typing.overload
1095
+ def parallel(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1035
1096
  """
1036
- Specifies that this step should execute on DGX cloud.
1037
-
1038
-
1039
- Parameters
1040
- ----------
1041
- gpu : int
1042
- Number of GPUs to use.
1043
- gpu_type : str
1044
- Type of Nvidia GPU to use.
1097
+ Decorator prototype for all step decorators. This function gets specialized
1098
+ and imported for all decorator types by _import_plugin_decorators().
1099
+ """
1100
+ ...
1101
+
1102
+ @typing.overload
1103
+ def parallel(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1104
+ ...
1105
+
1106
+ def parallel(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
1107
+ """
1108
+ Decorator prototype for all step decorators. This function gets specialized
1109
+ and imported for all decorator types by _import_plugin_decorators().
1045
1110
  """
1046
1111
  ...
1047
1112
 
@@ -1096,167 +1161,102 @@ def pypi(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typ
1096
1161
  """
1097
1162
  ...
1098
1163
 
1099
- @typing.overload
1100
- def checkpoint(*, load_policy: str = 'fresh', temp_dir_root: str = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1164
+ def vllm(*, model: str, backend: str, openai_api_server: bool, debug: bool, card_refresh_interval: int, max_retries: int, retry_alert_frequency: int, engine_args: dict) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1101
1165
  """
1102
- Enables checkpointing for a step.
1103
-
1104
- > Examples
1105
-
1106
- - Saving Checkpoints
1166
+ This decorator is used to run vllm APIs as Metaflow task sidecars.
1107
1167
 
1108
- ```python
1109
- @checkpoint
1110
- @step
1111
- def train(self):
1112
- model = create_model(self.parameters, checkpoint_path = None)
1113
- for i in range(self.epochs):
1114
- # some training logic
1115
- loss = model.train(self.dataset)
1116
- if i % 10 == 0:
1117
- model.save(
1118
- current.checkpoint.directory,
1119
- )
1120
- # saves the contents of the `current.checkpoint.directory` as a checkpoint
1121
- # and returns a reference dictionary to the checkpoint saved in the datastore
1122
- self.latest_checkpoint = current.checkpoint.save(
1123
- name="epoch_checkpoint",
1124
- metadata={
1125
- "epoch": i,
1126
- "loss": loss,
1127
- }
1128
- )
1129
- ```
1168
+ User code call
1169
+ --------------
1170
+ @vllm(
1171
+ model="...",
1172
+ ...
1173
+ )
1130
1174
 
1131
- - Using Loaded Checkpoints
1175
+ Valid backend options
1176
+ ---------------------
1177
+ - 'local': Run as a separate process on the local task machine.
1132
1178
 
1133
- ```python
1134
- @retry(times=3)
1135
- @checkpoint
1136
- @step
1137
- def train(self):
1138
- # Assume that the task has restarted and the previous attempt of the task
1139
- # saved a checkpoint
1140
- checkpoint_path = None
1141
- if current.checkpoint.is_loaded: # Check if a checkpoint is loaded
1142
- print("Loaded checkpoint from the previous attempt")
1143
- checkpoint_path = current.checkpoint.directory
1179
+ Valid model options
1180
+ -------------------
1181
+ Any HuggingFace model identifier, e.g. 'meta-llama/Llama-3.2-1B'
1144
1182
 
1145
- model = create_model(self.parameters, checkpoint_path = checkpoint_path)
1146
- for i in range(self.epochs):
1147
- ...
1148
- ```
1183
+ NOTE: vLLM's OpenAI-compatible server serves ONE model per server instance.
1184
+ If you need multiple models, you must create multiple @vllm decorators.
1149
1185
 
1150
1186
 
1151
1187
  Parameters
1152
1188
  ----------
1153
- load_policy : str, default: "fresh"
1154
- The policy for loading the checkpoint. The following policies are supported:
1155
- - "eager": Loads the the latest available checkpoint within the namespace.
1156
- With this mode, the latest checkpoint written by any previous task (can be even a different run) of the step
1157
- will be loaded at the start of the task.
1158
- - "none": Do not load any checkpoint
1159
- - "fresh": Loads the lastest checkpoint created within the running Task.
1160
- This mode helps loading checkpoints across various retry attempts of the same task.
1161
- With this mode, no checkpoint will be loaded at the start of a task but any checkpoints
1162
- created within the task will be loaded when the task is retries execution on failure.
1163
-
1164
- temp_dir_root : str, default: None
1165
- The root directory under which `current.checkpoint.directory` will be created.
1189
+ model: str
1190
+ HuggingFace model identifier to be served by vLLM.
1191
+ backend: str
1192
+ Determines where and how to run the vLLM process.
1193
+ openai_api_server: bool
1194
+ Whether to use OpenAI-compatible API server mode (subprocess) instead of native engine.
1195
+ Default is False (uses native engine).
1196
+ Set to True for backward compatibility with existing code.
1197
+ debug: bool
1198
+ Whether to turn on verbose debugging logs.
1199
+ card_refresh_interval: int
1200
+ Interval in seconds for refreshing the vLLM status card.
1201
+ Only used when openai_api_server=True.
1202
+ max_retries: int
1203
+ Maximum number of retries checking for vLLM server startup.
1204
+ Only used when openai_api_server=True.
1205
+ retry_alert_frequency: int
1206
+ Frequency of alert logs for vLLM server startup retries.
1207
+ Only used when openai_api_server=True.
1208
+ engine_args : dict
1209
+ Additional keyword arguments to pass to the vLLM engine.
1210
+ For example, `tensor_parallel_size=2`.
1166
1211
  """
1167
1212
  ...
1168
1213
 
1169
1214
  @typing.overload
1170
- def checkpoint(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1171
- ...
1172
-
1173
- @typing.overload
1174
- def checkpoint(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1175
- ...
1176
-
1177
- def checkpoint(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, load_policy: str = 'fresh', temp_dir_root: str = None):
1215
+ def card(*, type: str = 'default', id: typing.Optional[str] = None, options: typing.Dict[str, typing.Any] = {}, timeout: int = 45) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1178
1216
  """
1179
- Enables checkpointing for a step.
1180
-
1181
- > Examples
1182
-
1183
- - Saving Checkpoints
1184
-
1185
- ```python
1186
- @checkpoint
1187
- @step
1188
- def train(self):
1189
- model = create_model(self.parameters, checkpoint_path = None)
1190
- for i in range(self.epochs):
1191
- # some training logic
1192
- loss = model.train(self.dataset)
1193
- if i % 10 == 0:
1194
- model.save(
1195
- current.checkpoint.directory,
1196
- )
1197
- # saves the contents of the `current.checkpoint.directory` as a checkpoint
1198
- # and returns a reference dictionary to the checkpoint saved in the datastore
1199
- self.latest_checkpoint = current.checkpoint.save(
1200
- name="epoch_checkpoint",
1201
- metadata={
1202
- "epoch": i,
1203
- "loss": loss,
1204
- }
1205
- )
1206
- ```
1207
-
1208
- - Using Loaded Checkpoints
1209
-
1210
- ```python
1211
- @retry(times=3)
1212
- @checkpoint
1213
- @step
1214
- def train(self):
1215
- # Assume that the task has restarted and the previous attempt of the task
1216
- # saved a checkpoint
1217
- checkpoint_path = None
1218
- if current.checkpoint.is_loaded: # Check if a checkpoint is loaded
1219
- print("Loaded checkpoint from the previous attempt")
1220
- checkpoint_path = current.checkpoint.directory
1217
+ Creates a human-readable report, a Metaflow Card, after this step completes.
1221
1218
 
1222
- model = create_model(self.parameters, checkpoint_path = checkpoint_path)
1223
- for i in range(self.epochs):
1224
- ...
1225
- ```
1219
+ Note that you may add multiple `@card` decorators in a step with different parameters.
1226
1220
 
1227
1221
 
1228
1222
  Parameters
1229
1223
  ----------
1230
- load_policy : str, default: "fresh"
1231
- The policy for loading the checkpoint. The following policies are supported:
1232
- - "eager": Loads the the latest available checkpoint within the namespace.
1233
- With this mode, the latest checkpoint written by any previous task (can be even a different run) of the step
1234
- will be loaded at the start of the task.
1235
- - "none": Do not load any checkpoint
1236
- - "fresh": Loads the lastest checkpoint created within the running Task.
1237
- This mode helps loading checkpoints across various retry attempts of the same task.
1238
- With this mode, no checkpoint will be loaded at the start of a task but any checkpoints
1239
- created within the task will be loaded when the task is retries execution on failure.
1240
-
1241
- temp_dir_root : str, default: None
1242
- The root directory under which `current.checkpoint.directory` will be created.
1224
+ type : str, default 'default'
1225
+ Card type.
1226
+ id : str, optional, default None
1227
+ If multiple cards are present, use this id to identify this card.
1228
+ options : Dict[str, Any], default {}
1229
+ Options passed to the card. The contents depend on the card type.
1230
+ timeout : int, default 45
1231
+ Interrupt reporting if it takes more than this many seconds.
1243
1232
  """
1244
1233
  ...
1245
1234
 
1246
1235
  @typing.overload
1247
- def fast_bakery_internal(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1248
- """
1249
- Internal decorator to support Fast bakery
1250
- """
1236
+ def card(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1251
1237
  ...
1252
1238
 
1253
1239
  @typing.overload
1254
- def fast_bakery_internal(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1240
+ def card(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1255
1241
  ...
1256
1242
 
1257
- def fast_bakery_internal(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
1243
+ def card(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, type: str = 'default', id: typing.Optional[str] = None, options: typing.Dict[str, typing.Any] = {}, timeout: int = 45):
1258
1244
  """
1259
- Internal decorator to support Fast bakery
1245
+ Creates a human-readable report, a Metaflow Card, after this step completes.
1246
+
1247
+ Note that you may add multiple `@card` decorators in a step with different parameters.
1248
+
1249
+
1250
+ Parameters
1251
+ ----------
1252
+ type : str, default 'default'
1253
+ Card type.
1254
+ id : str, optional, default None
1255
+ If multiple cards are present, use this id to identify this card.
1256
+ options : Dict[str, Any], default {}
1257
+ Options passed to the card. The contents depend on the card type.
1258
+ timeout : int, default 45
1259
+ Interrupt reporting if it takes more than this many seconds.
1260
1260
  """
1261
1261
  ...
1262
1262
 
@@ -1303,158 +1303,6 @@ def airflow_s3_key_sensor(*, timeout: int, poke_interval: int, mode: str, expone
1303
1303
  """
1304
1304
  ...
1305
1305
 
1306
- @typing.overload
1307
- def trigger_on_finish(*, flow: typing.Union[typing.Dict[str, str], str, None] = None, flows: typing.List[typing.Union[str, typing.Dict[str, str]]] = [], options: typing.Dict[str, typing.Any] = {}) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1308
- """
1309
- Specifies the flow(s) that this flow depends on.
1310
-
1311
- ```
1312
- @trigger_on_finish(flow='FooFlow')
1313
- ```
1314
- or
1315
- ```
1316
- @trigger_on_finish(flows=['FooFlow', 'BarFlow'])
1317
- ```
1318
- This decorator respects the @project decorator and triggers the flow
1319
- when upstream runs within the same namespace complete successfully
1320
-
1321
- Additionally, you can specify project aware upstream flow dependencies
1322
- by specifying the fully qualified project_flow_name.
1323
- ```
1324
- @trigger_on_finish(flow='my_project.branch.my_branch.FooFlow')
1325
- ```
1326
- or
1327
- ```
1328
- @trigger_on_finish(flows=['my_project.branch.my_branch.FooFlow', 'BarFlow'])
1329
- ```
1330
-
1331
- You can also specify just the project or project branch (other values will be
1332
- inferred from the current project or project branch):
1333
- ```
1334
- @trigger_on_finish(flow={"name": "FooFlow", "project": "my_project", "project_branch": "branch"})
1335
- ```
1336
-
1337
- Note that `branch` is typically one of:
1338
- - `prod`
1339
- - `user.bob`
1340
- - `test.my_experiment`
1341
- - `prod.staging`
1342
-
1343
-
1344
- Parameters
1345
- ----------
1346
- flow : Union[str, Dict[str, str]], optional, default None
1347
- Upstream flow dependency for this flow.
1348
- flows : List[Union[str, Dict[str, str]]], default []
1349
- Upstream flow dependencies for this flow.
1350
- options : Dict[str, Any], default {}
1351
- Backend-specific configuration for tuning eventing behavior.
1352
- """
1353
- ...
1354
-
1355
- @typing.overload
1356
- def trigger_on_finish(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1357
- ...
1358
-
1359
- def trigger_on_finish(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, flow: typing.Union[typing.Dict[str, str], str, None] = None, flows: typing.List[typing.Union[str, typing.Dict[str, str]]] = [], options: typing.Dict[str, typing.Any] = {}):
1360
- """
1361
- Specifies the flow(s) that this flow depends on.
1362
-
1363
- ```
1364
- @trigger_on_finish(flow='FooFlow')
1365
- ```
1366
- or
1367
- ```
1368
- @trigger_on_finish(flows=['FooFlow', 'BarFlow'])
1369
- ```
1370
- This decorator respects the @project decorator and triggers the flow
1371
- when upstream runs within the same namespace complete successfully
1372
-
1373
- Additionally, you can specify project aware upstream flow dependencies
1374
- by specifying the fully qualified project_flow_name.
1375
- ```
1376
- @trigger_on_finish(flow='my_project.branch.my_branch.FooFlow')
1377
- ```
1378
- or
1379
- ```
1380
- @trigger_on_finish(flows=['my_project.branch.my_branch.FooFlow', 'BarFlow'])
1381
- ```
1382
-
1383
- You can also specify just the project or project branch (other values will be
1384
- inferred from the current project or project branch):
1385
- ```
1386
- @trigger_on_finish(flow={"name": "FooFlow", "project": "my_project", "project_branch": "branch"})
1387
- ```
1388
-
1389
- Note that `branch` is typically one of:
1390
- - `prod`
1391
- - `user.bob`
1392
- - `test.my_experiment`
1393
- - `prod.staging`
1394
-
1395
-
1396
- Parameters
1397
- ----------
1398
- flow : Union[str, Dict[str, str]], optional, default None
1399
- Upstream flow dependency for this flow.
1400
- flows : List[Union[str, Dict[str, str]]], default []
1401
- Upstream flow dependencies for this flow.
1402
- options : Dict[str, Any], default {}
1403
- Backend-specific configuration for tuning eventing behavior.
1404
- """
1405
- ...
1406
-
1407
- @typing.overload
1408
- def conda_base(*, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1409
- """
1410
- Specifies the Conda environment for all steps of the flow.
1411
-
1412
- Use `@conda_base` to set common libraries required by all
1413
- steps and use `@conda` to specify step-specific additions.
1414
-
1415
-
1416
- Parameters
1417
- ----------
1418
- packages : Dict[str, str], default {}
1419
- Packages to use for this flow. The key is the name of the package
1420
- and the value is the version to use.
1421
- libraries : Dict[str, str], default {}
1422
- Supported for backward compatibility. When used with packages, packages will take precedence.
1423
- python : str, optional, default None
1424
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
1425
- that the version used will correspond to the version of the Python interpreter used to start the run.
1426
- disabled : bool, default False
1427
- If set to True, disables Conda.
1428
- """
1429
- ...
1430
-
1431
- @typing.overload
1432
- def conda_base(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1433
- ...
1434
-
1435
- def conda_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False):
1436
- """
1437
- Specifies the Conda environment for all steps of the flow.
1438
-
1439
- Use `@conda_base` to set common libraries required by all
1440
- steps and use `@conda` to specify step-specific additions.
1441
-
1442
-
1443
- Parameters
1444
- ----------
1445
- packages : Dict[str, str], default {}
1446
- Packages to use for this flow. The key is the name of the package
1447
- and the value is the version to use.
1448
- libraries : Dict[str, str], default {}
1449
- Supported for backward compatibility. When used with packages, packages will take precedence.
1450
- python : str, optional, default None
1451
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
1452
- that the version used will correspond to the version of the Python interpreter used to start the run.
1453
- disabled : bool, default False
1454
- If set to True, disables Conda.
1455
- """
1456
- ...
1457
-
1458
1306
  def with_artifact_store(f: typing.Optional[typing.Type[FlowSpecDerived]] = None):
1459
1307
  """
1460
1308
  Allows setting external datastores to save data for the
@@ -1555,110 +1403,17 @@ def with_artifact_store(f: typing.Optional[typing.Type[FlowSpecDerived]] = None)
1555
1403
  ----------
1556
1404
 
1557
1405
  type: str
1558
- The type of the datastore. Can be one of 's3', 'gcs', 'azure' or any other supported metaflow Datastore.
1559
-
1560
- config: dict or Callable
1561
- Dictionary of configuration options for the datastore. The following keys are required:
1562
- - root: The root path in the datastore where the data will be saved. (needs to be in the format expected by the datastore)
1563
- - example: 's3://bucket-name/path/to/root'
1564
- - example: 'gs://bucket-name/path/to/root'
1565
- - example: 'https://myblockacc.blob.core.windows.net/metaflow/'
1566
- - role_arn (optional): AWS IAM role to access s3 bucket (only when `type` is 's3')
1567
- - session_vars (optional): AWS session variables to access s3 bucket (only when `type` is 's3')
1568
- - client_params (optional): AWS client parameters to access s3 bucket (only when `type` is 's3')
1569
- """
1570
- ...
1571
-
1572
- @typing.overload
1573
- def trigger(*, event: typing.Union[str, typing.Dict[str, typing.Any], None] = None, events: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = [], options: typing.Dict[str, typing.Any] = {}) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1574
- """
1575
- Specifies the event(s) that this flow depends on.
1576
-
1577
- ```
1578
- @trigger(event='foo')
1579
- ```
1580
- or
1581
- ```
1582
- @trigger(events=['foo', 'bar'])
1583
- ```
1584
-
1585
- Additionally, you can specify the parameter mappings
1586
- to map event payload to Metaflow parameters for the flow.
1587
- ```
1588
- @trigger(event={'name':'foo', 'parameters':{'flow_param': 'event_field'}})
1589
- ```
1590
- or
1591
- ```
1592
- @trigger(events=[{'name':'foo', 'parameters':{'flow_param_1': 'event_field_1'},
1593
- {'name':'bar', 'parameters':{'flow_param_2': 'event_field_2'}])
1594
- ```
1595
-
1596
- 'parameters' can also be a list of strings and tuples like so:
1597
- ```
1598
- @trigger(event={'name':'foo', 'parameters':['common_name', ('flow_param', 'event_field')]})
1599
- ```
1600
- This is equivalent to:
1601
- ```
1602
- @trigger(event={'name':'foo', 'parameters':{'common_name': 'common_name', 'flow_param': 'event_field'}})
1603
- ```
1604
-
1605
-
1606
- Parameters
1607
- ----------
1608
- event : Union[str, Dict[str, Any]], optional, default None
1609
- Event dependency for this flow.
1610
- events : List[Union[str, Dict[str, Any]]], default []
1611
- Events dependency for this flow.
1612
- options : Dict[str, Any], default {}
1613
- Backend-specific configuration for tuning eventing behavior.
1614
- """
1615
- ...
1616
-
1617
- @typing.overload
1618
- def trigger(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1619
- ...
1620
-
1621
- def trigger(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, event: typing.Union[str, typing.Dict[str, typing.Any], None] = None, events: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = [], options: typing.Dict[str, typing.Any] = {}):
1622
- """
1623
- Specifies the event(s) that this flow depends on.
1624
-
1625
- ```
1626
- @trigger(event='foo')
1627
- ```
1628
- or
1629
- ```
1630
- @trigger(events=['foo', 'bar'])
1631
- ```
1632
-
1633
- Additionally, you can specify the parameter mappings
1634
- to map event payload to Metaflow parameters for the flow.
1635
- ```
1636
- @trigger(event={'name':'foo', 'parameters':{'flow_param': 'event_field'}})
1637
- ```
1638
- or
1639
- ```
1640
- @trigger(events=[{'name':'foo', 'parameters':{'flow_param_1': 'event_field_1'},
1641
- {'name':'bar', 'parameters':{'flow_param_2': 'event_field_2'}])
1642
- ```
1643
-
1644
- 'parameters' can also be a list of strings and tuples like so:
1645
- ```
1646
- @trigger(event={'name':'foo', 'parameters':['common_name', ('flow_param', 'event_field')]})
1647
- ```
1648
- This is equivalent to:
1649
- ```
1650
- @trigger(event={'name':'foo', 'parameters':{'common_name': 'common_name', 'flow_param': 'event_field'}})
1651
- ```
1652
-
1653
-
1654
- Parameters
1655
- ----------
1656
- event : Union[str, Dict[str, Any]], optional, default None
1657
- Event dependency for this flow.
1658
- events : List[Union[str, Dict[str, Any]]], default []
1659
- Events dependency for this flow.
1660
- options : Dict[str, Any], default {}
1661
- Backend-specific configuration for tuning eventing behavior.
1406
+ The type of the datastore. Can be one of 's3', 'gcs', 'azure' or any other supported metaflow Datastore.
1407
+
1408
+ config: dict or Callable
1409
+ Dictionary of configuration options for the datastore. The following keys are required:
1410
+ - root: The root path in the datastore where the data will be saved. (needs to be in the format expected by the datastore)
1411
+ - example: 's3://bucket-name/path/to/root'
1412
+ - example: 'gs://bucket-name/path/to/root'
1413
+ - example: 'https://myblockacc.blob.core.windows.net/metaflow/'
1414
+ - role_arn (optional): AWS IAM role to access s3 bucket (only when `type` is 's3')
1415
+ - session_vars (optional): AWS session variables to access s3 bucket (only when `type` is 's3')
1416
+ - client_params (optional): AWS client parameters to access s3 bucket (only when `type` is 's3')
1662
1417
  """
1663
1418
  ...
1664
1419
 
@@ -1756,6 +1511,107 @@ def airflow_external_task_sensor(*, timeout: int, poke_interval: int, mode: str,
1756
1511
  """
1757
1512
  ...
1758
1513
 
1514
+ @typing.overload
1515
+ def trigger_on_finish(*, flow: typing.Union[typing.Dict[str, str], str, None] = None, flows: typing.List[typing.Union[str, typing.Dict[str, str]]] = [], options: typing.Dict[str, typing.Any] = {}) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1516
+ """
1517
+ Specifies the flow(s) that this flow depends on.
1518
+
1519
+ ```
1520
+ @trigger_on_finish(flow='FooFlow')
1521
+ ```
1522
+ or
1523
+ ```
1524
+ @trigger_on_finish(flows=['FooFlow', 'BarFlow'])
1525
+ ```
1526
+ This decorator respects the @project decorator and triggers the flow
1527
+ when upstream runs within the same namespace complete successfully.
1528
+
1529
+ Additionally, you can specify project aware upstream flow dependencies
1530
+ by specifying the fully qualified project_flow_name.
1531
+ ```
1532
+ @trigger_on_finish(flow='my_project.branch.my_branch.FooFlow')
1533
+ ```
1534
+ or
1535
+ ```
1536
+ @trigger_on_finish(flows=['my_project.branch.my_branch.FooFlow', 'BarFlow'])
1537
+ ```
1538
+
1539
+ You can also specify just the project or project branch (other values will be
1540
+ inferred from the current project or project branch):
1541
+ ```
1542
+ @trigger_on_finish(flow={"name": "FooFlow", "project": "my_project", "project_branch": "branch"})
1543
+ ```
1544
+
1545
+ Note that `branch` is typically one of:
1546
+ - `prod`
1547
+ - `user.bob`
1548
+ - `test.my_experiment`
1549
+ - `prod.staging`
1550
+
1551
+
1552
+ Parameters
1553
+ ----------
1554
+ flow : Union[str, Dict[str, str]], optional, default None
1555
+ Upstream flow dependency for this flow.
1556
+ flows : List[Union[str, Dict[str, str]]], default []
1557
+ Upstream flow dependencies for this flow.
1558
+ options : Dict[str, Any], default {}
1559
+ Backend-specific configuration for tuning eventing behavior.
1560
+ """
1561
+ ...
1562
+
1563
+ @typing.overload
1564
+ def trigger_on_finish(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1565
+ ...
1566
+
1567
+ def trigger_on_finish(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, flow: typing.Union[typing.Dict[str, str], str, None] = None, flows: typing.List[typing.Union[str, typing.Dict[str, str]]] = [], options: typing.Dict[str, typing.Any] = {}):
1568
+ """
1569
+ Specifies the flow(s) that this flow depends on.
1570
+
1571
+ ```
1572
+ @trigger_on_finish(flow='FooFlow')
1573
+ ```
1574
+ or
1575
+ ```
1576
+ @trigger_on_finish(flows=['FooFlow', 'BarFlow'])
1577
+ ```
1578
+ This decorator respects the @project decorator and triggers the flow
1579
+ when upstream runs within the same namespace complete successfully.
1580
+
1581
+ Additionally, you can specify project aware upstream flow dependencies
1582
+ by specifying the fully qualified project_flow_name.
1583
+ ```
1584
+ @trigger_on_finish(flow='my_project.branch.my_branch.FooFlow')
1585
+ ```
1586
+ or
1587
+ ```
1588
+ @trigger_on_finish(flows=['my_project.branch.my_branch.FooFlow', 'BarFlow'])
1589
+ ```
1590
+
1591
+ You can also specify just the project or project branch (other values will be
1592
+ inferred from the current project or project branch):
1593
+ ```
1594
+ @trigger_on_finish(flow={"name": "FooFlow", "project": "my_project", "project_branch": "branch"})
1595
+ ```
1596
+
1597
+ Note that `branch` is typically one of:
1598
+ - `prod`
1599
+ - `user.bob`
1600
+ - `test.my_experiment`
1601
+ - `prod.staging`
1602
+
1603
+
1604
+ Parameters
1605
+ ----------
1606
+ flow : Union[str, Dict[str, str]], optional, default None
1607
+ Upstream flow dependency for this flow.
1608
+ flows : List[Union[str, Dict[str, str]]], default []
1609
+ Upstream flow dependencies for this flow.
1610
+ options : Dict[str, Any], default {}
1611
+ Backend-specific configuration for tuning eventing behavior.
1612
+ """
1613
+ ...
1614
+
1759
1615
  def project(*, name: str, branch: typing.Optional[str] = None, production: bool = False) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1760
1616
  """
1761
1617
  Specifies what flows belong to the same project.
@@ -1791,6 +1647,99 @@ def project(*, name: str, branch: typing.Optional[str] = None, production: bool
1791
1647
  """
1792
1648
  ...
1793
1649
 
1650
+ @typing.overload
1651
+ def trigger(*, event: typing.Union[str, typing.Dict[str, typing.Any], None] = None, events: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = [], options: typing.Dict[str, typing.Any] = {}) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1652
+ """
1653
+ Specifies the event(s) that this flow depends on.
1654
+
1655
+ ```
1656
+ @trigger(event='foo')
1657
+ ```
1658
+ or
1659
+ ```
1660
+ @trigger(events=['foo', 'bar'])
1661
+ ```
1662
+
1663
+ Additionally, you can specify the parameter mappings
1664
+ to map event payload to Metaflow parameters for the flow.
1665
+ ```
1666
+ @trigger(event={'name':'foo', 'parameters':{'flow_param': 'event_field'}})
1667
+ ```
1668
+ or
1669
+ ```
1670
+ @trigger(events=[{'name':'foo', 'parameters':{'flow_param_1': 'event_field_1'}},
1671
+ {'name':'bar', 'parameters':{'flow_param_2': 'event_field_2'}}])
1672
+ ```
1673
+
1674
+ 'parameters' can also be a list of strings and tuples like so:
1675
+ ```
1676
+ @trigger(event={'name':'foo', 'parameters':['common_name', ('flow_param', 'event_field')]})
1677
+ ```
1678
+ This is equivalent to:
1679
+ ```
1680
+ @trigger(event={'name':'foo', 'parameters':{'common_name': 'common_name', 'flow_param': 'event_field'}})
1681
+ ```
1682
+
1683
+
1684
+ Parameters
1685
+ ----------
1686
+ event : Union[str, Dict[str, Any]], optional, default None
1687
+ Event dependency for this flow.
1688
+ events : List[Union[str, Dict[str, Any]]], default []
1689
+ Events dependency for this flow.
1690
+ options : Dict[str, Any], default {}
1691
+ Backend-specific configuration for tuning eventing behavior.
1692
+ """
1693
+ ...
1694
+
1695
+ @typing.overload
1696
+ def trigger(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1697
+ ...
1698
+
1699
+ def trigger(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, event: typing.Union[str, typing.Dict[str, typing.Any], None] = None, events: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = [], options: typing.Dict[str, typing.Any] = {}):
1700
+ """
1701
+ Specifies the event(s) that this flow depends on.
1702
+
1703
+ ```
1704
+ @trigger(event='foo')
1705
+ ```
1706
+ or
1707
+ ```
1708
+ @trigger(events=['foo', 'bar'])
1709
+ ```
1710
+
1711
+ Additionally, you can specify the parameter mappings
1712
+ to map event payload to Metaflow parameters for the flow.
1713
+ ```
1714
+ @trigger(event={'name':'foo', 'parameters':{'flow_param': 'event_field'}})
1715
+ ```
1716
+ or
1717
+ ```
1718
+ @trigger(events=[{'name':'foo', 'parameters':{'flow_param_1': 'event_field_1'}},
1719
+ {'name':'bar', 'parameters':{'flow_param_2': 'event_field_2'}}])
1720
+ ```
1721
+
1722
+ 'parameters' can also be a list of strings and tuples like so:
1723
+ ```
1724
+ @trigger(event={'name':'foo', 'parameters':['common_name', ('flow_param', 'event_field')]})
1725
+ ```
1726
+ This is equivalent to:
1727
+ ```
1728
+ @trigger(event={'name':'foo', 'parameters':{'common_name': 'common_name', 'flow_param': 'event_field'}})
1729
+ ```
1730
+
1731
+
1732
+ Parameters
1733
+ ----------
1734
+ event : Union[str, Dict[str, Any]], optional, default None
1735
+ Event dependency for this flow.
1736
+ events : List[Union[str, Dict[str, Any]]], default []
1737
+ Events dependency for this flow.
1738
+ options : Dict[str, Any], default {}
1739
+ Backend-specific configuration for tuning eventing behavior.
1740
+ """
1741
+ ...
1742
+
1794
1743
  @typing.overload
1795
1744
  def pypi_base(*, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1796
1745
  """
@@ -1832,5 +1781,56 @@ def pypi_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packag
1832
1781
  """
1833
1782
  ...
1834
1783
 
1784
+ @typing.overload
1785
+ def conda_base(*, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1786
+ """
1787
+ Specifies the Conda environment for all steps of the flow.
1788
+
1789
+ Use `@conda_base` to set common libraries required by all
1790
+ steps and use `@conda` to specify step-specific additions.
1791
+
1792
+
1793
+ Parameters
1794
+ ----------
1795
+ packages : Dict[str, str], default {}
1796
+ Packages to use for this flow. The key is the name of the package
1797
+ and the value is the version to use.
1798
+ libraries : Dict[str, str], default {}
1799
+ Supported for backward compatibility. When used with packages, packages will take precedence.
1800
+ python : str, optional, default None
1801
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
1802
+ that the version used will correspond to the version of the Python interpreter used to start the run.
1803
+ disabled : bool, default False
1804
+ If set to True, disables Conda.
1805
+ """
1806
+ ...
1807
+
1808
+ @typing.overload
1809
+ def conda_base(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1810
+ ...
1811
+
1812
+ def conda_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False):
1813
+ """
1814
+ Specifies the Conda environment for all steps of the flow.
1815
+
1816
+ Use `@conda_base` to set common libraries required by all
1817
+ steps and use `@conda` to specify step-specific additions.
1818
+
1819
+
1820
+ Parameters
1821
+ ----------
1822
+ packages : Dict[str, str], default {}
1823
+ Packages to use for this flow. The key is the name of the package
1824
+ and the value is the version to use.
1825
+ libraries : Dict[str, str], default {}
1826
+ Supported for backward compatibility. When used with packages, packages will take precedence.
1827
+ python : str, optional, default None
1828
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
1829
+ that the version used will correspond to the version of the Python interpreter used to start the run.
1830
+ disabled : bool, default False
1831
+ If set to True, disables Conda.
1832
+ """
1833
+ ...
1834
+
1835
1835
  pkg_name: str
1836
1836