ob-metaflow-stubs 6.0.4.1rc0__py2.py3-none-any.whl → 6.0.4.2__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
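For readers who want to reproduce or explore this comparison locally, the short sketch below is an unofficial example and not part of the registry page. It assumes both wheels have already been downloaded (for instance with `pip download ob-metaflow-stubs==6.0.4.1rc0 --no-deps` and `pip download ob-metaflow-stubs==6.0.4.2 --no-deps`) and that the local filenames match the guesses in the code; it then diffs one stub file using only the standard library.

```python
# Hedged sketch: the wheel filenames below are assumptions about what
# `pip download` produces locally; adjust them to the files on your disk.
import difflib
import zipfile

OLD_WHEEL = "ob_metaflow_stubs-6.0.4.1rc0-py2.py3-none-any.whl"  # assumed filename
NEW_WHEEL = "ob_metaflow_stubs-6.0.4.2-py2.py3-none-any.whl"     # assumed filename
MEMBER = "metaflow-stubs/__init__.pyi"                           # file 1 in the list below

def read_member(wheel_path, member):
    # Wheels are plain zip archives, so a member can be read without installing.
    with zipfile.ZipFile(wheel_path) as zf:
        return zf.read(member).decode("utf-8").splitlines(keepends=True)

diff = difflib.unified_diff(
    read_member(OLD_WHEEL, MEMBER),
    read_member(NEW_WHEEL, MEMBER),
    fromfile=f"{OLD_WHEEL}:{MEMBER}",
    tofile=f"{NEW_WHEEL}:{MEMBER}",
)
print("".join(diff))
```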
Files changed (260)
  1. metaflow-stubs/__init__.pyi +1023 -1023
  2. metaflow-stubs/cards.pyi +1 -1
  3. metaflow-stubs/cli.pyi +1 -1
  4. metaflow-stubs/cli_components/__init__.pyi +1 -1
  5. metaflow-stubs/cli_components/utils.pyi +1 -1
  6. metaflow-stubs/client/__init__.pyi +1 -1
  7. metaflow-stubs/client/core.pyi +4 -4
  8. metaflow-stubs/client/filecache.pyi +2 -2
  9. metaflow-stubs/events.pyi +2 -2
  10. metaflow-stubs/exception.pyi +1 -1
  11. metaflow-stubs/flowspec.pyi +5 -5
  12. metaflow-stubs/generated_for.txt +1 -1
  13. metaflow-stubs/includefile.pyi +3 -3
  14. metaflow-stubs/meta_files.pyi +1 -1
  15. metaflow-stubs/metadata_provider/__init__.pyi +1 -1
  16. metaflow-stubs/metadata_provider/heartbeat.pyi +1 -1
  17. metaflow-stubs/metadata_provider/metadata.pyi +2 -2
  18. metaflow-stubs/metadata_provider/util.pyi +1 -1
  19. metaflow-stubs/metaflow_config.pyi +1 -1
  20. metaflow-stubs/metaflow_current.pyi +52 -52
  21. metaflow-stubs/metaflow_git.pyi +1 -1
  22. metaflow-stubs/mf_extensions/__init__.pyi +1 -1
  23. metaflow-stubs/mf_extensions/obcheckpoint/__init__.pyi +1 -1
  24. metaflow-stubs/mf_extensions/obcheckpoint/plugins/__init__.pyi +1 -1
  25. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/__init__.pyi +1 -1
  26. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/__init__.pyi +1 -1
  27. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/async_cards.pyi +1 -1
  28. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/deco_injection_mixin.pyi +1 -1
  29. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/extra_components.pyi +3 -3
  30. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/__init__.pyi +1 -1
  31. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/cards/__init__.pyi +1 -1
  32. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/cards/checkpoint_lister.pyi +3 -3
  33. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/cards/lineage_card.pyi +1 -1
  34. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/checkpoint_storage.pyi +4 -4
  35. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/constructors.pyi +1 -1
  36. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/core.pyi +2 -2
  37. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/decorator.pyi +3 -3
  38. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/exceptions.pyi +1 -1
  39. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/final_api.pyi +1 -1
  40. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/lineage.pyi +1 -1
  41. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/__init__.pyi +1 -1
  42. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/context.pyi +3 -3
  43. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/core.pyi +1 -1
  44. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/decorator.pyi +1 -1
  45. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/exceptions.pyi +1 -1
  46. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/task_utils.pyi +3 -3
  47. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/utils.pyi +1 -1
  48. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastructures.pyi +2 -2
  49. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/exceptions.pyi +1 -1
  50. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/hf_hub/__init__.pyi +1 -1
  51. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/hf_hub/decorator.pyi +1 -1
  52. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/__init__.pyi +1 -1
  53. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/core.pyi +3 -3
  54. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/exceptions.pyi +1 -1
  55. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/model_storage.pyi +3 -3
  56. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/__init__.pyi +1 -1
  57. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/flowspec_utils.pyi +1 -1
  58. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/general.pyi +1 -1
  59. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/identity_utils.pyi +2 -2
  60. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/serialization_handler/__init__.pyi +1 -1
  61. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/serialization_handler/base.pyi +1 -1
  62. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/serialization_handler/tar.pyi +2 -2
  63. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/tar_utils.pyi +2 -2
  64. metaflow-stubs/mf_extensions/outerbounds/__init__.pyi +1 -1
  65. metaflow-stubs/mf_extensions/outerbounds/plugins/__init__.pyi +1 -1
  66. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/__init__.pyi +1 -1
  67. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/__init__.pyi +1 -1
  68. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/_state_machine.pyi +4 -1
  69. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/_vendor/__init__.pyi +1 -1
  70. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/_vendor/spinner/__init__.pyi +1 -1
  71. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/_vendor/spinner/spinners.pyi +1 -1
  72. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/app_cli.pyi +2 -2
  73. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/app_config.pyi +2 -2
  74. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/capsule.pyi +5 -2
  75. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/click_importer.pyi +1 -1
  76. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/code_package/__init__.pyi +1 -1
  77. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/code_package/code_packager.pyi +2 -2
  78. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/__init__.pyi +1 -1
  79. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/cli_generator.pyi +1 -1
  80. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/config_utils.pyi +4 -4
  81. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/schema_export.pyi +1 -1
  82. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/typed_configs.pyi +2 -2
  83. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/unified_config.pyi +6 -3
  84. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/dependencies.pyi +3 -3
  85. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/deployer.pyi +64 -5
  86. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/experimental/__init__.pyi +1 -1
  87. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/perimeters.pyi +1 -1
  88. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/utils.pyi +2 -2
  89. metaflow-stubs/mf_extensions/outerbounds/plugins/aws/__init__.pyi +1 -1
  90. metaflow-stubs/mf_extensions/outerbounds/plugins/aws/assume_role_decorator.pyi +2 -2
  91. metaflow-stubs/mf_extensions/outerbounds/plugins/card_utilities/__init__.pyi +1 -1
  92. metaflow-stubs/mf_extensions/outerbounds/plugins/card_utilities/async_cards.pyi +1 -1
  93. metaflow-stubs/mf_extensions/outerbounds/plugins/card_utilities/injector.pyi +1 -1
  94. metaflow-stubs/mf_extensions/outerbounds/plugins/checkpoint_datastores/__init__.pyi +1 -1
  95. metaflow-stubs/mf_extensions/outerbounds/plugins/checkpoint_datastores/coreweave.pyi +2 -2
  96. metaflow-stubs/mf_extensions/outerbounds/plugins/checkpoint_datastores/nebius.pyi +2 -2
  97. metaflow-stubs/mf_extensions/outerbounds/plugins/fast_bakery/__init__.pyi +1 -1
  98. metaflow-stubs/mf_extensions/outerbounds/plugins/fast_bakery/baker.pyi +4 -4
  99. metaflow-stubs/mf_extensions/outerbounds/plugins/fast_bakery/docker_environment.pyi +2 -2
  100. metaflow-stubs/mf_extensions/outerbounds/plugins/fast_bakery/fast_bakery.pyi +1 -1
  101. metaflow-stubs/mf_extensions/outerbounds/plugins/kubernetes/__init__.pyi +1 -1
  102. metaflow-stubs/mf_extensions/outerbounds/plugins/kubernetes/pod_killer.pyi +1 -1
  103. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/__init__.pyi +1 -1
  104. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/constants.pyi +1 -1
  105. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/exceptions.pyi +1 -1
  106. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/ollama.pyi +1 -1
  107. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/status_card.pyi +1 -1
  108. metaflow-stubs/mf_extensions/outerbounds/plugins/snowflake/__init__.pyi +1 -1
  109. metaflow-stubs/mf_extensions/outerbounds/plugins/snowflake/snowflake.pyi +1 -1
  110. metaflow-stubs/mf_extensions/outerbounds/profilers/__init__.pyi +1 -1
  111. metaflow-stubs/mf_extensions/outerbounds/profilers/gpu.pyi +1 -1
  112. metaflow-stubs/mf_extensions/outerbounds/remote_config.pyi +1 -1
  113. metaflow-stubs/mf_extensions/outerbounds/toplevel/__init__.pyi +1 -1
  114. metaflow-stubs/mf_extensions/outerbounds/toplevel/global_aliases_for_metaflow_package.pyi +1 -1
  115. metaflow-stubs/multicore_utils.pyi +1 -1
  116. metaflow-stubs/ob_internal.pyi +1 -1
  117. metaflow-stubs/packaging_sys/__init__.pyi +6 -6
  118. metaflow-stubs/packaging_sys/backend.pyi +2 -2
  119. metaflow-stubs/packaging_sys/distribution_support.pyi +2 -2
  120. metaflow-stubs/packaging_sys/tar_backend.pyi +4 -4
  121. metaflow-stubs/packaging_sys/utils.pyi +1 -1
  122. metaflow-stubs/packaging_sys/v1.pyi +3 -3
  123. metaflow-stubs/parameters.pyi +3 -3
  124. metaflow-stubs/plugins/__init__.pyi +12 -12
  125. metaflow-stubs/plugins/airflow/__init__.pyi +1 -1
  126. metaflow-stubs/plugins/airflow/airflow_utils.pyi +1 -1
  127. metaflow-stubs/plugins/airflow/exception.pyi +1 -1
  128. metaflow-stubs/plugins/airflow/sensors/__init__.pyi +1 -1
  129. metaflow-stubs/plugins/airflow/sensors/base_sensor.pyi +1 -1
  130. metaflow-stubs/plugins/airflow/sensors/external_task_sensor.pyi +1 -1
  131. metaflow-stubs/plugins/airflow/sensors/s3_sensor.pyi +1 -1
  132. metaflow-stubs/plugins/argo/__init__.pyi +1 -1
  133. metaflow-stubs/plugins/argo/argo_client.pyi +1 -1
  134. metaflow-stubs/plugins/argo/argo_events.pyi +1 -1
  135. metaflow-stubs/plugins/argo/argo_workflows.pyi +3 -3
  136. metaflow-stubs/plugins/argo/argo_workflows_decorator.pyi +1 -1
  137. metaflow-stubs/plugins/argo/argo_workflows_deployer.pyi +4 -4
  138. metaflow-stubs/plugins/argo/argo_workflows_deployer_objects.pyi +3 -3
  139. metaflow-stubs/plugins/argo/exit_hooks.pyi +2 -2
  140. metaflow-stubs/plugins/aws/__init__.pyi +1 -1
  141. metaflow-stubs/plugins/aws/aws_client.pyi +1 -1
  142. metaflow-stubs/plugins/aws/aws_utils.pyi +1 -1
  143. metaflow-stubs/plugins/aws/batch/__init__.pyi +1 -1
  144. metaflow-stubs/plugins/aws/batch/batch.pyi +1 -1
  145. metaflow-stubs/plugins/aws/batch/batch_client.pyi +1 -1
  146. metaflow-stubs/plugins/aws/batch/batch_decorator.pyi +1 -1
  147. metaflow-stubs/plugins/aws/secrets_manager/__init__.pyi +1 -1
  148. metaflow-stubs/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.pyi +3 -3
  149. metaflow-stubs/plugins/aws/step_functions/__init__.pyi +1 -1
  150. metaflow-stubs/plugins/aws/step_functions/event_bridge_client.pyi +1 -1
  151. metaflow-stubs/plugins/aws/step_functions/schedule_decorator.pyi +1 -1
  152. metaflow-stubs/plugins/aws/step_functions/step_functions.pyi +1 -1
  153. metaflow-stubs/plugins/aws/step_functions/step_functions_client.pyi +1 -1
  154. metaflow-stubs/plugins/aws/step_functions/step_functions_deployer.pyi +3 -3
  155. metaflow-stubs/plugins/aws/step_functions/step_functions_deployer_objects.pyi +3 -3
  156. metaflow-stubs/plugins/azure/__init__.pyi +1 -1
  157. metaflow-stubs/plugins/azure/azure_credential.pyi +1 -1
  158. metaflow-stubs/plugins/azure/azure_exceptions.pyi +1 -1
  159. metaflow-stubs/plugins/azure/azure_secret_manager_secrets_provider.pyi +3 -3
  160. metaflow-stubs/plugins/azure/azure_utils.pyi +1 -1
  161. metaflow-stubs/plugins/azure/blob_service_client_factory.pyi +1 -1
  162. metaflow-stubs/plugins/azure/includefile_support.pyi +1 -1
  163. metaflow-stubs/plugins/cards/__init__.pyi +1 -1
  164. metaflow-stubs/plugins/cards/card_client.pyi +2 -2
  165. metaflow-stubs/plugins/cards/card_creator.pyi +1 -1
  166. metaflow-stubs/plugins/cards/card_datastore.pyi +1 -1
  167. metaflow-stubs/plugins/cards/card_decorator.pyi +2 -2
  168. metaflow-stubs/plugins/cards/card_modules/__init__.pyi +1 -1
  169. metaflow-stubs/plugins/cards/card_modules/basic.pyi +1 -1
  170. metaflow-stubs/plugins/cards/card_modules/card.pyi +1 -1
  171. metaflow-stubs/plugins/cards/card_modules/components.pyi +3 -3
  172. metaflow-stubs/plugins/cards/card_modules/convert_to_native_type.pyi +1 -1
  173. metaflow-stubs/plugins/cards/card_modules/renderer_tools.pyi +1 -1
  174. metaflow-stubs/plugins/cards/card_modules/test_cards.pyi +1 -1
  175. metaflow-stubs/plugins/cards/card_resolver.pyi +1 -1
  176. metaflow-stubs/plugins/cards/component_serializer.pyi +1 -1
  177. metaflow-stubs/plugins/cards/exception.pyi +1 -1
  178. metaflow-stubs/plugins/catch_decorator.pyi +2 -2
  179. metaflow-stubs/plugins/datatools/__init__.pyi +1 -1
  180. metaflow-stubs/plugins/datatools/local.pyi +1 -1
  181. metaflow-stubs/plugins/datatools/s3/__init__.pyi +1 -1
  182. metaflow-stubs/plugins/datatools/s3/s3.pyi +3 -3
  183. metaflow-stubs/plugins/datatools/s3/s3tail.pyi +1 -1
  184. metaflow-stubs/plugins/datatools/s3/s3util.pyi +1 -1
  185. metaflow-stubs/plugins/debug_logger.pyi +1 -1
  186. metaflow-stubs/plugins/debug_monitor.pyi +1 -1
  187. metaflow-stubs/plugins/environment_decorator.pyi +1 -1
  188. metaflow-stubs/plugins/events_decorator.pyi +1 -1
  189. metaflow-stubs/plugins/exit_hook/__init__.pyi +1 -1
  190. metaflow-stubs/plugins/exit_hook/exit_hook_decorator.pyi +1 -1
  191. metaflow-stubs/plugins/frameworks/__init__.pyi +1 -1
  192. metaflow-stubs/plugins/frameworks/pytorch.pyi +1 -1
  193. metaflow-stubs/plugins/gcp/__init__.pyi +1 -1
  194. metaflow-stubs/plugins/gcp/gcp_secret_manager_secrets_provider.pyi +3 -3
  195. metaflow-stubs/plugins/gcp/gs_exceptions.pyi +1 -1
  196. metaflow-stubs/plugins/gcp/gs_storage_client_factory.pyi +1 -1
  197. metaflow-stubs/plugins/gcp/gs_utils.pyi +1 -1
  198. metaflow-stubs/plugins/gcp/includefile_support.pyi +1 -1
  199. metaflow-stubs/plugins/kubernetes/__init__.pyi +1 -1
  200. metaflow-stubs/plugins/kubernetes/kube_utils.pyi +2 -2
  201. metaflow-stubs/plugins/kubernetes/kubernetes.pyi +1 -1
  202. metaflow-stubs/plugins/kubernetes/kubernetes_client.pyi +1 -1
  203. metaflow-stubs/plugins/kubernetes/kubernetes_decorator.pyi +1 -1
  204. metaflow-stubs/plugins/kubernetes/kubernetes_jobsets.pyi +1 -1
  205. metaflow-stubs/plugins/kubernetes/spot_monitor_sidecar.pyi +1 -1
  206. metaflow-stubs/plugins/ollama/__init__.pyi +1 -1
  207. metaflow-stubs/plugins/parallel_decorator.pyi +1 -1
  208. metaflow-stubs/plugins/perimeters.pyi +1 -1
  209. metaflow-stubs/plugins/project_decorator.pyi +1 -1
  210. metaflow-stubs/plugins/pypi/__init__.pyi +1 -1
  211. metaflow-stubs/plugins/pypi/conda_decorator.pyi +1 -1
  212. metaflow-stubs/plugins/pypi/conda_environment.pyi +4 -4
  213. metaflow-stubs/plugins/pypi/parsers.pyi +1 -1
  214. metaflow-stubs/plugins/pypi/pypi_decorator.pyi +1 -1
  215. metaflow-stubs/plugins/pypi/pypi_environment.pyi +1 -1
  216. metaflow-stubs/plugins/pypi/utils.pyi +1 -1
  217. metaflow-stubs/plugins/resources_decorator.pyi +1 -1
  218. metaflow-stubs/plugins/retry_decorator.pyi +1 -1
  219. metaflow-stubs/plugins/secrets/__init__.pyi +1 -1
  220. metaflow-stubs/plugins/secrets/inline_secrets_provider.pyi +2 -2
  221. metaflow-stubs/plugins/secrets/secrets_decorator.pyi +1 -1
  222. metaflow-stubs/plugins/secrets/secrets_func.pyi +1 -1
  223. metaflow-stubs/plugins/secrets/secrets_spec.pyi +1 -1
  224. metaflow-stubs/plugins/secrets/utils.pyi +1 -1
  225. metaflow-stubs/plugins/snowflake/__init__.pyi +1 -1
  226. metaflow-stubs/plugins/storage_executor.pyi +1 -1
  227. metaflow-stubs/plugins/test_unbounded_foreach_decorator.pyi +2 -2
  228. metaflow-stubs/plugins/timeout_decorator.pyi +2 -2
  229. metaflow-stubs/plugins/torchtune/__init__.pyi +1 -1
  230. metaflow-stubs/plugins/uv/__init__.pyi +1 -1
  231. metaflow-stubs/plugins/uv/uv_environment.pyi +2 -2
  232. metaflow-stubs/profilers/__init__.pyi +1 -1
  233. metaflow-stubs/pylint_wrapper.pyi +1 -1
  234. metaflow-stubs/runner/__init__.pyi +1 -1
  235. metaflow-stubs/runner/deployer.pyi +29 -29
  236. metaflow-stubs/runner/deployer_impl.pyi +2 -2
  237. metaflow-stubs/runner/metaflow_runner.pyi +3 -3
  238. metaflow-stubs/runner/nbdeploy.pyi +1 -1
  239. metaflow-stubs/runner/nbrun.pyi +1 -1
  240. metaflow-stubs/runner/subprocess_manager.pyi +1 -1
  241. metaflow-stubs/runner/utils.pyi +2 -2
  242. metaflow-stubs/system/__init__.pyi +1 -1
  243. metaflow-stubs/system/system_logger.pyi +2 -2
  244. metaflow-stubs/system/system_monitor.pyi +1 -1
  245. metaflow-stubs/tagging_util.pyi +1 -1
  246. metaflow-stubs/tuple_util.pyi +1 -1
  247. metaflow-stubs/user_configs/__init__.pyi +1 -1
  248. metaflow-stubs/user_configs/config_options.pyi +3 -3
  249. metaflow-stubs/user_configs/config_parameters.pyi +5 -5
  250. metaflow-stubs/user_decorators/__init__.pyi +1 -1
  251. metaflow-stubs/user_decorators/common.pyi +1 -1
  252. metaflow-stubs/user_decorators/mutable_flow.pyi +4 -4
  253. metaflow-stubs/user_decorators/mutable_step.pyi +4 -4
  254. metaflow-stubs/user_decorators/user_flow_decorator.pyi +4 -4
  255. metaflow-stubs/user_decorators/user_step_decorator.pyi +5 -5
  256. {ob_metaflow_stubs-6.0.4.1rc0.dist-info → ob_metaflow_stubs-6.0.4.2.dist-info}/METADATA +1 -1
  257. ob_metaflow_stubs-6.0.4.2.dist-info/RECORD +260 -0
  258. ob_metaflow_stubs-6.0.4.1rc0.dist-info/RECORD +0 -260
  259. {ob_metaflow_stubs-6.0.4.1rc0.dist-info → ob_metaflow_stubs-6.0.4.2.dist-info}/WHEEL +0 -0
  260. {ob_metaflow_stubs-6.0.4.1rc0.dist-info → ob_metaflow_stubs-6.0.4.2.dist-info}/top_level.txt +0 -0
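The bulk of this release's changes land in metaflow-stubs/__init__.pyi, whose diff follows. As orientation for the decorator signatures documented there, here is a small, hypothetical usage sketch (not taken from the package) showing how the step decorators covered by the updated stubs, such as @resources, @timeout, @retry and @checkpoint, are typically combined; it assumes the ob-metaflow runtime that these stubs annotate is installed.

```python
# Hypothetical sketch based on the decorator signatures in the stub diff below;
# assumes the ob-metaflow distribution (with the obcheckpoint extension) is installed.
from metaflow import FlowSpec, step, current, checkpoint, resources, retry, timeout

class TrainFlow(FlowSpec):

    @resources(cpu=2, memory=8192)    # resource hints, independent of the compute layer
    @timeout(hours=1, minutes=30)     # per the stub docstring, the units are added together
    @retry(times=3)
    @checkpoint(load_policy="fresh")  # reload checkpoints written by earlier attempts of this task
    @step
    def start(self):
        if current.checkpoint.is_loaded:
            # current.checkpoint.directory holds the loaded checkpoint contents
            print("Resuming from", current.checkpoint.directory)
        self.next(self.end)

    @step
    def end(self):
        print("done")

if __name__ == "__main__":
    TrainFlow()
```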
@@ -1,15 +1,15 @@
  ######################################################################################################
  # Auto-generated Metaflow stub file #
  # MF version: 2.16.0.1+obcheckpoint(0.2.4);ob(v1) #
- # Generated on 2025-07-14T20:03:25.730478 #
+ # Generated on 2025-07-15T03:12:46.861592 #
  ######################################################################################################

  from __future__ import annotations

  import typing
  if typing.TYPE_CHECKING:
- import datetime
  import typing
+ import datetime
  FlowSpecDerived = typing.TypeVar("FlowSpecDerived", bound="FlowSpec", contravariant=False, covariant=False)
  StepFlag = typing.NewType("StepFlag", bool)

@@ -39,17 +39,17 @@ from .user_decorators.user_step_decorator import UserStepDecorator as UserStepDe
  from .user_decorators.user_step_decorator import StepMutator as StepMutator
  from .user_decorators.user_step_decorator import user_step_decorator as user_step_decorator
  from .user_decorators.user_flow_decorator import FlowMutator as FlowMutator
- from . import cards as cards
  from . import tuple_util as tuple_util
- from . import events as events
+ from . import cards as cards
  from . import metaflow_git as metaflow_git
+ from . import events as events
  from . import runner as runner
  from . import plugins as plugins
  from .mf_extensions.outerbounds.toplevel.global_aliases_for_metaflow_package import S3 as S3
  from . import includefile as includefile
  from .includefile import IncludeFile as IncludeFile
- from .plugins.pypi.parsers import requirements_txt_parser as requirements_txt_parser
  from .plugins.pypi.parsers import pyproject_toml_parser as pyproject_toml_parser
+ from .plugins.pypi.parsers import requirements_txt_parser as requirements_txt_parser
  from .plugins.pypi.parsers import conda_environment_yml_parser as conda_environment_yml_parser
  from . import client as client
  from .client.core import namespace as namespace
@@ -163,213 +163,291 @@ def step(f: typing.Union[typing.Callable[[FlowSpecDerived], None], typing.Callab
  ...

  @typing.overload
- def catch(*, var: typing.Optional[str] = None, print_exception: bool = True) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
+ def timeout(*, seconds: int = 0, minutes: int = 0, hours: int = 0) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
  """
- Specifies that the step will success under all circumstances.
+ Specifies a timeout for your step.

- The decorator will create an optional artifact, specified by `var`, which
- contains the exception raised. You can use it to detect the presence
- of errors, indicating that all happy-path artifacts produced by the step
- are missing.
+ This decorator is useful if this step may hang indefinitely.
+
+ This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
+ A timeout is considered to be an exception thrown by the step. It will cause the step to be
+ retried if needed and the exception will be caught by the `@catch` decorator, if present.
+
+ Note that all the values specified in parameters are added together so if you specify
+ 60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.


  Parameters
  ----------
- var : str, optional, default None
- Name of the artifact in which to store the caught exception.
- If not specified, the exception is not stored.
- print_exception : bool, default True
- Determines whether or not the exception is printed to
- stdout when caught.
+ seconds : int, default 0
+ Number of seconds to wait prior to timing out.
+ minutes : int, default 0
+ Number of minutes to wait prior to timing out.
+ hours : int, default 0
+ Number of hours to wait prior to timing out.
  """
  ...

  @typing.overload
- def catch(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
+ def timeout(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
  ...

  @typing.overload
- def catch(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
+ def timeout(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
  ...

- def catch(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, var: typing.Optional[str] = None, print_exception: bool = True):
+ def timeout(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, seconds: int = 0, minutes: int = 0, hours: int = 0):
  """
- Specifies that the step will success under all circumstances.
+ Specifies a timeout for your step.

- The decorator will create an optional artifact, specified by `var`, which
- contains the exception raised. You can use it to detect the presence
- of errors, indicating that all happy-path artifacts produced by the step
- are missing.
+ This decorator is useful if this step may hang indefinitely.
+
+ This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
+ A timeout is considered to be an exception thrown by the step. It will cause the step to be
+ retried if needed and the exception will be caught by the `@catch` decorator, if present.
+
+ Note that all the values specified in parameters are added together so if you specify
+ 60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.


  Parameters
  ----------
- var : str, optional, default None
- Name of the artifact in which to store the caught exception.
- If not specified, the exception is not stored.
- print_exception : bool, default True
- Determines whether or not the exception is printed to
- stdout when caught.
- """
- ...
-
- @typing.overload
- def app_deploy(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
- """
- Decorator prototype for all step decorators. This function gets specialized
- and imported for all decorators types by _import_plugin_decorators().
- """
- ...
-
- @typing.overload
- def app_deploy(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
- ...
-
- def app_deploy(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
- """
- Decorator prototype for all step decorators. This function gets specialized
- and imported for all decorators types by _import_plugin_decorators().
+ seconds : int, default 0
+ Number of seconds to wait prior to timing out.
+ minutes : int, default 0
+ Number of minutes to wait prior to timing out.
+ hours : int, default 0
+ Number of hours to wait prior to timing out.
  """
  ...

  @typing.overload
- def retry(*, times: int = 3, minutes_between_retries: int = 2) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
+ def resources(*, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
  """
- Specifies the number of times the task corresponding
- to a step needs to be retried.
+ Specifies the resources needed when executing this step.

- This decorator is useful for handling transient errors, such as networking issues.
- If your task contains operations that can't be retried safely, e.g. database updates,
- it is advisable to annotate it with `@retry(times=0)`.
+ Use `@resources` to specify the resource requirements
+ independently of the specific compute layer (`@batch`, `@kubernetes`).

- This can be used in conjunction with the `@catch` decorator. The `@catch`
- decorator will execute a no-op task after all retries have been exhausted,
- ensuring that the flow execution can continue.
+ You can choose the compute layer on the command line by executing e.g.
+ ```
+ python myflow.py run --with batch
+ ```
+ or
+ ```
+ python myflow.py run --with kubernetes
+ ```
+ which executes the flow on the desired system using the
+ requirements specified in `@resources`.


  Parameters
  ----------
- times : int, default 3
- Number of times to retry this task.
- minutes_between_retries : int, default 2
- Number of minutes between retries.
+ cpu : int, default 1
+ Number of CPUs required for this step.
+ gpu : int, optional, default None
+ Number of GPUs required for this step.
+ disk : int, optional, default None
+ Disk size (in MB) required for this step. Only applies on Kubernetes.
+ memory : int, default 4096
+ Memory size (in MB) required for this step.
+ shared_memory : int, optional, default None
+ The value for the size (in MiB) of the /dev/shm volume for this step.
+ This parameter maps to the `--shm-size` option in Docker.
  """
  ...

  @typing.overload
- def retry(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
+ def resources(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
  ...

  @typing.overload
- def retry(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
+ def resources(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
  ...

- def retry(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, times: int = 3, minutes_between_retries: int = 2):
+ def resources(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None):
  """
- Specifies the number of times the task corresponding
- to a step needs to be retried.
+ Specifies the resources needed when executing this step.

- This decorator is useful for handling transient errors, such as networking issues.
- If your task contains operations that can't be retried safely, e.g. database updates,
- it is advisable to annotate it with `@retry(times=0)`.
+ Use `@resources` to specify the resource requirements
+ independently of the specific compute layer (`@batch`, `@kubernetes`).

- This can be used in conjunction with the `@catch` decorator. The `@catch`
- decorator will execute a no-op task after all retries have been exhausted,
- ensuring that the flow execution can continue.
+ You can choose the compute layer on the command line by executing e.g.
+ ```
+ python myflow.py run --with batch
+ ```
+ or
+ ```
+ python myflow.py run --with kubernetes
+ ```
+ which executes the flow on the desired system using the
+ requirements specified in `@resources`.


  Parameters
  ----------
- times : int, default 3
- Number of times to retry this task.
- minutes_between_retries : int, default 2
- Number of minutes between retries.
+ cpu : int, default 1
+ Number of CPUs required for this step.
+ gpu : int, optional, default None
+ Number of GPUs required for this step.
+ disk : int, optional, default None
+ Disk size (in MB) required for this step. Only applies on Kubernetes.
+ memory : int, default 4096
+ Memory size (in MB) required for this step.
+ shared_memory : int, optional, default None
+ The value for the size (in MiB) of the /dev/shm volume for this step.
+ This parameter maps to the `--shm-size` option in Docker.
  """
  ...

- def ollama(*, models: list, backend: str, force_pull: bool, cache_update_policy: str, force_cache_update: bool, debug: bool, circuit_breaker_config: dict, timeout_config: dict) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
+ @typing.overload
+ def checkpoint(*, load_policy: str = 'fresh', temp_dir_root: str = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
  """
- This decorator is used to run Ollama APIs as Metaflow task sidecars.
+ Enables checkpointing for a step.

- User code call
- --------------
- @ollama(
- models=[...],
- ...
- )
+ > Examples

- Valid backend options
- ---------------------
- - 'local': Run as a separate process on the local task machine.
- - (TODO) 'managed': Outerbounds hosts and selects compute provider.
- - (TODO) 'remote': Spin up separate instance to serve Ollama models.
+ - Saving Checkpoints

- Valid model options
- -------------------
- Any model here https://ollama.com/search, e.g. 'llama3.2', 'llama3.3'
+ ```python
+ @checkpoint
+ @step
+ def train(self):
+ model = create_model(self.parameters, checkpoint_path = None)
+ for i in range(self.epochs):
+ # some training logic
+ loss = model.train(self.dataset)
+ if i % 10 == 0:
+ model.save(
+ current.checkpoint.directory,
+ )
+ # saves the contents of the `current.checkpoint.directory` as a checkpoint
+ # and returns a reference dictionary to the checkpoint saved in the datastore
+ self.latest_checkpoint = current.checkpoint.save(
+ name="epoch_checkpoint",
+ metadata={
+ "epoch": i,
+ "loss": loss,
+ }
+ )
+ ```

+ - Using Loaded Checkpoints

- Parameters
- ----------
- models: list[str]
- List of Ollama containers running models in sidecars.
- backend: str
- Determines where and how to run the Ollama process.
- force_pull: bool
- Whether to run `ollama pull` no matter what, or first check the remote cache in Metaflow datastore for this model key.
- cache_update_policy: str
- Cache update policy: "auto", "force", or "never".
- force_cache_update: bool
- Simple override for "force" cache update policy.
- debug: bool
- Whether to turn on verbose debugging logs.
- circuit_breaker_config: dict
- Configuration for circuit breaker protection. Keys: failure_threshold, recovery_timeout, reset_timeout.
- timeout_config: dict
- Configuration for various operation timeouts. Keys: pull, stop, health_check, install, server_startup.
- """
- ...
-
- @typing.overload
- def secrets(*, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = [], role: typing.Optional[str] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
- """
- Specifies secrets to be retrieved and injected as environment variables prior to
- the execution of a step.
+ ```python
+ @retry(times=3)
+ @checkpoint
+ @step
+ def train(self):
+ # Assume that the task has restarted and the previous attempt of the task
+ # saved a checkpoint
+ checkpoint_path = None
+ if current.checkpoint.is_loaded: # Check if a checkpoint is loaded
+ print("Loaded checkpoint from the previous attempt")
+ checkpoint_path = current.checkpoint.directory
+
+ model = create_model(self.parameters, checkpoint_path = checkpoint_path)
+ for i in range(self.epochs):
+ ...
+ ```


  Parameters
  ----------
- sources : List[Union[str, Dict[str, Any]]], default: []
- List of secret specs, defining how the secrets are to be retrieved
- role : str, optional, default: None
- Role to use for fetching secrets
+ load_policy : str, default: "fresh"
+ The policy for loading the checkpoint. The following policies are supported:
+ - "eager": Loads the the latest available checkpoint within the namespace.
+ With this mode, the latest checkpoint written by any previous task (can be even a different run) of the step
+ will be loaded at the start of the task.
+ - "none": Do not load any checkpoint
+ - "fresh": Loads the lastest checkpoint created within the running Task.
+ This mode helps loading checkpoints across various retry attempts of the same task.
+ With this mode, no checkpoint will be loaded at the start of a task but any checkpoints
+ created within the task will be loaded when the task is retries execution on failure.
+
+ temp_dir_root : str, default: None
+ The root directory under which `current.checkpoint.directory` will be created.
  """
  ...

  @typing.overload
- def secrets(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
+ def checkpoint(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
  ...

  @typing.overload
- def secrets(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
+ def checkpoint(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
  ...

- def secrets(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = [], role: typing.Optional[str] = None):
+ def checkpoint(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, load_policy: str = 'fresh', temp_dir_root: str = None):
  """
- Specifies secrets to be retrieved and injected as environment variables prior to
- the execution of a step.
+ Enables checkpointing for a step.
+
+ > Examples
+
+ - Saving Checkpoints
+
+ ```python
+ @checkpoint
+ @step
+ def train(self):
+ model = create_model(self.parameters, checkpoint_path = None)
+ for i in range(self.epochs):
+ # some training logic
+ loss = model.train(self.dataset)
+ if i % 10 == 0:
+ model.save(
+ current.checkpoint.directory,
+ )
+ # saves the contents of the `current.checkpoint.directory` as a checkpoint
+ # and returns a reference dictionary to the checkpoint saved in the datastore
+ self.latest_checkpoint = current.checkpoint.save(
+ name="epoch_checkpoint",
+ metadata={
+ "epoch": i,
+ "loss": loss,
+ }
+ )
+ ```
+
+ - Using Loaded Checkpoints
+
+ ```python
+ @retry(times=3)
+ @checkpoint
+ @step
+ def train(self):
+ # Assume that the task has restarted and the previous attempt of the task
+ # saved a checkpoint
+ checkpoint_path = None
+ if current.checkpoint.is_loaded: # Check if a checkpoint is loaded
+ print("Loaded checkpoint from the previous attempt")
+ checkpoint_path = current.checkpoint.directory
+
+ model = create_model(self.parameters, checkpoint_path = checkpoint_path)
+ for i in range(self.epochs):
+ ...
+ ```


  Parameters
  ----------
- sources : List[Union[str, Dict[str, Any]]], default: []
- List of secret specs, defining how the secrets are to be retrieved
- role : str, optional, default: None
- Role to use for fetching secrets
+ load_policy : str, default: "fresh"
+ The policy for loading the checkpoint. The following policies are supported:
+ - "eager": Loads the the latest available checkpoint within the namespace.
+ With this mode, the latest checkpoint written by any previous task (can be even a different run) of the step
+ will be loaded at the start of the task.
+ - "none": Do not load any checkpoint
+ - "fresh": Loads the lastest checkpoint created within the running Task.
+ This mode helps loading checkpoints across various retry attempts of the same task.
+ With this mode, no checkpoint will be loaded at the start of a task but any checkpoints
+ created within the task will be loaded when the task is retries execution on failure.
+
+ temp_dir_root : str, default: None
+ The root directory under which `current.checkpoint.directory` will be created.
  """
  ...

- def nvct(*, gpu: int, gpu_type: str) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
+ def nvidia(*, gpu: int, gpu_type: str, queue_timeout: int) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
  """
  Specifies that this step should execute on DGX cloud.

@@ -380,194 +458,264 @@ def nvct(*, gpu: int, gpu_type: str) -> typing.Callable[[typing.Union[typing.Cal
380
458
  Number of GPUs to use.
381
459
  gpu_type : str
382
460
  Type of Nvidia GPU to use.
461
+ queue_timeout : int
462
+ Time to keep the job in NVCF's queue.
383
463
  """
384
464
  ...
385
465
 
386
466
  @typing.overload
387
- def model(*, load: typing.Union[typing.List[str], str, typing.List[typing.Tuple[str, typing.Optional[str]]]] = None, temp_dir_root: str = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
467
+ def fast_bakery_internal(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
388
468
  """
389
- Enables loading / saving of models within a step.
390
-
391
- > Examples
392
- - Saving Models
393
- ```python
394
- @model
395
- @step
396
- def train(self):
397
- # current.model.save returns a dictionary reference to the model saved
398
- self.my_model = current.model.save(
399
- path_to_my_model,
400
- label="my_model",
401
- metadata={
402
- "epochs": 10,
403
- "batch-size": 32,
404
- "learning-rate": 0.001,
405
- }
406
- )
407
- self.next(self.test)
408
-
409
- @model(load="my_model")
410
- @step
411
- def test(self):
412
- # `current.model.loaded` returns a dictionary of the loaded models
413
- # where the key is the name of the artifact and the value is the path to the model
414
- print(os.listdir(current.model.loaded["my_model"]))
415
- self.next(self.end)
416
- ```
417
-
418
- - Loading models
419
- ```python
420
- @step
421
- def train(self):
422
- # current.model.load returns the path to the model loaded
423
- checkpoint_path = current.model.load(
424
- self.checkpoint_key,
425
- )
426
- model_path = current.model.load(
427
- self.model,
428
- )
429
- self.next(self.test)
430
- ```
431
-
432
-
433
- Parameters
434
- ----------
435
- load : Union[List[str],str,List[Tuple[str,Union[str,None]]]], default: None
436
- Artifact name/s referencing the models/checkpoints to load. Artifact names refer to the names of the instance variables set to `self`.
437
- These artifact names give to `load` be reference objects or reference `key` string's from objects created by `current.checkpoint` / `current.model` / `current.huggingface_hub`.
438
- If a list of tuples is provided, the first element is the artifact name and the second element is the path the artifact needs be unpacked on
439
- the local filesystem. If the second element is None, the artifact will be unpacked in the current working directory.
440
- If a string is provided, then the artifact corresponding to that name will be loaded in the current working directory.
441
-
442
- temp_dir_root : str, default: None
443
- The root directory under which `current.model.loaded` will store loaded models
469
+ Internal decorator to support Fast bakery
444
470
  """
445
471
  ...
446
472
 
447
473
  @typing.overload
448
- def model(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
449
- ...
450
-
451
- @typing.overload
452
- def model(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
474
+ def fast_bakery_internal(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
453
475
  ...
454
476
 
455
- def model(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, load: typing.Union[typing.List[str], str, typing.List[typing.Tuple[str, typing.Optional[str]]]] = None, temp_dir_root: str = None):
477
+ def fast_bakery_internal(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
456
478
  """
457
- Enables loading / saving of models within a step.
458
-
459
- > Examples
460
- - Saving Models
461
- ```python
462
- @model
463
- @step
464
- def train(self):
465
- # current.model.save returns a dictionary reference to the model saved
466
- self.my_model = current.model.save(
467
- path_to_my_model,
468
- label="my_model",
469
- metadata={
470
- "epochs": 10,
471
- "batch-size": 32,
472
- "learning-rate": 0.001,
473
- }
474
- )
475
- self.next(self.test)
476
-
477
- @model(load="my_model")
478
- @step
479
- def test(self):
480
- # `current.model.loaded` returns a dictionary of the loaded models
481
- # where the key is the name of the artifact and the value is the path to the model
482
- print(os.listdir(current.model.loaded["my_model"]))
483
- self.next(self.end)
484
- ```
485
-
486
- - Loading models
487
- ```python
488
- @step
489
- def train(self):
490
- # current.model.load returns the path to the model loaded
491
- checkpoint_path = current.model.load(
492
- self.checkpoint_key,
493
- )
494
- model_path = current.model.load(
495
- self.model,
496
- )
497
- self.next(self.test)
498
- ```
499
-
500
-
501
- Parameters
502
- ----------
503
- load : Union[List[str],str,List[Tuple[str,Union[str,None]]]], default: None
504
- Artifact name/s referencing the models/checkpoints to load. Artifact names refer to the names of the instance variables set to `self`.
505
- These artifact names give to `load` be reference objects or reference `key` string's from objects created by `current.checkpoint` / `current.model` / `current.huggingface_hub`.
506
- If a list of tuples is provided, the first element is the artifact name and the second element is the path the artifact needs be unpacked on
507
- the local filesystem. If the second element is None, the artifact will be unpacked in the current working directory.
508
- If a string is provided, then the artifact corresponding to that name will be loaded in the current working directory.
509
-
510
- temp_dir_root : str, default: None
511
- The root directory under which `current.model.loaded` will store loaded models
479
+ Internal decorator to support Fast bakery
512
480
  """
513
481
  ...
514
482
 
515
483
  @typing.overload
516
- def timeout(*, seconds: int = 0, minutes: int = 0, hours: int = 0) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
484
+ def conda(*, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
517
485
  """
518
- Specifies a timeout for your step.
519
-
520
- This decorator is useful if this step may hang indefinitely.
521
-
522
- This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
523
- A timeout is considered to be an exception thrown by the step. It will cause the step to be
524
- retried if needed and the exception will be caught by the `@catch` decorator, if present.
486
+ Specifies the Conda environment for the step.
525
487
 
526
- Note that all the values specified in parameters are added together so if you specify
527
- 60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.
488
+ Information in this decorator will augment any
489
+ attributes set in the `@conda_base` flow-level decorator. Hence,
490
+ you can use `@conda_base` to set packages required by all
491
+ steps and use `@conda` to specify step-specific overrides.
528
492
 
529
493
 
530
494
  Parameters
531
495
  ----------
532
- seconds : int, default 0
533
- Number of seconds to wait prior to timing out.
534
- minutes : int, default 0
535
- Number of minutes to wait prior to timing out.
536
- hours : int, default 0
537
- Number of hours to wait prior to timing out.
496
+ packages : Dict[str, str], default {}
497
+ Packages to use for this step. The key is the name of the package
498
+ and the value is the version to use.
499
+ libraries : Dict[str, str], default {}
500
+ Supported for backward compatibility. When used with packages, packages will take precedence.
501
+ python : str, optional, default None
502
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
503
+ that the version used will correspond to the version of the Python interpreter used to start the run.
504
+ disabled : bool, default False
505
+ If set to True, disables @conda.
538
506
  """
539
507
  ...
540
508
 
541
509
  @typing.overload
542
- def timeout(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
510
+ def conda(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
543
511
  ...
544
512
 
545
513
  @typing.overload
546
- def timeout(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
514
+ def conda(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
547
515
  ...
548
516
 
549
- def timeout(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, seconds: int = 0, minutes: int = 0, hours: int = 0):
517
+ def conda(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False):
550
518
  """
551
- Specifies a timeout for your step.
552
-
553
- This decorator is useful if this step may hang indefinitely.
519
+ Specifies the Conda environment for the step.
554
520
 
555
- This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
556
- A timeout is considered to be an exception thrown by the step. It will cause the step to be
557
- retried if needed and the exception will be caught by the `@catch` decorator, if present.
521
+ Information in this decorator will augment any
522
+ attributes set in the `@conda_base` flow-level decorator. Hence,
523
+ you can use `@conda_base` to set packages required by all
524
+ steps and use `@conda` to specify step-specific overrides.
558
525
 
559
- Note that all the values specified in parameters are added together so if you specify
560
- 60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.
526
+
527
+ Parameters
528
+ ----------
529
+ packages : Dict[str, str], default {}
530
+ Packages to use for this step. The key is the name of the package
531
+ and the value is the version to use.
532
+ libraries : Dict[str, str], default {}
533
+ Supported for backward compatibility. When used with packages, packages will take precedence.
534
+ python : str, optional, default None
535
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
536
+ that the version used will correspond to the version of the Python interpreter used to start the run.
537
+ disabled : bool, default False
538
+ If set to True, disables @conda.
539
+ """
540
+ ...
541
+
542
+ @typing.overload
543
+ def secrets(*, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = [], role: typing.Optional[str] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
544
+ """
545
+ Specifies secrets to be retrieved and injected as environment variables prior to
546
+ the execution of a step.
561
547
 
562
548
 
563
549
  Parameters
564
550
  ----------
565
- seconds : int, default 0
566
- Number of seconds to wait prior to timing out.
567
- minutes : int, default 0
568
- Number of minutes to wait prior to timing out.
569
- hours : int, default 0
570
- Number of hours to wait prior to timing out.
551
+ sources : List[Union[str, Dict[str, Any]]], default: []
552
+ List of secret specs, defining how the secrets are to be retrieved
553
+ role : str, optional, default: None
554
+ Role to use for fetching secrets
555
+ """
556
+ ...
557
+
558
+ @typing.overload
559
+ def secrets(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
560
+ ...
561
+
562
+ @typing.overload
563
+ def secrets(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
564
+ ...
565
+
566
+ def secrets(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = [], role: typing.Optional[str] = None):
567
+ """
568
+ Specifies secrets to be retrieved and injected as environment variables prior to
569
+ the execution of a step.
570
+
571
+
572
+ Parameters
573
+ ----------
574
+ sources : List[Union[str, Dict[str, Any]]], default: []
575
+ List of secret specs, defining how the secrets are to be retrieved
576
+ role : str, optional, default: None
577
+ Role to use for fetching secrets
578
+ """
579
+ ...
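Editor's note: a hedged sketch of `@secrets` in user code. The secret source name and the injected variable (`my-api-secret`, `MY_API_KEY`) are hypothetical; the actual names depend on the configured secrets backend.
```python
import os
from metaflow import FlowSpec, secrets, step

class SecretsExampleFlow(FlowSpec):

    @secrets(sources=["my-api-secret"])  # hypothetical secret spec
    @step
    def start(self):
        # Keys of the retrieved secret are injected as environment variables
        # before the step body runs.
        print("key present:", "MY_API_KEY" in os.environ)  # hypothetical key
        self.next(self.end)

    @step
    def end(self):
        pass

if __name__ == "__main__":
    SecretsExampleFlow()
```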
580
+
581
+ @typing.overload
582
+ def environment(*, vars: typing.Dict[str, str] = {}) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
583
+ """
584
+ Specifies environment variables to be set prior to the execution of a step.
585
+
586
+
587
+ Parameters
588
+ ----------
589
+ vars : Dict[str, str], default {}
590
+ Dictionary of environment variables to set.
591
+ """
592
+ ...
593
+
594
+ @typing.overload
595
+ def environment(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
596
+ ...
597
+
598
+ @typing.overload
599
+ def environment(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
600
+ ...
601
+
602
+ def environment(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, vars: typing.Dict[str, str] = {}):
603
+ """
604
+ Specifies environment variables to be set prior to the execution of a step.
605
+
606
+
607
+ Parameters
608
+ ----------
609
+ vars : Dict[str, str], default {}
610
+ Dictionary of environment variables to set.
611
+ """
612
+ ...
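Editor's note: a minimal sketch of `@environment`; the variable name and value are illustrative only.
```python
import os
from metaflow import FlowSpec, environment, step

class EnvironmentExampleFlow(FlowSpec):

    @environment(vars={"APP_MODE": "debug"})  # illustrative variable
    @step
    def start(self):
        print(os.environ["APP_MODE"])  # set before the step body executes
        self.next(self.end)

    @step
    def end(self):
        pass

if __name__ == "__main__":
    EnvironmentExampleFlow()
```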
613
+
614
+ def kubernetes(*, cpu: int = 1, memory: int = 4096, disk: int = 10240, image: typing.Optional[str] = None, image_pull_policy: str = 'KUBERNETES_IMAGE_PULL_POLICY', image_pull_secrets: typing.List[str] = [], service_account: str = 'METAFLOW_KUBERNETES_SERVICE_ACCOUNT', secrets: typing.Optional[typing.List[str]] = None, node_selector: typing.Union[typing.Dict[str, str], str, None] = None, namespace: str = 'METAFLOW_KUBERNETES_NAMESPACE', gpu: typing.Optional[int] = None, gpu_vendor: str = 'KUBERNETES_GPU_VENDOR', tolerations: typing.List[str] = [], labels: typing.Dict[str, str] = 'METAFLOW_KUBERNETES_LABELS', annotations: typing.Dict[str, str] = 'METAFLOW_KUBERNETES_ANNOTATIONS', use_tmpfs: bool = False, tmpfs_tempdir: bool = True, tmpfs_size: typing.Optional[int] = None, tmpfs_path: typing.Optional[str] = '/metaflow_temp', persistent_volume_claims: typing.Optional[typing.Dict[str, str]] = None, shared_memory: typing.Optional[int] = None, port: typing.Optional[int] = None, compute_pool: typing.Optional[str] = None, hostname_resolution_timeout: int = 600, qos: str = 'Burstable', security_context: typing.Optional[typing.Dict[str, typing.Any]] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
615
+ """
616
+ Specifies that this step should execute on Kubernetes.
617
+
618
+
619
+ Parameters
620
+ ----------
621
+ cpu : int, default 1
622
+ Number of CPUs required for this step. If `@resources` is
623
+ also present, the maximum value from all decorators is used.
624
+ memory : int, default 4096
625
+ Memory size (in MB) required for this step. If
626
+ `@resources` is also present, the maximum value from all decorators is
627
+ used.
628
+ disk : int, default 10240
629
+ Disk size (in MB) required for this step. If
630
+ `@resources` is also present, the maximum value from all decorators is
631
+ used.
632
+ image : str, optional, default None
633
+ Docker image to use when launching on Kubernetes. If not specified, and
634
+ METAFLOW_KUBERNETES_CONTAINER_IMAGE is specified, that image is used. If
635
+ not, a default Docker image mapping to the current version of Python is used.
636
+ image_pull_policy: str, default KUBERNETES_IMAGE_PULL_POLICY
637
+ If given, the imagePullPolicy to be applied to the Docker image of the step.
638
+ image_pull_secrets: List[str], default []
639
+ The default is extracted from METAFLOW_KUBERNETES_IMAGE_PULL_SECRETS.
640
+ Kubernetes image pull secrets to use when pulling container images
641
+ in Kubernetes.
642
+ service_account : str, default METAFLOW_KUBERNETES_SERVICE_ACCOUNT
643
+ Kubernetes service account to use when launching pod in Kubernetes.
644
+ secrets : List[str], optional, default None
645
+ Kubernetes secrets to use when launching pod in Kubernetes. These
646
+ secrets are in addition to the ones defined in `METAFLOW_KUBERNETES_SECRETS`
647
+ in Metaflow configuration.
648
+ node_selector: Union[Dict[str,str], str], optional, default None
649
+ Kubernetes node selector(s) to apply to the pod running the task.
650
+ Can be passed in as a comma separated string of values e.g.
651
+ 'kubernetes.io/os=linux,kubernetes.io/arch=amd64' or as a dictionary
652
+ {'kubernetes.io/os': 'linux', 'kubernetes.io/arch': 'amd64'}
653
+ namespace : str, default METAFLOW_KUBERNETES_NAMESPACE
654
+ Kubernetes namespace to use when launching pod in Kubernetes.
655
+ gpu : int, optional, default None
656
+ Number of GPUs required for this step. A value of zero implies that
657
+ the scheduled node should not have GPUs.
658
+ gpu_vendor : str, default KUBERNETES_GPU_VENDOR
659
+ The vendor of the GPUs to be used for this step.
660
+ tolerations : List[str], default []
661
+ The default is extracted from METAFLOW_KUBERNETES_TOLERATIONS.
662
+ Kubernetes tolerations to use when launching pod in Kubernetes.
663
+ labels: Dict[str, str], default: METAFLOW_KUBERNETES_LABELS
664
+ Kubernetes labels to use when launching pod in Kubernetes.
665
+ annotations: Dict[str, str], default: METAFLOW_KUBERNETES_ANNOTATIONS
666
+ Kubernetes annotations to use when launching pod in Kubernetes.
667
+ use_tmpfs : bool, default False
668
+ This enables an explicit tmpfs mount for this step.
669
+ tmpfs_tempdir : bool, default True
670
+ Sets METAFLOW_TEMPDIR to tmpfs_path if set for this step.
671
+ tmpfs_size : int, optional, default: None
672
+ The value for the size (in MiB) of the tmpfs mount for this step.
673
+ This parameter maps to the `--tmpfs` option in Docker. Defaults to 50% of the
674
+ memory allocated for this step.
675
+ tmpfs_path : str, optional, default /metaflow_temp
676
+ Path to tmpfs mount for this step.
677
+ persistent_volume_claims : Dict[str, str], optional, default None
678
+ A map (dictionary) of persistent volumes to be mounted to the pod for this step. The map is from persistent
679
+ volumes to the path to which the volume is to be mounted, e.g., `{'pvc-name': '/path/to/mount/on'}`.
680
+ shared_memory: int, optional
681
+ Shared memory size (in MiB) required for this step
682
+ port: int, optional
683
+ Port number to specify in the Kubernetes job object
684
+ compute_pool : str, optional, default None
685
+ Compute pool to be used for this step.
686
+ If not specified, any accessible compute pool within the perimeter is used.
687
+ hostname_resolution_timeout: int, default 10 * 60
688
+ Timeout in seconds for the worker tasks in the gang scheduled cluster to resolve the hostname of the control task.
689
+ Only applicable when @parallel is used.
690
+ qos: str, default: Burstable
691
+ Quality of Service class to assign to the pod. Supported values are: Guaranteed, Burstable, BestEffort
692
+
693
+ security_context: Dict[str, Any], optional, default None
694
+ Container security context. Applies to the task container. Allows the following keys:
695
+ - privileged: bool, optional, default None
696
+ - allow_privilege_escalation: bool, optional, default None
697
+ - run_as_user: int, optional, default None
698
+ - run_as_group: int, optional, default None
699
+ - run_as_non_root: bool, optional, default None
700
+ """
701
+ ...
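Editor's note: a hedged sketch of `@kubernetes` on a single step. The resource values, node selector, and PVC mapping are illustrative (the PVC example mirrors the docstring above). Alternatively, `--with kubernetes` on the `run` command line applies the decorator to every step.
```python
from metaflow import FlowSpec, kubernetes, step

class KubernetesExampleFlow(FlowSpec):

    @kubernetes(
        cpu=2,
        memory=8192,
        disk=20480,
        node_selector="kubernetes.io/arch=amd64",
        persistent_volume_claims={"pvc-name": "/path/to/mount/on"},  # illustrative name/path
    )
    @step
    def start(self):
        self.result = sum(range(10))
        self.next(self.end)

    @step
    def end(self):
        print(self.result)

if __name__ == "__main__":
    KubernetesExampleFlow()
```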
702
+
703
+ @typing.overload
704
+ def app_deploy(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
705
+ """
706
+ Decorator prototype for all step decorators. This function gets specialized
707
+ and imported for all decorator types by _import_plugin_decorators().
708
+ """
709
+ ...
710
+
711
+ @typing.overload
712
+ def app_deploy(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
713
+ ...
714
+
715
+ def app_deploy(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
716
+ """
717
+ Decorator prototype for all step decorators. This function gets specialized
718
+ and imported for all decorator types by _import_plugin_decorators().
571
719
  """
572
720
  ...
573
721
 
@@ -652,329 +800,294 @@ def huggingface_hub(*, temp_dir_root: typing.Optional[str] = None, load: typing.
652
800
  ...
653
801
 
654
802
  @typing.overload
655
- def conda(*, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
803
+ def model(*, load: typing.Union[typing.List[str], str, typing.List[typing.Tuple[str, typing.Optional[str]]]] = None, temp_dir_root: str = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
656
804
  """
657
- Specifies the Conda environment for the step.
805
+ Enables loading / saving of models within a step.
658
806
 
659
- Information in this decorator will augment any
660
- attributes set in the `@conda_base` flow-level decorator. Hence,
661
- you can use `@conda_base` to set packages required by all
662
- steps and use `@conda` to specify step-specific overrides.
807
+ > Examples
808
+ - Saving Models
809
+ ```python
810
+ @model
811
+ @step
812
+ def train(self):
813
+ # current.model.save returns a dictionary reference to the model saved
814
+ self.my_model = current.model.save(
815
+ path_to_my_model,
816
+ label="my_model",
817
+ metadata={
818
+ "epochs": 10,
819
+ "batch-size": 32,
820
+ "learning-rate": 0.001,
821
+ }
822
+ )
823
+ self.next(self.test)
824
+
825
+ @model(load="my_model")
826
+ @step
827
+ def test(self):
828
+ # `current.model.loaded` returns a dictionary of the loaded models
829
+ # where the key is the name of the artifact and the value is the path to the model
830
+ print(os.listdir(current.model.loaded["my_model"]))
831
+ self.next(self.end)
832
+ ```
833
+
834
+ - Loading models
835
+ ```python
836
+ @step
837
+ def train(self):
838
+ # current.model.load returns the path to the model loaded
839
+ checkpoint_path = current.model.load(
840
+ self.checkpoint_key,
841
+ )
842
+ model_path = current.model.load(
843
+ self.model,
844
+ )
845
+ self.next(self.test)
846
+ ```
663
847
 
664
848
 
665
849
  Parameters
666
850
  ----------
667
- packages : Dict[str, str], default {}
668
- Packages to use for this step. The key is the name of the package
669
- and the value is the version to use.
670
- libraries : Dict[str, str], default {}
671
- Supported for backward compatibility. When used with packages, packages will take precedence.
672
- python : str, optional, default None
673
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
674
- that the version used will correspond to the version of the Python interpreter used to start the run.
675
- disabled : bool, default False
676
- If set to True, disables @conda.
851
+ load : Union[List[str],str,List[Tuple[str,Union[str,None]]]], default: None
852
+ Artifact name/s referencing the models/checkpoints to load. Artifact names refer to the names of the instance variables set to `self`.
853
+ The artifact names given to `load` can be reference objects or reference `key` strings from objects created by `current.checkpoint` / `current.model` / `current.huggingface_hub`.

854
+ If a list of tuples is provided, the first element is the artifact name and the second element is the path where the artifact will be unpacked on
855
+ the local filesystem. If the second element is None, the artifact will be unpacked in the current working directory.
856
+ If a string is provided, then the artifact corresponding to that name will be loaded in the current working directory.
857
+
858
+ temp_dir_root : str, default: None
859
+ The root directory under which `current.model.loaded` will store loaded models
677
860
  """
678
861
  ...
679
862
 
680
863
  @typing.overload
681
- def conda(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
864
+ def model(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
682
865
  ...
683
866
 
684
867
  @typing.overload
685
- def conda(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
868
+ def model(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
686
869
  ...
687
870
 
688
- def conda(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False):
871
+ def model(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, load: typing.Union[typing.List[str], str, typing.List[typing.Tuple[str, typing.Optional[str]]]] = None, temp_dir_root: str = None):
689
872
  """
690
- Specifies the Conda environment for the step.
873
+ Enables loading / saving of models within a step.
691
874
 
692
- Information in this decorator will augment any
693
- attributes set in the `@conda_base` flow-level decorator. Hence,
694
- you can use `@conda_base` to set packages required by all
695
- steps and use `@conda` to specify step-specific overrides.
875
+ > Examples
876
+ - Saving Models
877
+ ```python
878
+ @model
879
+ @step
880
+ def train(self):
881
+ # current.model.save returns a dictionary reference to the model saved
882
+ self.my_model = current.model.save(
883
+ path_to_my_model,
884
+ label="my_model",
885
+ metadata={
886
+ "epochs": 10,
887
+ "batch-size": 32,
888
+ "learning-rate": 0.001,
889
+ }
890
+ )
891
+ self.next(self.test)
892
+
893
+ @model(load="my_model")
894
+ @step
895
+ def test(self):
896
+ # `current.model.loaded` returns a dictionary of the loaded models
897
+ # where the key is the name of the artifact and the value is the path to the model
898
+ print(os.listdir(current.model.loaded["my_model"]))
899
+ self.next(self.end)
900
+ ```
901
+
902
+ - Loading models
903
+ ```python
904
+ @step
905
+ def train(self):
906
+ # current.model.load returns the path to the model loaded
907
+ checkpoint_path = current.model.load(
908
+ self.checkpoint_key,
909
+ )
910
+ model_path = current.model.load(
911
+ self.model,
912
+ )
913
+ self.next(self.test)
914
+ ```
696
915
 
697
916
 
698
917
  Parameters
699
918
  ----------
700
- packages : Dict[str, str], default {}
701
- Packages to use for this step. The key is the name of the package
702
- and the value is the version to use.
703
- libraries : Dict[str, str], default {}
704
- Supported for backward compatibility. When used with packages, packages will take precedence.
705
- python : str, optional, default None
706
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
707
- that the version used will correspond to the version of the Python interpreter used to start the run.
708
- disabled : bool, default False
709
- If set to True, disables @conda.
919
+ load : Union[List[str],str,List[Tuple[str,Union[str,None]]]], default: None
920
+ Artifact name/s referencing the models/checkpoints to load. Artifact names refer to the names of the instance variables set to `self`.
921
+ The artifact names given to `load` can be reference objects or reference `key` strings from objects created by `current.checkpoint` / `current.model` / `current.huggingface_hub`.
922
+ If a list of tuples is provided, the first element is the artifact name and the second element is the path where the artifact will be unpacked on
923
+ the local filesystem. If the second element is None, the artifact will be unpacked in the current working directory.
924
+ If a string is provided, then the artifact corresponding to that name will be loaded in the current working directory.
925
+
926
+ temp_dir_root : str, default: None
927
+ The root directory under which `current.model.loaded` will store loaded models
710
928
  """
711
929
  ...
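Editor's note: the docstring examples above cover the string form of `load`; the tuple form is worth a small sketch as well. The step name and target directory are hypothetical, and `current` is assumed to be imported from `metaflow` as in the docstring snippets.
```python
@model(load=[("my_model", "./eval_model")])  # unpack the "my_model" artifact under ./eval_model
@step
def evaluate(self):
    import os
    # current.model.loaded maps artifact names to the paths they were unpacked to
    print(os.listdir(current.model.loaded["my_model"]))
    self.next(self.end)
```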
712
930
 
713
931
  @typing.overload
714
- def resources(*, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
932
+ def catch(*, var: typing.Optional[str] = None, print_exception: bool = True) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
715
933
  """
716
- Specifies the resources needed when executing this step.
717
-
718
- Use `@resources` to specify the resource requirements
719
- independently of the specific compute layer (`@batch`, `@kubernetes`).
934
+ Specifies that the step will succeed under all circumstances.
720
935
 
721
- You can choose the compute layer on the command line by executing e.g.
722
- ```
723
- python myflow.py run --with batch
724
- ```
725
- or
726
- ```
727
- python myflow.py run --with kubernetes
728
- ```
729
- which executes the flow on the desired system using the
730
- requirements specified in `@resources`.
936
+ The decorator will create an optional artifact, specified by `var`, which
937
+ contains the exception raised. You can use it to detect the presence
938
+ of errors, indicating that all happy-path artifacts produced by the step
939
+ are missing.
731
940
 
732
941
 
733
942
  Parameters
734
943
  ----------
735
- cpu : int, default 1
736
- Number of CPUs required for this step.
737
- gpu : int, optional, default None
738
- Number of GPUs required for this step.
739
- disk : int, optional, default None
740
- Disk size (in MB) required for this step. Only applies on Kubernetes.
741
- memory : int, default 4096
742
- Memory size (in MB) required for this step.
743
- shared_memory : int, optional, default None
744
- The value for the size (in MiB) of the /dev/shm volume for this step.
745
- This parameter maps to the `--shm-size` option in Docker.
944
+ var : str, optional, default None
945
+ Name of the artifact in which to store the caught exception.
946
+ If not specified, the exception is not stored.
947
+ print_exception : bool, default True
948
+ Determines whether or not the exception is printed to
949
+ stdout when caught.
746
950
  """
747
951
  ...
748
952
 
749
953
  @typing.overload
750
- def resources(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
954
+ def catch(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
751
955
  ...
752
956
 
753
957
  @typing.overload
754
- def resources(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
958
+ def catch(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
755
959
  ...
756
960
 
757
- def resources(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None):
961
+ def catch(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, var: typing.Optional[str] = None, print_exception: bool = True):
758
962
  """
759
- Specifies the resources needed when executing this step.
760
-
761
- Use `@resources` to specify the resource requirements
762
- independently of the specific compute layer (`@batch`, `@kubernetes`).
963
+ Specifies that the step will succeed under all circumstances.
763
964
 
764
- You can choose the compute layer on the command line by executing e.g.
765
- ```
766
- python myflow.py run --with batch
767
- ```
768
- or
769
- ```
770
- python myflow.py run --with kubernetes
771
- ```
772
- which executes the flow on the desired system using the
773
- requirements specified in `@resources`.
965
+ The decorator will create an optional artifact, specified by `var`, which
966
+ contains the exception raised. You can use it to detect the presence
967
+ of errors, indicating that all happy-path artifacts produced by the step
968
+ are missing.
774
969
 
775
970
 
776
971
  Parameters
777
972
  ----------
778
- cpu : int, default 1
779
- Number of CPUs required for this step.
780
- gpu : int, optional, default None
781
- Number of GPUs required for this step.
782
- disk : int, optional, default None
783
- Disk size (in MB) required for this step. Only applies on Kubernetes.
784
- memory : int, default 4096
785
- Memory size (in MB) required for this step.
786
- shared_memory : int, optional, default None
787
- The value for the size (in MiB) of the /dev/shm volume for this step.
788
- This parameter maps to the `--shm-size` option in Docker.
973
+ var : str, optional, default None
974
+ Name of the artifact in which to store the caught exception.
975
+ If not specified, the exception is not stored.
976
+ print_exception : bool, default True
977
+ Determines whether or not the exception is printed to
978
+ stdout when caught.
789
979
  """
790
980
  ...
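Editor's note: a hedged sketch of `@catch` combined with `@retry`, as the docstring suggests. The artifact name `start_failure` and the deliberate failure are illustrative.
```python
from metaflow import FlowSpec, catch, retry, step

class CatchExampleFlow(FlowSpec):

    @catch(var="start_failure")  # the raised exception is stored in self.start_failure
    @retry(times=2)
    @step
    def start(self):
        self.value = 1 / 0  # fails deliberately; after retries are exhausted, @catch records the error
        self.next(self.end)

    @step
    def end(self):
        if getattr(self, "start_failure", None):
            print("start failed:", self.start_failure)

if __name__ == "__main__":
    CatchExampleFlow()
```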
791
981
 
792
- @typing.overload
793
- def pypi(*, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
982
+ def nvct(*, gpu: int, gpu_type: str) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
794
983
  """
795
- Specifies the PyPI packages for the step.
796
-
797
- Information in this decorator will augment any
798
- attributes set in the `@pyi_base` flow-level decorator. Hence,
799
- you can use `@pypi_base` to set packages required by all
800
- steps and use `@pypi` to specify step-specific overrides.
984
+ Specifies that this step should execute on DGX cloud.
801
985
 
802
986
 
803
987
  Parameters
804
988
  ----------
805
- packages : Dict[str, str], default: {}
806
- Packages to use for this step. The key is the name of the package
807
- and the value is the version to use.
808
- python : str, optional, default: None
809
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
810
- that the version used will correspond to the version of the Python interpreter used to start the run.
989
+ gpu : int
990
+ Number of GPUs to use.
991
+ gpu_type : str
992
+ Type of Nvidia GPU to use.
811
993
  """
812
994
  ...
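Editor's note: a minimal sketch of `@nvct`, assuming the decorator is importable from the top-level `metaflow` namespace provided by this distribution; the `gpu_type` value is illustrative.
```python
from metaflow import FlowSpec, nvct, step

class NvctExampleFlow(FlowSpec):

    @nvct(gpu=1, gpu_type="H100")  # both arguments are required; "H100" is illustrative
    @step
    def start(self):
        self.trained = True  # placeholder for GPU work executed on DGX cloud
        self.next(self.end)

    @step
    def end(self):
        pass

if __name__ == "__main__":
    NvctExampleFlow()
```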
813
995
 
814
- @typing.overload
815
- def pypi(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
816
- ...
817
-
818
- @typing.overload
819
- def pypi(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
820
- ...
821
-
822
- def pypi(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None):
996
+ def ollama(*, models: list, backend: str, force_pull: bool, cache_update_policy: str, force_cache_update: bool, debug: bool, circuit_breaker_config: dict, timeout_config: dict) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
823
997
  """
824
- Specifies the PyPI packages for the step.
998
+ This decorator is used to run Ollama APIs as Metaflow task sidecars.
825
999
 
826
- Information in this decorator will augment any
827
- attributes set in the `@pyi_base` flow-level decorator. Hence,
828
- you can use `@pypi_base` to set packages required by all
829
- steps and use `@pypi` to specify step-specific overrides.
1000
+ User code call
1001
+ --------------
1002
+ @ollama(
1003
+ models=[...],
1004
+ ...
1005
+ )
1006
+
1007
+ Valid backend options
1008
+ ---------------------
1009
+ - 'local': Run as a separate process on the local task machine.
1010
+ - (TODO) 'managed': Outerbounds hosts and selects compute provider.
1011
+ - (TODO) 'remote': Spin up separate instance to serve Ollama models.
1012
+
1013
+ Valid model options
1014
+ -------------------
1015
+ Any model here https://ollama.com/search, e.g. 'llama3.2', 'llama3.3'
830
1016
 
831
1017
 
832
1018
  Parameters
833
1019
  ----------
834
- packages : Dict[str, str], default: {}
835
- Packages to use for this step. The key is the name of the package
836
- and the value is the version to use.
837
- python : str, optional, default: None
838
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
839
- that the version used will correspond to the version of the Python interpreter used to start the run.
1020
+ models: list[str]
1021
+ List of Ollama containers running models in sidecars.
1022
+ backend: str
1023
+ Determines where and how to run the Ollama process.
1024
+ force_pull: bool
1025
+ Whether to run `ollama pull` no matter what, or first check the remote cache in Metaflow datastore for this model key.
1026
+ cache_update_policy: str
1027
+ Cache update policy: "auto", "force", or "never".
1028
+ force_cache_update: bool
1029
+ Simple override for "force" cache update policy.
1030
+ debug: bool
1031
+ Whether to turn on verbose debugging logs.
1032
+ circuit_breaker_config: dict
1033
+ Configuration for circuit breaker protection. Keys: failure_threshold, recovery_timeout, reset_timeout.
1034
+ timeout_config: dict
1035
+ Configuration for various operation timeouts. Keys: pull, stop, health_check, install, server_startup.
840
1036
  """
841
1037
  ...
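Editor's note: a hedged sketch of `@ollama` following the "User code call" pattern in the docstring. Only `models` and `backend` are shown; the remaining arguments are assumed to take sensible defaults in the implementation even though the stub signature lists them without defaults.
```python
from metaflow import FlowSpec, ollama, step

class OllamaExampleFlow(FlowSpec):

    @ollama(models=["llama3.2"], backend="local")  # model name from https://ollama.com/search
    @step
    def start(self):
        # The sidecar pulls and serves the model; user code talks to the local
        # Ollama endpoint with whatever client it prefers.
        self.next(self.end)

    @step
    def end(self):
        pass

if __name__ == "__main__":
    OllamaExampleFlow()
```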
842
1038
 
843
1039
  @typing.overload
844
- def card(*, type: str = 'default', id: typing.Optional[str] = None, options: typing.Dict[str, typing.Any] = {}, timeout: int = 45) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1040
+ def retry(*, times: int = 3, minutes_between_retries: int = 2) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
845
1041
  """
846
- Creates a human-readable report, a Metaflow Card, after this step completes.
1042
+ Specifies the number of times the task corresponding
1043
+ to a step needs to be retried.
847
1044
 
848
- Note that you may add multiple `@card` decorators in a step with different parameters.
1045
+ This decorator is useful for handling transient errors, such as networking issues.
1046
+ If your task contains operations that can't be retried safely, e.g. database updates,
1047
+ it is advisable to annotate it with `@retry(times=0)`.
1048
+
1049
+ This can be used in conjunction with the `@catch` decorator. The `@catch`
1050
+ decorator will execute a no-op task after all retries have been exhausted,
1051
+ ensuring that the flow execution can continue.
849
1052
 
850
1053
 
851
1054
  Parameters
852
1055
  ----------
853
- type : str, default 'default'
854
- Card type.
855
- id : str, optional, default None
856
- If multiple cards are present, use this id to identify this card.
857
- options : Dict[str, Any], default {}
858
- Options passed to the card. The contents depend on the card type.
859
- timeout : int, default 45
860
- Interrupt reporting if it takes more than this many seconds.
1056
+ times : int, default 3
1057
+ Number of times to retry this task.
1058
+ minutes_between_retries : int, default 2
1059
+ Number of minutes between retries.
861
1060
  """
862
1061
  ...
863
1062
 
864
1063
  @typing.overload
865
- def card(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1064
+ def retry(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
866
1065
  ...
867
1066
 
868
1067
  @typing.overload
869
- def card(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1068
+ def retry(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
870
1069
  ...
871
1070
 
872
- def card(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, type: str = 'default', id: typing.Optional[str] = None, options: typing.Dict[str, typing.Any] = {}, timeout: int = 45):
1071
+ def retry(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, times: int = 3, minutes_between_retries: int = 2):
873
1072
  """
874
- Creates a human-readable report, a Metaflow Card, after this step completes.
875
-
876
- Note that you may add multiple `@card` decorators in a step with different parameters.
1073
+ Specifies the number of times the task corresponding
1074
+ to a step needs to be retried.
877
1075
 
1076
+ This decorator is useful for handling transient errors, such as networking issues.
1077
+ If your task contains operations that can't be retried safely, e.g. database updates,
1078
+ it is advisable to annotate it with `@retry(times=0)`.
878
1079
 
879
- Parameters
880
- ----------
881
- type : str, default 'default'
882
- Card type.
883
- id : str, optional, default None
884
- If multiple cards are present, use this id to identify this card.
885
- options : Dict[str, Any], default {}
886
- Options passed to the card. The contents depend on the card type.
887
- timeout : int, default 45
888
- Interrupt reporting if it takes more than this many seconds.
889
- """
890
- ...
891
-
892
- def kubernetes(*, cpu: int = 1, memory: int = 4096, disk: int = 10240, image: typing.Optional[str] = None, image_pull_policy: str = 'KUBERNETES_IMAGE_PULL_POLICY', image_pull_secrets: typing.List[str] = [], service_account: str = 'METAFLOW_KUBERNETES_SERVICE_ACCOUNT', secrets: typing.Optional[typing.List[str]] = None, node_selector: typing.Union[typing.Dict[str, str], str, None] = None, namespace: str = 'METAFLOW_KUBERNETES_NAMESPACE', gpu: typing.Optional[int] = None, gpu_vendor: str = 'KUBERNETES_GPU_VENDOR', tolerations: typing.List[str] = [], labels: typing.Dict[str, str] = 'METAFLOW_KUBERNETES_LABELS', annotations: typing.Dict[str, str] = 'METAFLOW_KUBERNETES_ANNOTATIONS', use_tmpfs: bool = False, tmpfs_tempdir: bool = True, tmpfs_size: typing.Optional[int] = None, tmpfs_path: typing.Optional[str] = '/metaflow_temp', persistent_volume_claims: typing.Optional[typing.Dict[str, str]] = None, shared_memory: typing.Optional[int] = None, port: typing.Optional[int] = None, compute_pool: typing.Optional[str] = None, hostname_resolution_timeout: int = 600, qos: str = 'Burstable', security_context: typing.Optional[typing.Dict[str, typing.Any]] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
893
- """
894
- Specifies that this step should execute on Kubernetes.
1080
+ This can be used in conjunction with the `@catch` decorator. The `@catch`
1081
+ decorator will execute a no-op task after all retries have been exhausted,
1082
+ ensuring that the flow execution can continue.
895
1083
 
896
1084
 
897
1085
  Parameters
898
1086
  ----------
899
- cpu : int, default 1
900
- Number of CPUs required for this step. If `@resources` is
901
- also present, the maximum value from all decorators is used.
902
- memory : int, default 4096
903
- Memory size (in MB) required for this step. If
904
- `@resources` is also present, the maximum value from all decorators is
905
- used.
906
- disk : int, default 10240
907
- Disk size (in MB) required for this step. If
908
- `@resources` is also present, the maximum value from all decorators is
909
- used.
910
- image : str, optional, default None
911
- Docker image to use when launching on Kubernetes. If not specified, and
912
- METAFLOW_KUBERNETES_CONTAINER_IMAGE is specified, that image is used. If
913
- not, a default Docker image mapping to the current version of Python is used.
914
- image_pull_policy: str, default KUBERNETES_IMAGE_PULL_POLICY
915
- If given, the imagePullPolicy to be applied to the Docker image of the step.
916
- image_pull_secrets: List[str], default []
917
- The default is extracted from METAFLOW_KUBERNETES_IMAGE_PULL_SECRETS.
918
- Kubernetes image pull secrets to use when pulling container images
919
- in Kubernetes.
920
- service_account : str, default METAFLOW_KUBERNETES_SERVICE_ACCOUNT
921
- Kubernetes service account to use when launching pod in Kubernetes.
922
- secrets : List[str], optional, default None
923
- Kubernetes secrets to use when launching pod in Kubernetes. These
924
- secrets are in addition to the ones defined in `METAFLOW_KUBERNETES_SECRETS`
925
- in Metaflow configuration.
926
- node_selector: Union[Dict[str,str], str], optional, default None
927
- Kubernetes node selector(s) to apply to the pod running the task.
928
- Can be passed in as a comma separated string of values e.g.
929
- 'kubernetes.io/os=linux,kubernetes.io/arch=amd64' or as a dictionary
930
- {'kubernetes.io/os': 'linux', 'kubernetes.io/arch': 'amd64'}
931
- namespace : str, default METAFLOW_KUBERNETES_NAMESPACE
932
- Kubernetes namespace to use when launching pod in Kubernetes.
933
- gpu : int, optional, default None
934
- Number of GPUs required for this step. A value of zero implies that
935
- the scheduled node should not have GPUs.
936
- gpu_vendor : str, default KUBERNETES_GPU_VENDOR
937
- The vendor of the GPUs to be used for this step.
938
- tolerations : List[str], default []
939
- The default is extracted from METAFLOW_KUBERNETES_TOLERATIONS.
940
- Kubernetes tolerations to use when launching pod in Kubernetes.
941
- labels: Dict[str, str], default: METAFLOW_KUBERNETES_LABELS
942
- Kubernetes labels to use when launching pod in Kubernetes.
943
- annotations: Dict[str, str], default: METAFLOW_KUBERNETES_ANNOTATIONS
944
- Kubernetes annotations to use when launching pod in Kubernetes.
945
- use_tmpfs : bool, default False
946
- This enables an explicit tmpfs mount for this step.
947
- tmpfs_tempdir : bool, default True
948
- sets METAFLOW_TEMPDIR to tmpfs_path if set for this step.
949
- tmpfs_size : int, optional, default: None
950
- The value for the size (in MiB) of the tmpfs mount for this step.
951
- This parameter maps to the `--tmpfs` option in Docker. Defaults to 50% of the
952
- memory allocated for this step.
953
- tmpfs_path : str, optional, default /metaflow_temp
954
- Path to tmpfs mount for this step.
955
- persistent_volume_claims : Dict[str, str], optional, default None
956
- A map (dictionary) of persistent volumes to be mounted to the pod for this step. The map is from persistent
957
- volumes to the path to which the volume is to be mounted, e.g., `{'pvc-name': '/path/to/mount/on'}`.
958
- shared_memory: int, optional
959
- Shared memory size (in MiB) required for this step
960
- port: int, optional
961
- Port number to specify in the Kubernetes job object
962
- compute_pool : str, optional, default None
963
- Compute pool to be used for for this step.
964
- If not specified, any accessible compute pool within the perimeter is used.
965
- hostname_resolution_timeout: int, default 10 * 60
966
- Timeout in seconds for the workers tasks in the gang scheduled cluster to resolve the hostname of control task.
967
- Only applicable when @parallel is used.
968
- qos: str, default: Burstable
969
- Quality of Service class to assign to the pod. Supported values are: Guaranteed, Burstable, BestEffort
970
-
971
- security_context: Dict[str, Any], optional, default None
972
- Container security context. Applies to the task container. Allows the following keys:
973
- - privileged: bool, optional, default None
974
- - allow_privilege_escalation: bool, optional, default None
975
- - run_as_user: int, optional, default None
976
- - run_as_group: int, optional, default None
977
- - run_as_non_root: bool, optional, default None
1087
+ times : int, default 3
1088
+ Number of times to retry this task.
1089
+ minutes_between_retries : int, default 2
1090
+ Number of minutes between retries.
978
1091
  """
979
1092
  ...
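Editor's note: a minimal sketch of `@retry` for a step with a transient failure mode; the random failure below simply stands in for a flaky network call.
```python
import random
from metaflow import FlowSpec, retry, step

class RetryExampleFlow(FlowSpec):

    @retry(times=4, minutes_between_retries=1)
    @step
    def start(self):
        if random.random() < 0.5:  # stand-in for a transient, retryable failure
            raise RuntimeError("transient error")
        self.next(self.end)

    @step
    def end(self):
        pass

if __name__ == "__main__":
    RetryExampleFlow()
```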
980
1093
 
@@ -998,300 +1111,403 @@ def parallel(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None],
998
1111
  ...
999
1112
 
1000
1113
  @typing.overload
1001
- def fast_bakery_internal(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1114
+ def pypi(*, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1002
1115
  """
1003
- Internal decorator to support Fast bakery
1116
+ Specifies the PyPI packages for the step.
1117
+
1118
+ Information in this decorator will augment any
1119
+ attributes set in the `@pypi_base` flow-level decorator. Hence,
1120
+ you can use `@pypi_base` to set packages required by all
1121
+ steps and use `@pypi` to specify step-specific overrides.
1122
+
1123
+
1124
+ Parameters
1125
+ ----------
1126
+ packages : Dict[str, str], default: {}
1127
+ Packages to use for this step. The key is the name of the package
1128
+ and the value is the version to use.
1129
+ python : str, optional, default: None
1130
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
1131
+ that the version used will correspond to the version of the Python interpreter used to start the run.
1004
1132
  """
1005
1133
  ...
1006
1134
 
1007
1135
  @typing.overload
1008
- def fast_bakery_internal(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1136
+ def pypi(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1009
1137
  ...
1010
1138
 
1011
- def fast_bakery_internal(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
1139
+ @typing.overload
1140
+ def pypi(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1141
+ ...
1142
+
1143
+ def pypi(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None):
1012
1144
  """
1013
- Internal decorator to support Fast bakery
1145
+ Specifies the PyPI packages for the step.
1146
+
1147
+ Information in this decorator will augment any
1148
+ attributes set in the `@pypi_base` flow-level decorator. Hence,
1149
+ you can use `@pypi_base` to set packages required by all
1150
+ steps and use `@pypi` to specify step-specific overrides.
1151
+
1152
+
1153
+ Parameters
1154
+ ----------
1155
+ packages : Dict[str, str], default: {}
1156
+ Packages to use for this step. The key is the name of the package
1157
+ and the value is the version to use.
1158
+ python : str, optional, default: None
1159
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
1160
+ that the version used will correspond to the version of the Python interpreter used to start the run.
1161
+ """
1162
+ ...
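Editor's note: a hedged sketch of `@pypi` layered on top of `@pypi_base`, mirroring the conda example earlier; the Python and pandas versions are illustrative.
```python
from metaflow import FlowSpec, pypi, pypi_base, step

@pypi_base(python="3.10.4")  # shared Python version for all steps (illustrative)
class PypiExampleFlow(FlowSpec):

    @pypi(packages={"pandas": "2.2.2"})  # step-specific pin on top of @pypi_base
    @step
    def start(self):
        import pandas as pd
        self.rows = len(pd.DataFrame({"a": [1, 2, 3]}))
        self.next(self.end)

    @step
    def end(self):
        print(self.rows)

if __name__ == "__main__":
    PypiExampleFlow()
```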
1163
+
1164
+ def vllm(*, model: str, backend: str, openai_api_server: bool, debug: bool, card_refresh_interval: int, max_retries: int, retry_alert_frequency: int, engine_args: dict) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1165
+ """
1166
+ This decorator is used to run vllm APIs as Metaflow task sidecars.
1167
+
1168
+ User code call
1169
+ --------------
1170
+ @vllm(
1171
+ model="...",
1172
+ ...
1173
+ )
1174
+
1175
+ Valid backend options
1176
+ ---------------------
1177
+ - 'local': Run as a separate process on the local task machine.
1178
+
1179
+ Valid model options
1180
+ -------------------
1181
+ Any HuggingFace model identifier, e.g. 'meta-llama/Llama-3.2-1B'
1182
+
1183
+ NOTE: vLLM's OpenAI-compatible server serves ONE model per server instance.
1184
+ If you need multiple models, you must create multiple @vllm decorators.
1185
+
1186
+
1187
+ Parameters
1188
+ ----------
1189
+ model: str
1190
+ HuggingFace model identifier to be served by vLLM.
1191
+ backend: str
1192
+ Determines where and how to run the vLLM process.
1193
+ openai_api_server: bool
1194
+ Whether to use OpenAI-compatible API server mode (subprocess) instead of native engine.
1195
+ Default is False (uses native engine).
1196
+ Set to True for backward compatibility with existing code.
1197
+ debug: bool
1198
+ Whether to turn on verbose debugging logs.
1199
+ card_refresh_interval: int
1200
+ Interval in seconds for refreshing the vLLM status card.
1201
+ Only used when openai_api_server=True.
1202
+ max_retries: int
1203
+ Maximum number of retries checking for vLLM server startup.
1204
+ Only used when openai_api_server=True.
1205
+ retry_alert_frequency: int
1206
+ Frequency of alert logs for vLLM server startup retries.
1207
+ Only used when openai_api_server=True.
1208
+ engine_args : dict
1209
+ Additional keyword arguments to pass to the vLLM engine.
1210
+ For example, `tensor_parallel_size=2`.
1014
1211
  """
1015
1212
  ...
1016
1213
 
1017
1214
  @typing.overload
1018
- def environment(*, vars: typing.Dict[str, str] = {}) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1215
+ def card(*, type: str = 'default', id: typing.Optional[str] = None, options: typing.Dict[str, typing.Any] = {}, timeout: int = 45) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1019
1216
  """
1020
- Specifies environment variables to be set prior to the execution of a step.
1217
+ Creates a human-readable report, a Metaflow Card, after this step completes.
1218
+
1219
+ Note that you may add multiple `@card` decorators in a step with different parameters.
1021
1220
 
1022
1221
 
1023
1222
  Parameters
1024
1223
  ----------
1025
- vars : Dict[str, str], default {}
1026
- Dictionary of environment variables to set.
1224
+ type : str, default 'default'
1225
+ Card type.
1226
+ id : str, optional, default None
1227
+ If multiple cards are present, use this id to identify this card.
1228
+ options : Dict[str, Any], default {}
1229
+ Options passed to the card. The contents depend on the card type.
1230
+ timeout : int, default 45
1231
+ Interrupt reporting if it takes more than this many seconds.
1027
1232
  """
1028
1233
  ...
1029
1234
 
1030
1235
  @typing.overload
1031
- def environment(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1236
+ def card(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1032
1237
  ...
1033
1238
 
1034
1239
  @typing.overload
1035
- def environment(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1240
+ def card(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1036
1241
  ...
1037
1242
 
1038
- def environment(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, vars: typing.Dict[str, str] = {}):
1243
+ def card(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, type: str = 'default', id: typing.Optional[str] = None, options: typing.Dict[str, typing.Any] = {}, timeout: int = 45):
1039
1244
  """
1040
- Specifies environment variables to be set prior to the execution of a step.
1245
+ Creates a human-readable report, a Metaflow Card, after this step completes.
1246
+
1247
+ Note that you may add multiple `@card` decorators in a step with different parameters.
1041
1248
 
1042
1249
 
1043
1250
  Parameters
1044
1251
  ----------
1045
- vars : Dict[str, str], default {}
1046
- Dictionary of environment variables to set.
1252
+ type : str, default 'default'
1253
+ Card type.
1254
+ id : str, optional, default None
1255
+ If multiple cards are present, use this id to identify this card.
1256
+ options : Dict[str, Any], default {}
1257
+ Options passed to the card. The contents depend on the card type.
1258
+ timeout : int, default 45
1259
+ Interrupt reporting if it takes more than this many seconds.
1047
1260
  """
1048
1261
  ...
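Editor's note: a minimal sketch of `@card` with an explicit `id`, appending content from within the step; the card id and Markdown content are illustrative.
```python
from metaflow import FlowSpec, card, current, step
from metaflow.cards import Markdown

class CardExampleFlow(FlowSpec):

    @card(type="default", id="report", timeout=60)
    @step
    def start(self):
        # Content appended here is rendered into the card once the step completes.
        current.card["report"].append(Markdown("# Run summary"))
        self.next(self.end)

    @step
    def end(self):
        pass

if __name__ == "__main__":
    CardExampleFlow()
```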
1049
1262
 
1050
- def nvidia(*, gpu: int, gpu_type: str, queue_timeout: int) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1263
+ def airflow_s3_key_sensor(*, timeout: int, poke_interval: int, mode: str, exponential_backoff: bool, pool: str, soft_fail: bool, name: str, description: str, bucket_key: typing.Union[str, typing.List[str]], bucket_name: str, wildcard_match: bool, aws_conn_id: str, verify: bool) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1051
1264
  """
1052
- Specifies that this step should execute on DGX cloud.
1265
+ The `@airflow_s3_key_sensor` decorator attaches an Airflow [S3KeySensor](https://airflow.apache.org/docs/apache-airflow-providers-amazon/stable/_api/airflow/providers/amazon/aws/sensors/s3/index.html#airflow.providers.amazon.aws.sensors.s3.S3KeySensor)
1266
+ before the start step of the flow. This decorator only works when a flow is scheduled on Airflow
1267
+ and is compiled using `airflow create`. More than one `@airflow_s3_key_sensor` can be
1268
+ added as a flow decorator. Adding more than one decorator will ensure that the `start` step
1269
+ starts only after all sensors finish.
1053
1270
 
1054
1271
 
1055
1272
  Parameters
1056
1273
  ----------
1057
- gpu : int
1058
- Number of GPUs to use.
1059
- gpu_type : str
1060
- Type of Nvidia GPU to use.
1061
- queue_timeout : int
1062
- Time to keep the job in NVCF's queue.
1274
+ timeout : int
1275
+ Time, in seconds before the task times out and fails. (Default: 3600)
1276
+ poke_interval : int
1277
+ Time in seconds that the job should wait in between each try. (Default: 60)
1278
+ mode : str
1279
+ How the sensor operates. Options are: { poke | reschedule }. (Default: "poke")
1280
+ exponential_backoff : bool
1281
+ Allow progressively longer waits between pokes by using an exponential backoff algorithm. (Default: True)
1282
+ pool : str
1283
+ The slot pool this task should run in;
1284
+ slot pools are a way to limit concurrency for certain tasks. (Default: None)
1285
+ soft_fail : bool
1286
+ Set to true to mark the task as SKIPPED on failure. (Default: False)
1287
+ name : str
1288
+ Name of the sensor on Airflow
1289
+ description : str
1290
+ Description of sensor in the Airflow UI
1291
+ bucket_key : Union[str, List[str]]
1292
+ The key(s) being waited on. Supports full s3:// style url or relative path from root level.
1293
+ When it's specified as a full s3:// url, please leave `bucket_name` as None
1294
+ bucket_name : str
1295
+ Name of the S3 bucket. Only needed when bucket_key is not provided as a full s3:// url.
1296
+ When specified, all the keys passed to bucket_key refer to this bucket. (Default: None)
1297
+ wildcard_match : bool
1298
+ Whether the bucket_key should be interpreted as a Unix wildcard pattern. (Default: False)
1299
+ aws_conn_id : str
1300
+ A reference to the S3 connection on Airflow. (Default: None)
1301
+ verify : bool
1302
+ Whether or not to verify SSL certificates for S3 connection. (Default: None)
1063
1303
  """
1064
1304
  ...
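Editor's note: a hedged sketch of `@airflow_s3_key_sensor` as a flow-level decorator. The sensor name, bucket key, and timings are illustrative, and the omitted arguments are assumed to take the defaults listed in the docstring; the sensor only takes effect when the flow is compiled with `airflow create`.
```python
from metaflow import FlowSpec, airflow_s3_key_sensor, step

@airflow_s3_key_sensor(
    name="wait_for_input",                       # sensor name shown in the Airflow UI
    description="Wait for the daily input file",
    bucket_key="s3://my-bucket/input/data.csv",  # full s3:// url, so bucket_name is left unset
    timeout=3600,
    poke_interval=60,
)
class SensorExampleFlow(FlowSpec):

    @step
    def start(self):
        self.next(self.end)

    @step
    def end(self):
        pass

if __name__ == "__main__":
    SensorExampleFlow()
```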
1065
1305
 
1066
- @typing.overload
1067
- def checkpoint(*, load_policy: str = 'fresh', temp_dir_root: str = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1306
+ def with_artifact_store(f: typing.Optional[typing.Type[FlowSpecDerived]] = None):
1068
1307
  """
1069
- Enables checkpointing for a step.
1308
+ Allows setting external datastores to save data for the
1309
+ `@checkpoint`/`@model`/`@huggingface_hub` decorators.
1070
1310
 
1071
- > Examples
1311
+ This decorator is useful when users wish to save data to a different datastore
1312
+ than what is configured in Metaflow. This can be for a variety of reasons:
1072
1313
 
1073
- - Saving Checkpoints
1314
+ 1. Data security: The objects need to be stored in a bucket (object storage) that is not accessible by other flows.
1315
+ 2. Data Locality: The location where the task is executing is not located in the same region as the datastore.
1316
+ - Example: Metaflow datastore lives in US East, but the task is executing in Finland datacenters.
1317
+ 3. Data Lifecycle Policies: The objects need to be archived / managed separately from the Metaflow managed objects.
1318
+ - Example: Flow is training very large models that need to be stored separately and will be deleted more aggressively than the Metaflow managed objects.
1074
1319
 
1075
- ```python
1076
- @checkpoint
1077
- @step
1078
- def train(self):
1079
- model = create_model(self.parameters, checkpoint_path = None)
1080
- for i in range(self.epochs):
1081
- # some training logic
1082
- loss = model.train(self.dataset)
1083
- if i % 10 == 0:
1084
- model.save(
1085
- current.checkpoint.directory,
1086
- )
1087
- # saves the contents of the `current.checkpoint.directory` as a checkpoint
1088
- # and returns a reference dictionary to the checkpoint saved in the datastore
1089
- self.latest_checkpoint = current.checkpoint.save(
1090
- name="epoch_checkpoint",
1091
- metadata={
1092
- "epoch": i,
1093
- "loss": loss,
1094
- }
1095
- )
1096
- ```
1320
+ Usage:
1321
+ ----------
1097
1322
 
1098
- - Using Loaded Checkpoints
1323
+ - Using a custom IAM role to access the datastore.
1099
1324
 
1100
- ```python
1101
- @retry(times=3)
1102
- @checkpoint
1103
- @step
1104
- def train(self):
1105
- # Assume that the task has restarted and the previous attempt of the task
1106
- # saved a checkpoint
1107
- checkpoint_path = None
1108
- if current.checkpoint.is_loaded: # Check if a checkpoint is loaded
1109
- print("Loaded checkpoint from the previous attempt")
1110
- checkpoint_path = current.checkpoint.directory
1325
+ ```python
1326
+ @with_artifact_store(
1327
+ type="s3",
1328
+ config=lambda: {
1329
+ "root": "s3://my-bucket-foo/path/to/root",
1330
+ "role_arn": ROLE,
1331
+ },
1332
+ )
1333
+ class MyFlow(FlowSpec):
1111
1334
 
1112
- model = create_model(self.parameters, checkpoint_path = checkpoint_path)
1113
- for i in range(self.epochs):
1114
- ...
1115
- ```
1335
+ @checkpoint
1336
+ @step
1337
+ def start(self):
1338
+ with open("my_file.txt", "w") as f:
1339
+ f.write("Hello, World!")
1340
+ self.external_bucket_checkpoint = current.checkpoint.save("my_file.txt")
1341
+ self.next(self.end)
1116
1342
 
1343
+ ```
1117
1344
 
1118
- Parameters
1119
- ----------
1120
- load_policy : str, default: "fresh"
1121
- The policy for loading the checkpoint. The following policies are supported:
1122
- - "eager": Loads the the latest available checkpoint within the namespace.
1123
- With this mode, the latest checkpoint written by any previous task (can be even a different run) of the step
1124
- will be loaded at the start of the task.
1125
- - "none": Do not load any checkpoint
1126
- - "fresh": Loads the lastest checkpoint created within the running Task.
1127
- This mode helps loading checkpoints across various retry attempts of the same task.
1128
- With this mode, no checkpoint will be loaded at the start of a task but any checkpoints
1129
- created within the task will be loaded when the task is retries execution on failure.
1345
+ - Using credentials to access the s3-compatible datastore.
1130
1346
 
1131
- temp_dir_root : str, default: None
1132
- The root directory under which `current.checkpoint.directory` will be created.
1133
- """
1134
- ...
1135
-
1136
- @typing.overload
1137
- def checkpoint(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1138
- ...
1139
-
1140
- @typing.overload
1141
- def checkpoint(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1142
- ...
1143
-
1144
- def checkpoint(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, load_policy: str = 'fresh', temp_dir_root: str = None):
1145
- """
1146
- Enables checkpointing for a step.
1347
+ ```python
1348
+ @with_artifact_store(
1349
+ type="s3",
1350
+ config=lambda: {
1351
+ "root": "s3://my-bucket-foo/path/to/root",
1352
+ "client_params": {
1353
+ "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1354
+ "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1355
+ },
1356
+ },
1357
+ )
1358
+ class MyFlow(FlowSpec):
1147
1359
 
1148
- > Examples
1360
+ @checkpoint
1361
+ @step
1362
+ def start(self):
1363
+ with open("my_file.txt", "w") as f:
1364
+ f.write("Hello, World!")
1365
+ self.external_bucket_checkpoint = current.checkpoint.save("my_file.txt")
1366
+ self.next(self.end)
1149
1367
 
1150
- - Saving Checkpoints
1368
+ ```
1151
1369
 
1152
- ```python
1153
- @checkpoint
1154
- @step
1155
- def train(self):
1156
- model = create_model(self.parameters, checkpoint_path = None)
1157
- for i in range(self.epochs):
1158
- # some training logic
1159
- loss = model.train(self.dataset)
1160
- if i % 10 == 0:
1161
- model.save(
1162
- current.checkpoint.directory,
1163
- )
1164
- # saves the contents of the `current.checkpoint.directory` as a checkpoint
1165
- # and returns a reference dictionary to the checkpoint saved in the datastore
1166
- self.latest_checkpoint = current.checkpoint.save(
1167
- name="epoch_checkpoint",
1168
- metadata={
1169
- "epoch": i,
1170
- "loss": loss,
1171
- }
1370
+ - Accessing objects stored in external datastores after task execution.
1371
+
1372
+ ```python
1373
+ run = Run("CheckpointsTestsFlow/8992")
1374
+ with artifact_store_from(run=run, config={
1375
+ "client_params": {
1376
+ "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1377
+ "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1378
+ },
1379
+ }):
1380
+ with Checkpoint() as cp:
1381
+ latest = cp.list(
1382
+ task=run["start"].task
1383
+ )[0]
1384
+ print(latest)
1385
+ cp.load(
1386
+ latest,
1387
+ "test-checkpoints"
1172
1388
  )
1173
- ```
1174
1389
 
1175
- - Using Loaded Checkpoints
1390
+ task = Task("TorchTuneFlow/8484/train/53673")
1391
+ with artifact_store_from(run=run, config={
1392
+ "client_params": {
1393
+ "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1394
+ "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1395
+ },
1396
+ }):
1397
+ load_model(
1398
+ task.data.model_ref,
1399
+ "test-models"
1400
+ )
1401
+ ```
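+ 
+ - Using a Google Cloud Storage bucket as the datastore. A minimal sketch that sets only the required `root` key; the bucket path and flow name are placeholders.
+ 
+ ```python
+ @with_artifact_store(
+     type="gcs",
+     config=lambda: {
+         "root": "gs://my-bucket-foo/path/to/root",
+     },
+ )
+ class MyGCSFlow(FlowSpec):
+ 
+     @checkpoint
+     @step
+     def start(self):
+         with open("my_file.txt", "w") as f:
+             f.write("Hello, World!")
+         self.external_bucket_checkpoint = current.checkpoint.save("my_file.txt")
+         self.next(self.end)
+ 
+ ```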
1402
+ Parameters
1403
+ ----------
1176
1404
 
1177
- ```python
1178
- @retry(times=3)
1179
- @checkpoint
1180
- @step
1181
- def train(self):
1182
- # Assume that the task has restarted and the previous attempt of the task
1183
- # saved a checkpoint
1184
- checkpoint_path = None
1185
- if current.checkpoint.is_loaded: # Check if a checkpoint is loaded
1186
- print("Loaded checkpoint from the previous attempt")
1187
- checkpoint_path = current.checkpoint.directory
1405
+ type: str
1406
+ The type of the datastore. Can be one of 's3', 'gcs', 'azure', or any other datastore supported by Metaflow.
1188
1407
 
1189
- model = create_model(self.parameters, checkpoint_path = checkpoint_path)
1190
- for i in range(self.epochs):
1191
- ...
1192
- ```
1408
+ config: dict or Callable
1409
+ Dictionary of configuration options for the datastore. The `root` key is required; the remaining keys are optional:
1410
+ - root: The root path in the datastore where the data will be saved. (needs to be in the format expected by the datastore)
1411
+ - example: 's3://bucket-name/path/to/root'
1412
+ - example: 'gs://bucket-name/path/to/root'
1413
+ - example: 'https://myblockacc.blob.core.windows.net/metaflow/'
1414
+ - role_arn (optional): AWS IAM role to access s3 bucket (only when `type` is 's3')
1415
+ - session_vars (optional): AWS session variables to access s3 bucket (only when `type` is 's3')
1416
+ - client_params (optional): AWS client parameters to access s3 bucket (only when `type` is 's3')
1417
+ """
1418
+ ...
1419
+
1420
+ @typing.overload
1421
+ def schedule(*, hourly: bool = False, daily: bool = True, weekly: bool = False, cron: typing.Optional[str] = None, timezone: typing.Optional[str] = None) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1422
+ """
1423
+ Specifies the times when the flow should be run when running on a
1424
+ production scheduler.
1193
1425
 
1194
1426
 
1195
1427
  Parameters
1196
1428
  ----------
1197
- load_policy : str, default: "fresh"
1198
- The policy for loading the checkpoint. The following policies are supported:
1199
- - "eager": Loads the the latest available checkpoint within the namespace.
1200
- With this mode, the latest checkpoint written by any previous task (can be even a different run) of the step
1201
- will be loaded at the start of the task.
1202
- - "none": Do not load any checkpoint
1203
- - "fresh": Loads the lastest checkpoint created within the running Task.
1204
- This mode helps loading checkpoints across various retry attempts of the same task.
1205
- With this mode, no checkpoint will be loaded at the start of a task but any checkpoints
1206
- created within the task will be loaded when the task retries execution after a failure.
1207
-
1208
- temp_dir_root : str, default: None
1209
- The root directory under which `current.checkpoint.directory` will be created.
1429
+ hourly : bool, default False
1430
+ Run the workflow hourly.
1431
+ daily : bool, default True
1432
+ Run the workflow daily.
1433
+ weekly : bool, default False
1434
+ Run the workflow weekly.
1435
+ cron : str, optional, default None
1436
+ Run the workflow at [a custom Cron schedule](https://docs.aws.amazon.com/eventbridge/latest/userguide/scheduled-events.html#cron-expressions)
1437
+ specified by this expression.
1438
+ timezone : str, optional, default None
1439
+ Timezone on which the schedule runs (default: None). Currently supported only for Argo workflows,
1440
+ which accepts timezones in [IANA format](https://nodatime.org/TimeZones).
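+ 
+ For example, a flow deployed to a production scheduler could be run every day at 06:00 in a given timezone with a sketch like the one below; the flow name, cron expression, and timezone are placeholders.
+ 
+ ```python
+ from metaflow import FlowSpec, schedule, step
+ 
+ # The cron expression drives the schedule; timezone applies only where the
+ # scheduler supports it (see the parameter description above).
+ @schedule(cron="0 6 * * *", timezone="America/Los_Angeles")
+ class NightlyRefreshFlow(FlowSpec):
+ 
+     @step
+     def start(self):
+         self.next(self.end)
+ 
+     @step
+     def end(self):
+         pass
+ ```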
1210
1441
  """
1211
1442
  ...
1212
1443
 
1213
- def vllm(*, model: str, backend: str, openai_api_server: bool, debug: bool, card_refresh_interval: int, max_retries: int, retry_alert_frequency: int, engine_args: dict) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1444
+ @typing.overload
1445
+ def schedule(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1446
+ ...
1447
+
1448
+ def schedule(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, hourly: bool = False, daily: bool = True, weekly: bool = False, cron: typing.Optional[str] = None, timezone: typing.Optional[str] = None):
1214
1449
  """
1215
- This decorator is used to run vllm APIs as Metaflow task sidecars.
1216
-
1217
- User code call
1218
- --------------
1219
- @vllm(
1220
- model="...",
1221
- ...
1222
- )
1223
-
1224
- Valid backend options
1225
- ---------------------
1226
- - 'local': Run as a separate process on the local task machine.
1227
-
1228
- Valid model options
1229
- -------------------
1230
- Any HuggingFace model identifier, e.g. 'meta-llama/Llama-3.2-1B'
1231
-
1232
- NOTE: vLLM's OpenAI-compatible server serves ONE model per server instance.
1233
- If you need multiple models, you must create multiple @vllm decorators.
1450
+ Specifies the times when the flow should be run when running on a
1451
+ production scheduler.
1234
1452
 
1235
1453
 
1236
1454
  Parameters
1237
1455
  ----------
1238
- model: str
1239
- HuggingFace model identifier to be served by vLLM.
1240
- backend: str
1241
- Determines where and how to run the vLLM process.
1242
- openai_api_server: bool
1243
- Whether to use OpenAI-compatible API server mode (subprocess) instead of native engine.
1244
- Default is False (uses native engine).
1245
- Set to True for backward compatibility with existing code.
1246
- debug: bool
1247
- Whether to turn on verbose debugging logs.
1248
- card_refresh_interval: int
1249
- Interval in seconds for refreshing the vLLM status card.
1250
- Only used when openai_api_server=True.
1251
- max_retries: int
1252
- Maximum number of retries checking for vLLM server startup.
1253
- Only used when openai_api_server=True.
1254
- retry_alert_frequency: int
1255
- Frequency of alert logs for vLLM server startup retries.
1256
- Only used when openai_api_server=True.
1257
- engine_args : dict
1258
- Additional keyword arguments to pass to the vLLM engine.
1259
- For example, `tensor_parallel_size=2`.
1456
+ hourly : bool, default False
1457
+ Run the workflow hourly.
1458
+ daily : bool, default True
1459
+ Run the workflow daily.
1460
+ weekly : bool, default False
1461
+ Run the workflow weekly.
1462
+ cron : str, optional, default None
1463
+ Run the workflow at [a custom Cron schedule](https://docs.aws.amazon.com/eventbridge/latest/userguide/scheduled-events.html#cron-expressions)
1464
+ specified by this expression.
1465
+ timezone : str, optional, default None
1466
+ Timezone on which the schedule runs (default: None). Currently supported only for Argo workflows,
1467
+ which accepts timezones in [IANA format](https://nodatime.org/TimeZones).
1260
1468
  """
1261
1469
  ...
1262
1470
 
1263
- def project(*, name: str, branch: typing.Optional[str] = None, production: bool = False) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1471
+ def airflow_external_task_sensor(*, timeout: int, poke_interval: int, mode: str, exponential_backoff: bool, pool: str, soft_fail: bool, name: str, description: str, external_dag_id: str, external_task_ids: typing.List[str], allowed_states: typing.List[str], failed_states: typing.List[str], execution_delta: "datetime.timedelta", check_existence: bool) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1264
1472
  """
1265
- Specifies what flows belong to the same project.
1266
-
1267
- A project-specific namespace is created for all flows that
1268
- use the same `@project(name)`.
1473
+ The `@airflow_external_task_sensor` decorator attaches an Airflow [ExternalTaskSensor](https://airflow.apache.org/docs/apache-airflow/stable/_api/airflow/sensors/external_task/index.html#airflow.sensors.external_task.ExternalTaskSensor) before the start step of the flow.
1474
+ This decorator only works when a flow is scheduled on Airflow and is compiled using `airflow create`. More than one `@airflow_external_task_sensor` can be added as flow decorators. Adding more than one decorator ensures that the `start` step starts only after all sensors finish.
1269
1475
 
1270
1476
 
1271
1477
  Parameters
1272
1478
  ----------
1479
+ timeout : int
1480
+ Time, in seconds before the task times out and fails. (Default: 3600)
1481
+ poke_interval : int
1482
+ Time in seconds that the job should wait in between each try. (Default: 60)
1483
+ mode : str
1484
+ How the sensor operates. Options are: { poke | reschedule }. (Default: "poke")
1485
+ exponential_backoff : bool
1486
+ Allow progressively longer waits between pokes by using an exponential backoff algorithm. (Default: True)
1487
+ pool : str
1488
+ The slot pool this task should run in;
1489
+ slot pools are a way to limit concurrency for certain tasks. (Default: None)
1490
+ soft_fail : bool
1491
+ Set to True to mark the task as SKIPPED on failure. (Default: False)
1273
1492
  name : str
1274
- Project name. Make sure that the name is unique amongst all
1275
- projects that use the same production scheduler. The name may
1276
- contain only lowercase alphanumeric characters and underscores.
1277
-
1278
- branch : Optional[str], default None
1279
- The branch to use. If not specified, the branch is set to
1280
- `user.<username>` unless `production` is set to `True`. This can
1281
- also be set on the command line using `--branch` as a top-level option.
1282
- It is an error to specify `branch` in the decorator and on the command line.
1283
-
1284
- production : bool, default False
1285
- Whether or not the branch is the production branch. This can also be set on the
1286
- command line using `--production` as a top-level option. It is an error to specify
1287
- `production` in the decorator and on the command line.
1288
- The project branch name will be:
1289
- - if `branch` is specified:
1290
- - if `production` is True: `prod.<branch>`
1291
- - if `production` is False: `test.<branch>`
1292
- - if `branch` is not specified:
1293
- - if `production` is True: `prod`
1294
- - if `production` is False: `user.<username>`
1493
+ Name of the sensor on Airflow
1494
+ description : str
1495
+ Description of sensor in the Airflow UI
1496
+ external_dag_id : str
1497
+ The dag_id that contains the task you want to wait for.
1498
+ external_task_ids : List[str]
1499
+ The list of task_ids that you want to wait for.
1500
+ If None (the default), the sensor waits for the DAG. (Default: None)
1501
+ allowed_states : List[str]
1502
+ Iterable of allowed states. (Default: ['success'])
1503
+ failed_states : List[str]
1504
+ Iterable of failed or disallowed states. (Default: None)
1505
+ execution_delta : datetime.timedelta
1506
+ Time difference with the previous execution to look at;
1507
+ the default is the same logical date as the current task or DAG. (Default: None)
1508
+ check_existence: bool
1509
+ Set to True to check if the external task exists or check if
1510
+ the DAG to wait for exists. (Default: True)
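+ 
+ For example, a flow could wait for an upstream Airflow task to succeed before its `start` step runs. The sketch below passes only a few arguments and assumes the remaining ones keep the defaults listed above; the DAG, task, and flow names are placeholders.
+ 
+ ```python
+ from metaflow import FlowSpec, airflow_external_task_sensor, step
+ 
+ @airflow_external_task_sensor(
+     name="wait_for_etl",
+     description="Wait for etl_dag.load_table before starting",
+     external_dag_id="etl_dag",
+     external_task_ids=["load_table"],
+ )
+ class DownstreamFlow(FlowSpec):
+ 
+     @step
+     def start(self):
+         self.next(self.end)
+ 
+     @step
+     def end(self):
+         pass
+ ```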
1295
1511
  """
1296
1512
  ...
1297
1513
 
@@ -1378,186 +1594,56 @@ def trigger_on_finish(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *
1378
1594
  @trigger_on_finish(flow={"name": "FooFlow", "project": "my_project", "project_branch": "branch"})
1379
1595
  ```
1380
1596
 
1381
- Note that `branch` is typically one of:
1382
- - `prod`
1383
- - `user.bob`
1384
- - `test.my_experiment`
1385
- - `prod.staging`
1386
-
1387
-
1388
- Parameters
1389
- ----------
1390
- flow : Union[str, Dict[str, str]], optional, default None
1391
- Upstream flow dependency for this flow.
1392
- flows : List[Union[str, Dict[str, str]]], default []
1393
- Upstream flow dependencies for this flow.
1394
- options : Dict[str, Any], default {}
1395
- Backend-specific configuration for tuning eventing behavior.
1396
- """
1397
- ...
1398
-
1399
- @typing.overload
1400
- def schedule(*, hourly: bool = False, daily: bool = True, weekly: bool = False, cron: typing.Optional[str] = None, timezone: typing.Optional[str] = None) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1401
- """
1402
- Specifies the times when the flow should be run when running on a
1403
- production scheduler.
1404
-
1405
-
1406
- Parameters
1407
- ----------
1408
- hourly : bool, default False
1409
- Run the workflow hourly.
1410
- daily : bool, default True
1411
- Run the workflow daily.
1412
- weekly : bool, default False
1413
- Run the workflow weekly.
1414
- cron : str, optional, default None
1415
- Run the workflow at [a custom Cron schedule](https://docs.aws.amazon.com/eventbridge/latest/userguide/scheduled-events.html#cron-expressions)
1416
- specified by this expression.
1417
- timezone : str, optional, default None
1418
- Timezone on which the schedule runs (default: None). Currently supported only for Argo workflows,
1419
- which accepts timezones in [IANA format](https://nodatime.org/TimeZones).
1420
- """
1421
- ...
1422
-
1423
- @typing.overload
1424
- def schedule(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1425
- ...
1426
-
1427
- def schedule(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, hourly: bool = False, daily: bool = True, weekly: bool = False, cron: typing.Optional[str] = None, timezone: typing.Optional[str] = None):
1428
- """
1429
- Specifies the times when the flow should be run when running on a
1430
- production scheduler.
1431
-
1432
-
1433
- Parameters
1434
- ----------
1435
- hourly : bool, default False
1436
- Run the workflow hourly.
1437
- daily : bool, default True
1438
- Run the workflow daily.
1439
- weekly : bool, default False
1440
- Run the workflow weekly.
1441
- cron : str, optional, default None
1442
- Run the workflow at [a custom Cron schedule](https://docs.aws.amazon.com/eventbridge/latest/userguide/scheduled-events.html#cron-expressions)
1443
- specified by this expression.
1444
- timezone : str, optional, default None
1445
- Timezone on which the schedule runs (default: None). Currently supported only for Argo workflows,
1446
- which accepts timezones in [IANA format](https://nodatime.org/TimeZones).
1447
- """
1448
- ...
1449
-
1450
- def with_artifact_store(f: typing.Optional[typing.Type[FlowSpecDerived]] = None):
1451
- """
1452
- Allows setting external datastores to save data for the
1453
- `@checkpoint`/`@model`/`@huggingface_hub` decorators.
1454
-
1455
- This decorator is useful when users wish to save data to a different datastore
1456
- than what is configured in Metaflow. This can be for a variety of reasons:
1457
-
1458
- 1. Data security: The objects need to be stored in a bucket (object storage) that is not accessible by other flows.
1459
- 2. Data Locality: The location where the task is executing is not located in the same region as the datastore.
1460
- - Example: Metaflow datastore lives in US East, but the task is executing in Finland datacenters.
1461
- 3. Data Lifecycle Policies: The objects need to be archived / managed separately from the Metaflow managed objects.
1462
- - Example: Flow is training very large models that need to be stored separately and will be deleted more aggressively than the Metaflow managed objects.
1463
-
1464
- Usage:
1465
- ----------
1466
-
1467
- - Using a custom IAM role to access the datastore.
1468
-
1469
- ```python
1470
- @with_artifact_store(
1471
- type="s3",
1472
- config=lambda: {
1473
- "root": "s3://my-bucket-foo/path/to/root",
1474
- "role_arn": ROLE,
1475
- },
1476
- )
1477
- class MyFlow(FlowSpec):
1478
-
1479
- @checkpoint
1480
- @step
1481
- def start(self):
1482
- with open("my_file.txt", "w") as f:
1483
- f.write("Hello, World!")
1484
- self.external_bucket_checkpoint = current.checkpoint.save("my_file.txt")
1485
- self.next(self.end)
1486
-
1487
- ```
1488
-
1489
- - Using credentials to access the s3-compatible datastore.
1490
-
1491
- ```python
1492
- @with_artifact_store(
1493
- type="s3",
1494
- config=lambda: {
1495
- "root": "s3://my-bucket-foo/path/to/root",
1496
- "client_params": {
1497
- "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1498
- "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1499
- },
1500
- },
1501
- )
1502
- class MyFlow(FlowSpec):
1597
+ Note that `branch` is typically one of:
1598
+ - `prod`
1599
+ - `user.bob`
1600
+ - `test.my_experiment`
1601
+ - `prod.staging`
1503
1602
 
1504
- @checkpoint
1505
- @step
1506
- def start(self):
1507
- with open("my_file.txt", "w") as f:
1508
- f.write("Hello, World!")
1509
- self.external_bucket_checkpoint = current.checkpoint.save("my_file.txt")
1510
- self.next(self.end)
1511
1603
 
1512
- ```
1604
+ Parameters
1605
+ ----------
1606
+ flow : Union[str, Dict[str, str]], optional, default None
1607
+ Upstream flow dependency for this flow.
1608
+ flows : List[Union[str, Dict[str, str]]], default []
1609
+ Upstream flow dependencies for this flow.
1610
+ options : Dict[str, Any], default {}
1611
+ Backend-specific configuration for tuning eventing behavior.
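+ 
+ For example, a flow can depend on the completion of several upstream flows via the `flows` parameter; the flow names below are placeholders.
+ 
+ ```python
+ from metaflow import FlowSpec, step, trigger_on_finish
+ 
+ # Deployed to a production scheduler, this flow is triggered based on the
+ # completion of the listed upstream flows.
+ @trigger_on_finish(flows=["DataPrepFlow", "FeatureBuildFlow"])
+ class TrainingFlow(FlowSpec):
+ 
+     @step
+     def start(self):
+         self.next(self.end)
+ 
+     @step
+     def end(self):
+         pass
+ ```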
1612
+ """
1613
+ ...
1614
+
1615
+ def project(*, name: str, branch: typing.Optional[str] = None, production: bool = False) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1616
+ """
1617
+ Specifies what flows belong to the same project.
1513
1618
 
1514
- - Accessing objects stored in external datastores after task execution.
1619
+ A project-specific namespace is created for all flows that
1620
+ use the same `@project(name)`.
1515
1621
 
1516
- ```python
1517
- run = Run("CheckpointsTestsFlow/8992")
1518
- with artifact_store_from(run=run, config={
1519
- "client_params": {
1520
- "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1521
- "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1522
- },
1523
- }):
1524
- with Checkpoint() as cp:
1525
- latest = cp.list(
1526
- task=run["start"].task
1527
- )[0]
1528
- print(latest)
1529
- cp.load(
1530
- latest,
1531
- "test-checkpoints"
1532
- )
1533
1622
 
1534
- task = Task("TorchTuneFlow/8484/train/53673")
1535
- with artifact_store_from(run=run, config={
1536
- "client_params": {
1537
- "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1538
- "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1539
- },
1540
- }):
1541
- load_model(
1542
- task.data.model_ref,
1543
- "test-models"
1544
- )
1545
- ```
1546
- Parameters:
1623
+ Parameters
1547
1624
  ----------
1625
+ name : str
1626
+ Project name. Make sure that the name is unique amongst all
1627
+ projects that use the same production scheduler. The name may
1628
+ contain only lowercase alphanumeric characters and underscores.
1548
1629
 
1549
- type: str
1550
- The type of the datastore. Can be one of 's3', 'gcs', 'azure' or any other supported metaflow Datastore.
1630
+ branch : Optional[str], default None
1631
+ The branch to use. If not specified, the branch is set to
1632
+ `user.<username>` unless `production` is set to `True`. This can
1633
+ also be set on the command line using `--branch` as a top-level option.
1634
+ It is an error to specify `branch` in the decorator and on the command line.
1551
1635
 
1552
- config: dict or Callable
1553
- Dictionary of configuration options for the datastore. The following keys are required:
1554
- - root: The root path in the datastore where the data will be saved. (needs to be in the format expected by the datastore)
1555
- - example: 's3://bucket-name/path/to/root'
1556
- - example: 'gs://bucket-name/path/to/root'
1557
- - example: 'https://myblockacc.blob.core.windows.net/metaflow/'
1558
- - role_arn (optional): AWS IAM role to access s3 bucket (only when `type` is 's3')
1559
- - session_vars (optional): AWS session variables to access s3 bucket (only when `type` is 's3')
1560
- - client_params (optional): AWS client parameters to access s3 bucket (only when `type` is 's3')
1636
+ production : bool, default False
1637
+ Whether or not the branch is the production branch. This can also be set on the
1638
+ command line using `--production` as a top-level option. It is an error to specify
1639
+ `production` in the decorator and on the command line.
1640
+ The project branch name will be:
1641
+ - if `branch` is specified:
1642
+ - if `production` is True: `prod.<branch>`
1643
+ - if `production` is False: `test.<branch>`
1644
+ - if `branch` is not specified:
1645
+ - if `production` is True: `prod`
1646
+ - if `production` is False: `user.<username>`
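+ 
+ For example, with the sketch below (project and branch names are placeholders), a deployment with `production=True` runs under the `prod.serving` branch of the `fraud_detection` project namespace, following the rules above.
+ 
+ ```python
+ from metaflow import FlowSpec, project, step
+ 
+ @project(name="fraud_detection", branch="serving", production=True)
+ class ScoringFlow(FlowSpec):
+ 
+     @step
+     def start(self):
+         self.next(self.end)
+ 
+     @step
+     def end(self):
+         pass
+ ```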
1561
1647
  """
1562
1648
  ...
1563
1649
 
@@ -1654,78 +1740,49 @@ def trigger(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, event: t
1654
1740
  """
1655
1741
  ...
1656
1742
 
1657
- def airflow_external_task_sensor(*, timeout: int, poke_interval: int, mode: str, exponential_backoff: bool, pool: str, soft_fail: bool, name: str, description: str, external_dag_id: str, external_task_ids: typing.List[str], allowed_states: typing.List[str], failed_states: typing.List[str], execution_delta: "datetime.timedelta", check_existence: bool) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1743
+ @typing.overload
1744
+ def pypi_base(*, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1658
1745
  """
1659
- The `@airflow_external_task_sensor` decorator attaches an Airflow [ExternalTaskSensor](https://airflow.apache.org/docs/apache-airflow/stable/_api/airflow/sensors/external_task/index.html#airflow.sensors.external_task.ExternalTaskSensor) before the start step of the flow.
1660
- This decorator only works when a flow is scheduled on Airflow and is compiled using `airflow create`. More than one `@airflow_external_task_sensor` can be added as flow decorators. Adding more than one decorator ensures that the `start` step starts only after all sensors finish.
1746
+ Specifies the PyPI packages for all steps of the flow.
1661
1747
 
1748
+ Use `@pypi_base` to set common packages required by all
1749
+ steps and use `@pypi` to specify step-specific overrides.
1662
1750
 
1663
1751
  Parameters
1664
1752
  ----------
1665
- timeout : int
1666
- Time, in seconds before the task times out and fails. (Default: 3600)
1667
- poke_interval : int
1668
- Time in seconds that the job should wait in between each try. (Default: 60)
1669
- mode : str
1670
- How the sensor operates. Options are: { poke | reschedule }. (Default: "poke")
1671
- exponential_backoff : bool
1672
- Allow progressively longer waits between pokes by using an exponential backoff algorithm. (Default: True)
1673
- pool : str
1674
- the slot pool this task should run in,
1675
- slot pools are a way to limit concurrency for certain tasks. (Default:None)
1676
- soft_fail : bool
1677
- Set to true to mark the task as SKIPPED on failure. (Default: False)
1678
- name : str
1679
- Name of the sensor on Airflow
1680
- description : str
1681
- Description of sensor in the Airflow UI
1682
- external_dag_id : str
1683
- The dag_id that contains the task you want to wait for.
1684
- external_task_ids : List[str]
1685
- The list of task_ids that you want to wait for.
1686
- If None (default value) the sensor waits for the DAG. (Default: None)
1687
- allowed_states : List[str]
1688
- Iterable of allowed states, (Default: ['success'])
1689
- failed_states : List[str]
1690
- Iterable of failed or dis-allowed states. (Default: None)
1691
- execution_delta : datetime.timedelta
1692
- time difference with the previous execution to look at,
1693
- the default is the same logical date as the current task or DAG. (Default: None)
1694
- check_existence: bool
1695
- Set to True to check if the external task exists or check if
1696
- the DAG to wait for exists. (Default: True)
1753
+ packages : Dict[str, str], default: {}
1754
+ Packages to use for this flow. The key is the name of the package
1755
+ and the value is the version to use.
1756
+ python : str, optional, default: None
1757
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
1758
+ that the version used will correspond to the version of the Python interpreter used to start the run.
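+ 
+ For example, common dependencies can be pinned once for every step of a flow; the package and Python versions below are placeholders.
+ 
+ ```python
+ from metaflow import FlowSpec, pypi_base, step
+ 
+ @pypi_base(packages={"pandas": "2.2.2"}, python="3.11.9")
+ class ReportFlow(FlowSpec):
+ 
+     @step
+     def start(self):
+         import pandas as pd  # resolved from the @pypi_base environment
+         self.rows = len(pd.DataFrame({"x": [1, 2, 3]}))
+         self.next(self.end)
+ 
+     @step
+     def end(self):
+         print(self.rows)
+ ```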
1697
1759
  """
1698
1760
  ...
1699
1761
 
1700
1762
  @typing.overload
1701
- def conda_base(*, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1763
+ def pypi_base(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1764
+ ...
1765
+
1766
+ def pypi_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None):
1702
1767
  """
1703
- Specifies the Conda environment for all steps of the flow.
1704
-
1705
- Use `@conda_base` to set common libraries required by all
1706
- steps and use `@conda` to specify step-specific additions.
1768
+ Specifies the PyPI packages for all steps of the flow.
1707
1769
 
1770
+ Use `@pypi_base` to set common packages required by all
1771
+ steps and use `@pypi` to specify step-specific overrides.
1708
1772
 
1709
1773
  Parameters
1710
1774
  ----------
1711
- packages : Dict[str, str], default {}
1775
+ packages : Dict[str, str], default: {}
1712
1776
  Packages to use for this flow. The key is the name of the package
1713
1777
  and the value is the version to use.
1714
- libraries : Dict[str, str], default {}
1715
- Supported for backward compatibility. When used with packages, packages will take precedence.
1716
- python : str, optional, default None
1778
+ python : str, optional, default: None
1717
1779
  Version of Python to use, e.g. '3.7.4'. A default value of None implies
1718
1780
  that the version used will correspond to the version of the Python interpreter used to start the run.
1719
- disabled : bool, default False
1720
- If set to True, disables Conda.
1721
1781
  """
1722
1782
  ...
1723
1783
 
1724
1784
  @typing.overload
1725
- def conda_base(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1726
- ...
1727
-
1728
- def conda_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False):
1785
+ def conda_base(*, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1729
1786
  """
1730
1787
  Specifies the Conda environment for all steps of the flow.
1731
1788
 
@@ -1749,86 +1806,29 @@ def conda_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packa
1749
1806
  ...
1750
1807
 
1751
1808
  @typing.overload
1752
- def pypi_base(*, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1753
- """
1754
- Specifies the PyPI packages for all steps of the flow.
1755
-
1756
- Use `@pypi_base` to set common packages required by all
1757
- steps and use `@pypi` to specify step-specific overrides.
1758
-
1759
- Parameters
1760
- ----------
1761
- packages : Dict[str, str], default: {}
1762
- Packages to use for this flow. The key is the name of the package
1763
- and the value is the version to use.
1764
- python : str, optional, default: None
1765
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
1766
- that the version used will correspond to the version of the Python interpreter used to start the run.
1767
- """
1768
- ...
1769
-
1770
- @typing.overload
1771
- def pypi_base(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1809
+ def conda_base(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1772
1810
  ...
1773
1811
 
1774
- def pypi_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None):
1812
+ def conda_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False):
1775
1813
  """
1776
- Specifies the PyPI packages for all steps of the flow.
1814
+ Specifies the Conda environment for all steps of the flow.
1815
+
1816
+ Use `@conda_base` to set common libraries required by all
1817
+ steps and use `@conda` to specify step-specific additions.
1777
1818
 
1778
- Use `@pypi_base` to set common packages required by all
1779
- steps and use `@pypi` to specify step-specific overrides.
1780
1819
 
1781
1820
  Parameters
1782
1821
  ----------
1783
- packages : Dict[str, str], default: {}
1822
+ packages : Dict[str, str], default {}
1784
1823
  Packages to use for this flow. The key is the name of the package
1785
1824
  and the value is the version to use.
1786
- python : str, optional, default: None
1825
+ libraries : Dict[str, str], default {}
1826
+ Supported for backward compatibility. When used with packages, packages will take precedence.
1827
+ python : str, optional, default None
1787
1828
  Version of Python to use, e.g. '3.7.4'. A default value of None implies
1788
1829
  that the version used will correspond to the version of the Python interpreter used to start the run.
1789
- """
1790
- ...
1791
-
1792
- def airflow_s3_key_sensor(*, timeout: int, poke_interval: int, mode: str, exponential_backoff: bool, pool: str, soft_fail: bool, name: str, description: str, bucket_key: typing.Union[str, typing.List[str]], bucket_name: str, wildcard_match: bool, aws_conn_id: str, verify: bool) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1793
- """
1794
- The `@airflow_s3_key_sensor` decorator attaches an Airflow [S3KeySensor](https://airflow.apache.org/docs/apache-airflow-providers-amazon/stable/_api/airflow/providers/amazon/aws/sensors/s3/index.html#airflow.providers.amazon.aws.sensors.s3.S3KeySensor)
1795
- before the start step of the flow. This decorator only works when a flow is scheduled on Airflow
1796
- and is compiled using `airflow create`. More than one `@airflow_s3_key_sensor` can be
1797
- added as flow decorators. Adding more than one decorator will ensure that the `start` step
1798
- starts only after all sensors finish.
1799
-
1800
-
1801
- Parameters
1802
- ----------
1803
- timeout : int
1804
- Time, in seconds before the task times out and fails. (Default: 3600)
1805
- poke_interval : int
1806
- Time in seconds that the job should wait in between each try. (Default: 60)
1807
- mode : str
1808
- How the sensor operates. Options are: { poke | reschedule }. (Default: "poke")
1809
- exponential_backoff : bool
1810
- allow progressive longer waits between pokes by using exponential backoff algorithm. (Default: True)
1811
- pool : str
1812
- the slot pool this task should run in,
1813
- slot pools are a way to limit concurrency for certain tasks. (Default:None)
1814
- soft_fail : bool
1815
- Set to true to mark the task as SKIPPED on failure. (Default: False)
1816
- name : str
1817
- Name of the sensor on Airflow
1818
- description : str
1819
- Description of sensor in the Airflow UI
1820
- bucket_key : Union[str, List[str]]
1821
- The key(s) being waited on. Supports full s3:// style url or relative path from root level.
1822
- When it's specified as a full s3:// url, please leave `bucket_name` as None
1823
- bucket_name : str
1824
- Name of the S3 bucket. Only needed when bucket_key is not provided as a full s3:// url.
1825
- When specified, all the keys passed to bucket_key refers to this bucket. (Default:None)
1826
- wildcard_match : bool
1827
- whether the bucket_key should be interpreted as a Unix wildcard pattern. (Default: False)
1828
- aws_conn_id : str
1829
- a reference to the s3 connection on Airflow. (Default: None)
1830
- verify : bool
1831
- Whether or not to verify SSL certificates for S3 connection. (Default: None)
1830
+ disabled : bool, default False
1831
+ If set to True, disables Conda.
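+ 
+ For example, a single Conda environment can be shared by all steps of a flow; the package and Python versions below are placeholders.
+ 
+ ```python
+ from metaflow import FlowSpec, conda_base, step
+ 
+ @conda_base(packages={"numpy": "1.26.4"}, python="3.10.14")
+ class NumericsFlow(FlowSpec):
+ 
+     @step
+     def start(self):
+         import numpy as np  # provided by the @conda_base environment
+         self.total = float(np.arange(10).sum())
+         self.next(self.end)
+ 
+     @step
+     def end(self):
+         print(self.total)
+ ```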
1832
1832
  """
1833
1833
  ...
1834
1834