ob-metaflow-stubs 6.0.4.8rc1__py2.py3-none-any.whl → 6.0.4.9__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (261) hide show
  1. metaflow-stubs/__init__.pyi +917 -923
  2. metaflow-stubs/cards.pyi +2 -2
  3. metaflow-stubs/cli.pyi +2 -3
  4. metaflow-stubs/cli_components/__init__.pyi +2 -2
  5. metaflow-stubs/cli_components/utils.pyi +2 -2
  6. metaflow-stubs/client/__init__.pyi +2 -2
  7. metaflow-stubs/client/core.pyi +7 -6
  8. metaflow-stubs/client/filecache.pyi +3 -3
  9. metaflow-stubs/events.pyi +3 -3
  10. metaflow-stubs/exception.pyi +2 -2
  11. metaflow-stubs/flowspec.pyi +8 -8
  12. metaflow-stubs/generated_for.txt +1 -1
  13. metaflow-stubs/includefile.pyi +3 -3
  14. metaflow-stubs/{meta_files.pyi → info_file.pyi} +6 -2
  15. metaflow-stubs/metadata_provider/__init__.pyi +2 -2
  16. metaflow-stubs/metadata_provider/heartbeat.pyi +2 -2
  17. metaflow-stubs/metadata_provider/metadata.pyi +2 -2
  18. metaflow-stubs/metadata_provider/util.pyi +2 -2
  19. metaflow-stubs/metaflow_config.pyi +2 -6
  20. metaflow-stubs/metaflow_current.pyi +37 -37
  21. metaflow-stubs/metaflow_git.pyi +2 -2
  22. metaflow-stubs/mf_extensions/__init__.pyi +2 -2
  23. metaflow-stubs/mf_extensions/obcheckpoint/__init__.pyi +2 -2
  24. metaflow-stubs/mf_extensions/obcheckpoint/plugins/__init__.pyi +2 -2
  25. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/__init__.pyi +2 -2
  26. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/__init__.pyi +2 -2
  27. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/async_cards.pyi +3 -3
  28. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/deco_injection_mixin.pyi +2 -2
  29. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/extra_components.pyi +3 -3
  30. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/__init__.pyi +2 -2
  31. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/cards/__init__.pyi +2 -2
  32. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/cards/checkpoint_lister.pyi +3 -3
  33. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/cards/lineage_card.pyi +2 -2
  34. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/checkpoint_storage.pyi +3 -3
  35. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/constructors.pyi +2 -2
  36. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/core.pyi +4 -4
  37. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/decorator.pyi +4 -4
  38. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/exceptions.pyi +2 -2
  39. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/final_api.pyi +2 -2
  40. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/lineage.pyi +2 -2
  41. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/__init__.pyi +2 -2
  42. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/context.pyi +4 -4
  43. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/core.pyi +2 -2
  44. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/decorator.pyi +2 -2
  45. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/exceptions.pyi +2 -2
  46. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/task_utils.pyi +3 -3
  47. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/utils.pyi +2 -2
  48. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastructures.pyi +2 -2
  49. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/exceptions.pyi +2 -2
  50. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/hf_hub/__init__.pyi +2 -2
  51. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/hf_hub/decorator.pyi +2 -2
  52. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/__init__.pyi +2 -2
  53. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/core.pyi +3 -3
  54. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/exceptions.pyi +2 -2
  55. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/model_storage.pyi +3 -3
  56. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/__init__.pyi +2 -2
  57. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/flowspec_utils.pyi +2 -2
  58. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/general.pyi +2 -2
  59. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/identity_utils.pyi +3 -3
  60. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/serialization_handler/__init__.pyi +2 -2
  61. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/serialization_handler/base.pyi +2 -2
  62. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/serialization_handler/tar.pyi +2 -2
  63. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/tar_utils.pyi +2 -2
  64. metaflow-stubs/mf_extensions/outerbounds/__init__.pyi +2 -2
  65. metaflow-stubs/mf_extensions/outerbounds/plugins/__init__.pyi +2 -2
  66. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/__init__.pyi +2 -2
  67. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/__init__.pyi +2 -2
  68. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/_state_machine.pyi +2 -2
  69. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/_vendor/__init__.pyi +2 -2
  70. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/_vendor/spinner/__init__.pyi +2 -2
  71. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/_vendor/spinner/spinners.pyi +2 -2
  72. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/app_cli.pyi +3 -3
  73. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/app_config.pyi +2 -2
  74. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/capsule.pyi +3 -3
  75. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/click_importer.pyi +2 -2
  76. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/code_package/__init__.pyi +2 -2
  77. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/code_package/code_packager.pyi +2 -2
  78. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/__init__.pyi +2 -2
  79. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/cli_generator.pyi +2 -2
  80. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/config_utils.pyi +2 -2
  81. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/schema_export.pyi +2 -2
  82. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/typed_configs.pyi +3 -3
  83. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/unified_config.pyi +2 -2
  84. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/dependencies.pyi +4 -4
  85. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/deployer.pyi +4 -4
  86. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/experimental/__init__.pyi +2 -2
  87. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/perimeters.pyi +2 -2
  88. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/utils.pyi +3 -3
  89. metaflow-stubs/mf_extensions/outerbounds/plugins/aws/__init__.pyi +2 -2
  90. metaflow-stubs/mf_extensions/outerbounds/plugins/aws/assume_role_decorator.pyi +8 -12
  91. metaflow-stubs/mf_extensions/outerbounds/plugins/card_utilities/__init__.pyi +2 -2
  92. metaflow-stubs/mf_extensions/outerbounds/plugins/card_utilities/async_cards.pyi +3 -3
  93. metaflow-stubs/mf_extensions/outerbounds/plugins/card_utilities/injector.pyi +2 -2
  94. metaflow-stubs/mf_extensions/outerbounds/plugins/checkpoint_datastores/__init__.pyi +2 -2
  95. metaflow-stubs/mf_extensions/outerbounds/plugins/checkpoint_datastores/coreweave.pyi +8 -13
  96. metaflow-stubs/mf_extensions/outerbounds/plugins/checkpoint_datastores/nebius.pyi +8 -11
  97. metaflow-stubs/mf_extensions/outerbounds/plugins/fast_bakery/__init__.pyi +2 -2
  98. metaflow-stubs/mf_extensions/outerbounds/plugins/fast_bakery/baker.pyi +4 -4
  99. metaflow-stubs/mf_extensions/outerbounds/plugins/fast_bakery/docker_environment.pyi +4 -4
  100. metaflow-stubs/mf_extensions/outerbounds/plugins/fast_bakery/fast_bakery.pyi +2 -2
  101. metaflow-stubs/mf_extensions/outerbounds/plugins/kubernetes/__init__.pyi +2 -2
  102. metaflow-stubs/mf_extensions/outerbounds/plugins/kubernetes/pod_killer.pyi +2 -2
  103. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/__init__.pyi +2 -2
  104. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/constants.pyi +2 -2
  105. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/exceptions.pyi +2 -2
  106. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/ollama.pyi +2 -2
  107. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/status_card.pyi +2 -2
  108. metaflow-stubs/mf_extensions/outerbounds/plugins/snowflake/__init__.pyi +2 -2
  109. metaflow-stubs/mf_extensions/outerbounds/plugins/snowflake/snowflake.pyi +2 -2
  110. metaflow-stubs/mf_extensions/outerbounds/profilers/__init__.pyi +2 -2
  111. metaflow-stubs/mf_extensions/outerbounds/profilers/gpu.pyi +2 -2
  112. metaflow-stubs/mf_extensions/outerbounds/remote_config.pyi +2 -2
  113. metaflow-stubs/mf_extensions/outerbounds/toplevel/__init__.pyi +2 -2
  114. metaflow-stubs/mf_extensions/outerbounds/toplevel/global_aliases_for_metaflow_package.pyi +2 -2
  115. metaflow-stubs/multicore_utils.pyi +2 -2
  116. metaflow-stubs/ob_internal.pyi +2 -2
  117. metaflow-stubs/parameters.pyi +3 -3
  118. metaflow-stubs/plugins/__init__.pyi +9 -9
  119. metaflow-stubs/plugins/airflow/__init__.pyi +2 -2
  120. metaflow-stubs/plugins/airflow/airflow_utils.pyi +2 -2
  121. metaflow-stubs/plugins/airflow/exception.pyi +2 -2
  122. metaflow-stubs/plugins/airflow/sensors/__init__.pyi +2 -2
  123. metaflow-stubs/plugins/airflow/sensors/base_sensor.pyi +2 -2
  124. metaflow-stubs/plugins/airflow/sensors/external_task_sensor.pyi +2 -2
  125. metaflow-stubs/plugins/airflow/sensors/s3_sensor.pyi +2 -2
  126. metaflow-stubs/plugins/argo/__init__.pyi +2 -2
  127. metaflow-stubs/plugins/argo/argo_client.pyi +2 -2
  128. metaflow-stubs/plugins/argo/argo_events.pyi +2 -2
  129. metaflow-stubs/plugins/argo/argo_workflows.pyi +33 -4
  130. metaflow-stubs/plugins/argo/argo_workflows_decorator.pyi +4 -4
  131. metaflow-stubs/plugins/argo/argo_workflows_deployer.pyi +3 -3
  132. metaflow-stubs/plugins/argo/argo_workflows_deployer_objects.pyi +24 -2
  133. metaflow-stubs/plugins/argo/exit_hooks.pyi +2 -2
  134. metaflow-stubs/plugins/aws/__init__.pyi +2 -2
  135. metaflow-stubs/plugins/aws/aws_client.pyi +2 -2
  136. metaflow-stubs/plugins/aws/aws_utils.pyi +2 -2
  137. metaflow-stubs/plugins/aws/batch/__init__.pyi +2 -2
  138. metaflow-stubs/plugins/aws/batch/batch.pyi +4 -4
  139. metaflow-stubs/plugins/aws/batch/batch_client.pyi +2 -2
  140. metaflow-stubs/plugins/aws/batch/batch_decorator.pyi +2 -4
  141. metaflow-stubs/plugins/aws/secrets_manager/__init__.pyi +2 -2
  142. metaflow-stubs/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.pyi +5 -5
  143. metaflow-stubs/plugins/aws/step_functions/__init__.pyi +2 -2
  144. metaflow-stubs/plugins/aws/step_functions/event_bridge_client.pyi +2 -2
  145. metaflow-stubs/plugins/aws/step_functions/schedule_decorator.pyi +2 -2
  146. metaflow-stubs/plugins/aws/step_functions/step_functions.pyi +3 -3
  147. metaflow-stubs/plugins/aws/step_functions/step_functions_client.pyi +2 -2
  148. metaflow-stubs/plugins/aws/step_functions/step_functions_deployer.pyi +4 -4
  149. metaflow-stubs/plugins/aws/step_functions/step_functions_deployer_objects.pyi +14 -3
  150. metaflow-stubs/plugins/azure/__init__.pyi +2 -2
  151. metaflow-stubs/plugins/azure/azure_credential.pyi +2 -2
  152. metaflow-stubs/plugins/azure/azure_exceptions.pyi +2 -2
  153. metaflow-stubs/plugins/azure/azure_secret_manager_secrets_provider.pyi +5 -5
  154. metaflow-stubs/plugins/azure/azure_utils.pyi +2 -2
  155. metaflow-stubs/plugins/azure/blob_service_client_factory.pyi +2 -2
  156. metaflow-stubs/plugins/azure/includefile_support.pyi +2 -2
  157. metaflow-stubs/plugins/cards/__init__.pyi +2 -2
  158. metaflow-stubs/plugins/cards/card_client.pyi +2 -2
  159. metaflow-stubs/plugins/cards/card_creator.pyi +2 -2
  160. metaflow-stubs/plugins/cards/card_datastore.pyi +2 -2
  161. metaflow-stubs/plugins/cards/card_decorator.pyi +4 -2
  162. metaflow-stubs/plugins/cards/card_modules/__init__.pyi +2 -2
  163. metaflow-stubs/plugins/cards/card_modules/basic.pyi +2 -2
  164. metaflow-stubs/plugins/cards/card_modules/card.pyi +2 -2
  165. metaflow-stubs/plugins/cards/card_modules/components.pyi +4 -4
  166. metaflow-stubs/plugins/cards/card_modules/convert_to_native_type.pyi +2 -2
  167. metaflow-stubs/plugins/cards/card_modules/renderer_tools.pyi +2 -2
  168. metaflow-stubs/plugins/cards/card_modules/test_cards.pyi +2 -2
  169. metaflow-stubs/plugins/cards/card_resolver.pyi +2 -2
  170. metaflow-stubs/plugins/cards/component_serializer.pyi +2 -2
  171. metaflow-stubs/plugins/cards/exception.pyi +2 -2
  172. metaflow-stubs/plugins/catch_decorator.pyi +3 -3
  173. metaflow-stubs/plugins/datatools/__init__.pyi +2 -2
  174. metaflow-stubs/plugins/datatools/local.pyi +2 -2
  175. metaflow-stubs/plugins/datatools/s3/__init__.pyi +2 -2
  176. metaflow-stubs/plugins/datatools/s3/s3.pyi +4 -4
  177. metaflow-stubs/plugins/datatools/s3/s3tail.pyi +2 -2
  178. metaflow-stubs/plugins/datatools/s3/s3util.pyi +2 -2
  179. metaflow-stubs/plugins/debug_logger.pyi +2 -2
  180. metaflow-stubs/plugins/debug_monitor.pyi +2 -2
  181. metaflow-stubs/plugins/environment_decorator.pyi +2 -2
  182. metaflow-stubs/plugins/events_decorator.pyi +2 -2
  183. metaflow-stubs/plugins/exit_hook/__init__.pyi +2 -2
  184. metaflow-stubs/plugins/exit_hook/exit_hook_decorator.pyi +2 -2
  185. metaflow-stubs/plugins/frameworks/__init__.pyi +2 -2
  186. metaflow-stubs/plugins/frameworks/pytorch.pyi +2 -2
  187. metaflow-stubs/plugins/gcp/__init__.pyi +2 -2
  188. metaflow-stubs/plugins/gcp/gcp_secret_manager_secrets_provider.pyi +5 -5
  189. metaflow-stubs/plugins/gcp/gs_exceptions.pyi +2 -2
  190. metaflow-stubs/plugins/gcp/gs_storage_client_factory.pyi +2 -2
  191. metaflow-stubs/plugins/gcp/gs_utils.pyi +2 -2
  192. metaflow-stubs/plugins/gcp/includefile_support.pyi +2 -2
  193. metaflow-stubs/plugins/kubernetes/__init__.pyi +2 -2
  194. metaflow-stubs/plugins/kubernetes/kube_utils.pyi +2 -2
  195. metaflow-stubs/plugins/kubernetes/kubernetes.pyi +4 -4
  196. metaflow-stubs/plugins/kubernetes/kubernetes_client.pyi +2 -2
  197. metaflow-stubs/plugins/kubernetes/kubernetes_decorator.pyi +3 -5
  198. metaflow-stubs/plugins/kubernetes/kubernetes_jobsets.pyi +3 -2
  199. metaflow-stubs/plugins/kubernetes/spot_monitor_sidecar.pyi +2 -2
  200. metaflow-stubs/plugins/ollama/__init__.pyi +2 -2
  201. metaflow-stubs/plugins/parallel_decorator.pyi +3 -3
  202. metaflow-stubs/plugins/perimeters.pyi +2 -2
  203. metaflow-stubs/plugins/project_decorator.pyi +2 -2
  204. metaflow-stubs/plugins/pypi/__init__.pyi +2 -2
  205. metaflow-stubs/plugins/pypi/conda_decorator.pyi +8 -5
  206. metaflow-stubs/plugins/pypi/conda_environment.pyi +5 -6
  207. metaflow-stubs/plugins/pypi/parsers.pyi +2 -2
  208. metaflow-stubs/plugins/pypi/pypi_decorator.pyi +4 -4
  209. metaflow-stubs/plugins/pypi/pypi_environment.pyi +2 -2
  210. metaflow-stubs/plugins/pypi/utils.pyi +2 -2
  211. metaflow-stubs/plugins/resources_decorator.pyi +2 -2
  212. metaflow-stubs/plugins/retry_decorator.pyi +2 -2
  213. metaflow-stubs/plugins/secrets/__init__.pyi +3 -3
  214. metaflow-stubs/plugins/secrets/inline_secrets_provider.pyi +3 -3
  215. metaflow-stubs/plugins/secrets/secrets_decorator.pyi +2 -2
  216. metaflow-stubs/plugins/secrets/secrets_func.pyi +2 -2
  217. metaflow-stubs/plugins/secrets/secrets_spec.pyi +2 -2
  218. metaflow-stubs/plugins/secrets/utils.pyi +2 -2
  219. metaflow-stubs/plugins/snowflake/__init__.pyi +2 -2
  220. metaflow-stubs/plugins/storage_executor.pyi +2 -2
  221. metaflow-stubs/plugins/test_unbounded_foreach_decorator.pyi +4 -4
  222. metaflow-stubs/plugins/timeout_decorator.pyi +3 -3
  223. metaflow-stubs/plugins/torchtune/__init__.pyi +2 -2
  224. metaflow-stubs/plugins/uv/__init__.pyi +2 -2
  225. metaflow-stubs/plugins/uv/uv_environment.pyi +3 -4
  226. metaflow-stubs/profilers/__init__.pyi +2 -2
  227. metaflow-stubs/pylint_wrapper.pyi +2 -2
  228. metaflow-stubs/runner/__init__.pyi +2 -2
  229. metaflow-stubs/runner/deployer.pyi +35 -4
  230. metaflow-stubs/runner/deployer_impl.pyi +2 -2
  231. metaflow-stubs/runner/metaflow_runner.pyi +3 -3
  232. metaflow-stubs/runner/nbdeploy.pyi +2 -2
  233. metaflow-stubs/runner/nbrun.pyi +2 -2
  234. metaflow-stubs/runner/subprocess_manager.pyi +2 -3
  235. metaflow-stubs/runner/utils.pyi +3 -3
  236. metaflow-stubs/system/__init__.pyi +2 -2
  237. metaflow-stubs/system/system_logger.pyi +2 -2
  238. metaflow-stubs/system/system_monitor.pyi +2 -2
  239. metaflow-stubs/tagging_util.pyi +2 -2
  240. metaflow-stubs/tuple_util.pyi +2 -2
  241. metaflow-stubs/user_configs/__init__.pyi +3 -2
  242. metaflow-stubs/user_configs/config_decorators.pyi +251 -0
  243. metaflow-stubs/user_configs/config_options.pyi +4 -3
  244. metaflow-stubs/user_configs/config_parameters.pyi +7 -5
  245. {ob_metaflow_stubs-6.0.4.8rc1.dist-info → ob_metaflow_stubs-6.0.4.9.dist-info}/METADATA +1 -1
  246. ob_metaflow_stubs-6.0.4.9.dist-info/RECORD +249 -0
  247. metaflow-stubs/packaging_sys/__init__.pyi +0 -430
  248. metaflow-stubs/packaging_sys/backend.pyi +0 -73
  249. metaflow-stubs/packaging_sys/distribution_support.pyi +0 -57
  250. metaflow-stubs/packaging_sys/tar_backend.pyi +0 -53
  251. metaflow-stubs/packaging_sys/utils.pyi +0 -26
  252. metaflow-stubs/packaging_sys/v1.pyi +0 -145
  253. metaflow-stubs/user_decorators/__init__.pyi +0 -15
  254. metaflow-stubs/user_decorators/common.pyi +0 -38
  255. metaflow-stubs/user_decorators/mutable_flow.pyi +0 -223
  256. metaflow-stubs/user_decorators/mutable_step.pyi +0 -152
  257. metaflow-stubs/user_decorators/user_flow_decorator.pyi +0 -137
  258. metaflow-stubs/user_decorators/user_step_decorator.pyi +0 -323
  259. ob_metaflow_stubs-6.0.4.8rc1.dist-info/RECORD +0 -260
  260. {ob_metaflow_stubs-6.0.4.8rc1.dist-info → ob_metaflow_stubs-6.0.4.9.dist-info}/WHEEL +0 -0
  261. {ob_metaflow_stubs-6.0.4.8rc1.dist-info → ob_metaflow_stubs-6.0.4.9.dist-info}/top_level.txt +0 -0
@@ -1,7 +1,7 @@
1
1
  ######################################################################################################
2
2
  # Auto-generated Metaflow stub file #
3
- # MF version: 2.16.5.1+obcheckpoint(0.2.4);ob(v1) #
4
- # Generated on 2025-07-28T18:04:46.561163 #
3
+ # MF version: 2.15.21.5+obcheckpoint(0.2.4);ob(v1) #
4
+ # Generated on 2025-07-30T20:52:28.447575 #
5
5
  ######################################################################################################
6
6
 
7
7
  from __future__ import annotations
@@ -13,8 +13,7 @@ if typing.TYPE_CHECKING:
13
13
  FlowSpecDerived = typing.TypeVar("FlowSpecDerived", bound="FlowSpec", contravariant=False, covariant=False)
14
14
  StepFlag = typing.NewType("StepFlag", bool)
15
15
 
16
- from . import meta_files as meta_files
17
- from . import packaging_sys as packaging_sys
16
+ from . import info_file as info_file
18
17
  from . import exception as exception
19
18
  from . import metaflow_config as metaflow_config
20
19
  from . import multicore_utils as multicore_utils
@@ -24,7 +23,6 @@ from . import metaflow_current as metaflow_current
24
23
  from .metaflow_current import current as current
25
24
  from . import parameters as parameters
26
25
  from . import user_configs as user_configs
27
- from . import user_decorators as user_decorators
28
26
  from . import tagging_util as tagging_util
29
27
  from . import metadata_provider as metadata_provider
30
28
  from . import flowspec as flowspec
@@ -35,12 +33,10 @@ from .parameters import JSONType as JSONType
35
33
  from .user_configs.config_parameters import Config as Config
36
34
  from .user_configs.config_parameters import ConfigValue as ConfigValue
37
35
  from .user_configs.config_parameters import config_expr as config_expr
38
- from .user_decorators.user_step_decorator import UserStepDecorator as UserStepDecorator
39
- from .user_decorators.user_step_decorator import StepMutator as StepMutator
40
- from .user_decorators.user_step_decorator import user_step_decorator as user_step_decorator
41
- from .user_decorators.user_flow_decorator import FlowMutator as FlowMutator
42
- from . import tuple_util as tuple_util
36
+ from .user_configs.config_decorators import CustomFlowDecorator as CustomFlowDecorator
37
+ from .user_configs.config_decorators import CustomStepDecorator as CustomStepDecorator
43
38
  from . import cards as cards
39
+ from . import tuple_util as tuple_util
44
40
  from . import metaflow_git as metaflow_git
45
41
  from . import events as events
46
42
  from . import runner as runner
@@ -49,8 +45,8 @@ from .mf_extensions.outerbounds.toplevel.global_aliases_for_metaflow_package imp
49
45
  from . import includefile as includefile
50
46
  from .includefile import IncludeFile as IncludeFile
51
47
  from .plugins.pypi.parsers import pyproject_toml_parser as pyproject_toml_parser
52
- from .plugins.pypi.parsers import conda_environment_yml_parser as conda_environment_yml_parser
53
48
  from .plugins.pypi.parsers import requirements_txt_parser as requirements_txt_parser
49
+ from .plugins.pypi.parsers import conda_environment_yml_parser as conda_environment_yml_parser
54
50
  from . import client as client
55
51
  from .client.core import namespace as namespace
56
52
  from .client.core import get_namespace as get_namespace
@@ -78,8 +74,8 @@ from .mf_extensions.outerbounds.plugins.checkpoint_datastores.nebius import nebi
78
74
  from .mf_extensions.outerbounds.plugins.checkpoint_datastores.coreweave import coreweave_checkpoints as coreweave_checkpoints
79
75
  from .mf_extensions.outerbounds.plugins.aws.assume_role_decorator import assume_role as assume_role
80
76
  from .mf_extensions.outerbounds.plugins.apps.core.deployer import AppDeployer as AppDeployer
81
- from . import cli_components as cli_components
82
77
  from . import system as system
78
+ from . import cli_components as cli_components
83
79
  from . import pylint_wrapper as pylint_wrapper
84
80
  from . import cli as cli
85
81
  from . import profilers as profilers
@@ -87,8 +83,6 @@ from . import ob_internal as ob_internal
87
83
 
88
84
  EXT_PKG: str
89
85
 
90
- USER_SKIP_STEP: dict
91
-
92
86
  @typing.overload
93
87
  def step(f: typing.Callable[[FlowSpecDerived], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
94
88
  """
@@ -163,85 +157,68 @@ def step(f: typing.Union[typing.Callable[[FlowSpecDerived], None], typing.Callab
163
157
  ...
164
158
 
165
159
  @typing.overload
166
- def resources(*, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
160
+ def parallel(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
167
161
  """
168
- Specifies the resources needed when executing this step.
169
-
170
- Use `@resources` to specify the resource requirements
171
- independently of the specific compute layer (`@batch`, `@kubernetes`).
172
-
173
- You can choose the compute layer on the command line by executing e.g.
174
- ```
175
- python myflow.py run --with batch
176
- ```
177
- or
178
- ```
179
- python myflow.py run --with kubernetes
180
- ```
181
- which executes the flow on the desired system using the
182
- requirements specified in `@resources`.
183
-
184
-
185
- Parameters
186
- ----------
187
- cpu : int, default 1
188
- Number of CPUs required for this step.
189
- gpu : int, optional, default None
190
- Number of GPUs required for this step.
191
- disk : int, optional, default None
192
- Disk size (in MB) required for this step. Only applies on Kubernetes.
193
- memory : int, default 4096
194
- Memory size (in MB) required for this step.
195
- shared_memory : int, optional, default None
196
- The value for the size (in MiB) of the /dev/shm volume for this step.
197
- This parameter maps to the `--shm-size` option in Docker.
162
+ Decorator prototype for all step decorators. This function gets specialized
163
+ and imported for all decorators types by _import_plugin_decorators().
198
164
  """
199
165
  ...
200
166
 
201
167
  @typing.overload
202
- def resources(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
168
+ def parallel(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
203
169
  ...
204
170
 
205
- @typing.overload
206
- def resources(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
171
+ def parallel(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
172
+ """
173
+ Decorator prototype for all step decorators. This function gets specialized
174
+ and imported for all decorators types by _import_plugin_decorators().
175
+ """
207
176
  ...
208
177
 
209
- def resources(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None):
178
+ def ollama(*, models: list, backend: str, force_pull: bool, cache_update_policy: str, force_cache_update: bool, debug: bool, circuit_breaker_config: dict, timeout_config: dict) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
210
179
  """
211
- Specifies the resources needed when executing this step.
180
+ This decorator is used to run Ollama APIs as Metaflow task sidecars.
212
181
 
213
- Use `@resources` to specify the resource requirements
214
- independently of the specific compute layer (`@batch`, `@kubernetes`).
182
+ User code call
183
+ --------------
184
+ @ollama(
185
+ models=[...],
186
+ ...
187
+ )
215
188
 
216
- You can choose the compute layer on the command line by executing e.g.
217
- ```
218
- python myflow.py run --with batch
219
- ```
220
- or
221
- ```
222
- python myflow.py run --with kubernetes
223
- ```
224
- which executes the flow on the desired system using the
225
- requirements specified in `@resources`.
189
+ Valid backend options
190
+ ---------------------
191
+ - 'local': Run as a separate process on the local task machine.
192
+ - (TODO) 'managed': Outerbounds hosts and selects compute provider.
193
+ - (TODO) 'remote': Spin up separate instance to serve Ollama models.
194
+
195
+ Valid model options
196
+ -------------------
197
+ Any model here https://ollama.com/search, e.g. 'llama3.2', 'llama3.3'
226
198
 
227
199
 
228
200
  Parameters
229
201
  ----------
230
- cpu : int, default 1
231
- Number of CPUs required for this step.
232
- gpu : int, optional, default None
233
- Number of GPUs required for this step.
234
- disk : int, optional, default None
235
- Disk size (in MB) required for this step. Only applies on Kubernetes.
236
- memory : int, default 4096
237
- Memory size (in MB) required for this step.
238
- shared_memory : int, optional, default None
239
- The value for the size (in MiB) of the /dev/shm volume for this step.
240
- This parameter maps to the `--shm-size` option in Docker.
202
+ models: list[str]
203
+ List of Ollama containers running models in sidecars.
204
+ backend: str
205
+ Determines where and how to run the Ollama process.
206
+ force_pull: bool
207
+ Whether to run `ollama pull` no matter what, or first check the remote cache in Metaflow datastore for this model key.
208
+ cache_update_policy: str
209
+ Cache update policy: "auto", "force", or "never".
210
+ force_cache_update: bool
211
+ Simple override for "force" cache update policy.
212
+ debug: bool
213
+ Whether to turn on verbose debugging logs.
214
+ circuit_breaker_config: dict
215
+ Configuration for circuit breaker protection. Keys: failure_threshold, recovery_timeout, reset_timeout.
216
+ timeout_config: dict
217
+ Configuration for various operation timeouts. Keys: pull, stop, health_check, install, server_startup.
241
218
  """
242
219
  ...
243
220
 
244
- def kubernetes(*, cpu: int = 1, memory: int = 4096, disk: int = 10240, image: typing.Optional[str] = None, image_pull_policy: str = 'KUBERNETES_IMAGE_PULL_POLICY', image_pull_secrets: typing.List[str] = [], service_account: str = 'METAFLOW_KUBERNETES_SERVICE_ACCOUNT', secrets: typing.Optional[typing.List[str]] = None, node_selector: typing.Union[typing.Dict[str, str], str, None] = None, namespace: str = 'METAFLOW_KUBERNETES_NAMESPACE', gpu: typing.Optional[int] = None, gpu_vendor: str = 'KUBERNETES_GPU_VENDOR', tolerations: typing.List[typing.Dict[str, str]] = [], labels: typing.Dict[str, str] = 'METAFLOW_KUBERNETES_LABELS', annotations: typing.Dict[str, str] = 'METAFLOW_KUBERNETES_ANNOTATIONS', use_tmpfs: bool = False, tmpfs_tempdir: bool = True, tmpfs_size: typing.Optional[int] = None, tmpfs_path: typing.Optional[str] = '/metaflow_temp', persistent_volume_claims: typing.Optional[typing.Dict[str, str]] = None, shared_memory: typing.Optional[int] = None, port: typing.Optional[int] = None, compute_pool: typing.Optional[str] = None, hostname_resolution_timeout: int = 600, qos: str = 'Burstable', security_context: typing.Optional[typing.Dict[str, typing.Any]] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
221
+ def kubernetes(*, cpu: int = 1, memory: int = 4096, disk: int = 10240, image: typing.Optional[str] = None, image_pull_policy: str = 'KUBERNETES_IMAGE_PULL_POLICY', image_pull_secrets: typing.List[str] = [], service_account: str = 'METAFLOW_KUBERNETES_SERVICE_ACCOUNT', secrets: typing.Optional[typing.List[str]] = None, node_selector: typing.Union[typing.Dict[str, str], str, None] = None, namespace: str = 'METAFLOW_KUBERNETES_NAMESPACE', gpu: typing.Optional[int] = None, gpu_vendor: str = 'KUBERNETES_GPU_VENDOR', tolerations: typing.List[str] = [], labels: typing.Dict[str, str] = 'METAFLOW_KUBERNETES_LABELS', annotations: typing.Dict[str, str] = 'METAFLOW_KUBERNETES_ANNOTATIONS', use_tmpfs: bool = False, tmpfs_tempdir: bool = True, tmpfs_size: typing.Optional[int] = None, tmpfs_path: typing.Optional[str] = '/metaflow_temp', persistent_volume_claims: typing.Optional[typing.Dict[str, str]] = None, shared_memory: typing.Optional[int] = None, port: typing.Optional[int] = None, compute_pool: typing.Optional[str] = None, hostname_resolution_timeout: int = 600, qos: str = 'Burstable', security_context: typing.Optional[typing.Dict[str, typing.Any]] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
245
222
  """
246
223
  Specifies that this step should execute on Kubernetes.
247
224
 
@@ -287,7 +264,7 @@ def kubernetes(*, cpu: int = 1, memory: int = 4096, disk: int = 10240, image: ty
287
264
  the scheduled node should not have GPUs.
288
265
  gpu_vendor : str, default KUBERNETES_GPU_VENDOR
289
266
  The vendor of the GPUs to be used for this step.
290
- tolerations : List[Dict[str,str]], default []
267
+ tolerations : List[str], default []
291
268
  The default is extracted from METAFLOW_KUBERNETES_TOLERATIONS.
292
269
  Kubernetes tolerations to use when launching pod in Kubernetes.
293
270
  labels: Dict[str, str], default: METAFLOW_KUBERNETES_LABELS
@@ -331,379 +308,495 @@ def kubernetes(*, cpu: int = 1, memory: int = 4096, disk: int = 10240, image: ty
331
308
  ...
332
309
 
333
310
  @typing.overload
334
- def app_deploy(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
311
+ def catch(*, var: typing.Optional[str] = None, print_exception: bool = True) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
335
312
  """
336
- Decorator prototype for all step decorators. This function gets specialized
337
- and imported for all decorators types by _import_plugin_decorators().
313
+ Specifies that the step will succeed under all circumstances.
314
+
315
+ The decorator will create an optional artifact, specified by `var`, which
316
+ contains the exception raised. You can use it to detect the presence
317
+ of errors, indicating that all happy-path artifacts produced by the step
318
+ are missing.
319
+
320
+
321
+ Parameters
322
+ ----------
323
+ var : str, optional, default None
324
+ Name of the artifact in which to store the caught exception.
325
+ If not specified, the exception is not stored.
326
+ print_exception : bool, default True
327
+ Determines whether or not the exception is printed to
328
+ stdout when caught.
338
329
  """
339
330
  ...
340
331
 
341
332
  @typing.overload
342
- def app_deploy(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
333
+ def catch(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
343
334
  ...
344
335
 
345
- def app_deploy(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
336
+ @typing.overload
337
+ def catch(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
338
+ ...
339
+
340
+ def catch(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, var: typing.Optional[str] = None, print_exception: bool = True):
346
341
  """
347
- Decorator prototype for all step decorators. This function gets specialized
348
- and imported for all decorators types by _import_plugin_decorators().
342
+ Specifies that the step will succeed under all circumstances.
343
+
344
+ The decorator will create an optional artifact, specified by `var`, which
345
+ contains the exception raised. You can use it to detect the presence
346
+ of errors, indicating that all happy-path artifacts produced by the step
347
+ are missing.
348
+
349
+
350
+ Parameters
351
+ ----------
352
+ var : str, optional, default None
353
+ Name of the artifact in which to store the caught exception.
354
+ If not specified, the exception is not stored.
355
+ print_exception : bool, default True
356
+ Determines whether or not the exception is printed to
357
+ stdout when caught.
349
358
  """
350
359
  ...
351
360
 
352
361
  @typing.overload
353
- def checkpoint(*, load_policy: str = 'fresh', temp_dir_root: str = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
362
+ def model(*, load: typing.Union[typing.List[str], str, typing.List[typing.Tuple[str, typing.Optional[str]]]] = None, temp_dir_root: str = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
354
363
  """
355
- Enables checkpointing for a step.
364
+ Enables loading / saving of models within a step.
356
365
 
357
366
  > Examples
358
-
359
- - Saving Checkpoints
360
-
367
+ - Saving Models
361
368
  ```python
362
- @checkpoint
369
+ @model
363
370
  @step
364
371
  def train(self):
365
- model = create_model(self.parameters, checkpoint_path = None)
366
- for i in range(self.epochs):
367
- # some training logic
368
- loss = model.train(self.dataset)
369
- if i % 10 == 0:
370
- model.save(
371
- current.checkpoint.directory,
372
- )
373
- # saves the contents of the `current.checkpoint.directory` as a checkpoint
374
- # and returns a reference dictionary to the checkpoint saved in the datastore
375
- self.latest_checkpoint = current.checkpoint.save(
376
- name="epoch_checkpoint",
377
- metadata={
378
- "epoch": i,
379
- "loss": loss,
380
- }
381
- )
382
- ```
372
+ # current.model.save returns a dictionary reference to the model saved
373
+ self.my_model = current.model.save(
374
+ path_to_my_model,
375
+ label="my_model",
376
+ metadata={
377
+ "epochs": 10,
378
+ "batch-size": 32,
379
+ "learning-rate": 0.001,
380
+ }
381
+ )
382
+ self.next(self.test)
383
383
 
384
- - Using Loaded Checkpoints
384
+ @model(load="my_model")
385
+ @step
386
+ def test(self):
387
+ # `current.model.loaded` returns a dictionary of the loaded models
388
+ # where the key is the name of the artifact and the value is the path to the model
389
+ print(os.listdir(current.model.loaded["my_model"]))
390
+ self.next(self.end)
391
+ ```
385
392
 
393
+ - Loading models
386
394
  ```python
387
- @retry(times=3)
388
- @checkpoint
389
395
  @step
390
396
  def train(self):
391
- # Assume that the task has restarted and the previous attempt of the task
392
- # saved a checkpoint
393
- checkpoint_path = None
394
- if current.checkpoint.is_loaded: # Check if a checkpoint is loaded
395
- print("Loaded checkpoint from the previous attempt")
396
- checkpoint_path = current.checkpoint.directory
397
-
398
- model = create_model(self.parameters, checkpoint_path = checkpoint_path)
399
- for i in range(self.epochs):
400
- ...
397
+ # current.model.load returns the path to the model loaded
398
+ checkpoint_path = current.model.load(
399
+ self.checkpoint_key,
400
+ )
401
+ model_path = current.model.load(
402
+ self.model,
403
+ )
404
+ self.next(self.test)
401
405
  ```
402
406
 
403
407
 
404
408
  Parameters
405
409
  ----------
406
- load_policy : str, default: "fresh"
407
- The policy for loading the checkpoint. The following policies are supported:
408
- - "eager": Loads the the latest available checkpoint within the namespace.
409
- With this mode, the latest checkpoint written by any previous task (can be even a different run) of the step
410
- will be loaded at the start of the task.
411
- - "none": Do not load any checkpoint
412
- - "fresh": Loads the lastest checkpoint created within the running Task.
413
- This mode helps loading checkpoints across various retry attempts of the same task.
414
- With this mode, no checkpoint will be loaded at the start of a task but any checkpoints
415
- created within the task will be loaded when the task is retries execution on failure.
410
+ load : Union[List[str],str,List[Tuple[str,Union[str,None]]]], default: None
411
+ Artifact name/s referencing the models/checkpoints to load. Artifact names refer to the names of the instance variables set to `self`.
412
+ These artifact names given to `load` can be reference objects or reference `key` strings from objects created by `current.checkpoint` / `current.model` / `current.huggingface_hub`.
413
+ If a list of tuples is provided, the first element is the artifact name and the second element is the path the artifact needs to be unpacked on
414
+ the local filesystem. If the second element is None, the artifact will be unpacked in the current working directory.
415
+ If a string is provided, then the artifact corresponding to that name will be loaded in the current working directory.
416
416
 
417
417
  temp_dir_root : str, default: None
418
- The root directory under which `current.checkpoint.directory` will be created.
418
+ The root directory under which `current.model.loaded` will store loaded models
419
419
  """
420
420
  ...
421
421
 
422
422
  @typing.overload
423
- def checkpoint(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
423
+ def model(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
424
424
  ...
425
425
 
426
426
  @typing.overload
427
- def checkpoint(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
427
+ def model(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
428
428
  ...
429
429
 
430
- def checkpoint(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, load_policy: str = 'fresh', temp_dir_root: str = None):
430
+ def model(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, load: typing.Union[typing.List[str], str, typing.List[typing.Tuple[str, typing.Optional[str]]]] = None, temp_dir_root: str = None):
431
431
  """
432
- Enables checkpointing for a step.
432
+ Enables loading / saving of models within a step.
433
433
 
434
434
  > Examples
435
-
436
- - Saving Checkpoints
437
-
435
+ - Saving Models
438
436
  ```python
439
- @checkpoint
437
+ @model
440
438
  @step
441
439
  def train(self):
442
- model = create_model(self.parameters, checkpoint_path = None)
443
- for i in range(self.epochs):
444
- # some training logic
445
- loss = model.train(self.dataset)
446
- if i % 10 == 0:
447
- model.save(
448
- current.checkpoint.directory,
449
- )
450
- # saves the contents of the `current.checkpoint.directory` as a checkpoint
451
- # and returns a reference dictionary to the checkpoint saved in the datastore
452
- self.latest_checkpoint = current.checkpoint.save(
453
- name="epoch_checkpoint",
454
- metadata={
455
- "epoch": i,
456
- "loss": loss,
457
- }
458
- )
459
- ```
440
+ # current.model.save returns a dictionary reference to the model saved
441
+ self.my_model = current.model.save(
442
+ path_to_my_model,
443
+ label="my_model",
444
+ metadata={
445
+ "epochs": 10,
446
+ "batch-size": 32,
447
+ "learning-rate": 0.001,
448
+ }
449
+ )
450
+ self.next(self.test)
460
451
 
461
- - Using Loaded Checkpoints
452
+ @model(load="my_model")
453
+ @step
454
+ def test(self):
455
+ # `current.model.loaded` returns a dictionary of the loaded models
456
+ # where the key is the name of the artifact and the value is the path to the model
457
+ print(os.listdir(current.model.loaded["my_model"]))
458
+ self.next(self.end)
459
+ ```
462
460
 
461
+ - Loading models
463
462
  ```python
464
- @retry(times=3)
465
- @checkpoint
466
463
  @step
467
464
  def train(self):
468
- # Assume that the task has restarted and the previous attempt of the task
469
- # saved a checkpoint
470
- checkpoint_path = None
471
- if current.checkpoint.is_loaded: # Check if a checkpoint is loaded
472
- print("Loaded checkpoint from the previous attempt")
473
- checkpoint_path = current.checkpoint.directory
474
-
475
- model = create_model(self.parameters, checkpoint_path = checkpoint_path)
476
- for i in range(self.epochs):
477
- ...
465
+ # current.model.load returns the path to the model loaded
466
+ checkpoint_path = current.model.load(
467
+ self.checkpoint_key,
468
+ )
469
+ model_path = current.model.load(
470
+ self.model,
471
+ )
472
+ self.next(self.test)
478
473
  ```
479
474
 
480
475
 
481
476
  Parameters
482
477
  ----------
483
- load_policy : str, default: "fresh"
484
- The policy for loading the checkpoint. The following policies are supported:
485
- - "eager": Loads the the latest available checkpoint within the namespace.
486
- With this mode, the latest checkpoint written by any previous task (can be even a different run) of the step
487
- will be loaded at the start of the task.
488
- - "none": Do not load any checkpoint
489
- - "fresh": Loads the lastest checkpoint created within the running Task.
490
- This mode helps loading checkpoints across various retry attempts of the same task.
491
- With this mode, no checkpoint will be loaded at the start of a task but any checkpoints
492
- created within the task will be loaded when the task is retries execution on failure.
478
+ load : Union[List[str],str,List[Tuple[str,Union[str,None]]]], default: None
479
+ Artifact name/s referencing the models/checkpoints to load. Artifact names refer to the names of the instance variables set to `self`.
480
+ These artifact names given to `load` can be reference objects or reference `key` strings from objects created by `current.checkpoint` / `current.model` / `current.huggingface_hub`.
481
+ If a list of tuples is provided, the first element is the artifact name and the second element is the path the artifact needs to be unpacked on
482
+ the local filesystem. If the second element is None, the artifact will be unpacked in the current working directory.
483
+ If a string is provided, then the artifact corresponding to that name will be loaded in the current working directory.
493
484
 
494
485
  temp_dir_root : str, default: None
495
- The root directory under which `current.checkpoint.directory` will be created.
496
- """
497
- ...
498
-
499
- @typing.overload
500
- def test_append_card(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
501
- """
502
- A simple decorator that demonstrates using CardDecoratorInjector
503
- to inject a card and render simple markdown content.
504
- """
505
- ...
506
-
507
- @typing.overload
508
- def test_append_card(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
509
- ...
510
-
511
- def test_append_card(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
512
- """
513
- A simple decorator that demonstrates using CardDecoratorInjector
514
- to inject a card and render simple markdown content.
486
+ The root directory under which `current.model.loaded` will store loaded models
515
487
  """
516
488
  ...
517
489
 
518
490
  @typing.overload
519
- def conda(*, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
491
+ def card(*, type: str = 'default', id: typing.Optional[str] = None, options: typing.Dict[str, typing.Any] = {}, timeout: int = 45) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
520
492
  """
521
- Specifies the Conda environment for the step.
493
+ Creates a human-readable report, a Metaflow Card, after this step completes.
522
494
 
523
- Information in this decorator will augment any
524
- attributes set in the `@conda_base` flow-level decorator. Hence,
525
- you can use `@conda_base` to set packages required by all
526
- steps and use `@conda` to specify step-specific overrides.
495
+ Note that you may add multiple `@card` decorators in a step with different parameters.
527
496
 
528
497
 
529
498
  Parameters
530
499
  ----------
531
- packages : Dict[str, str], default {}
532
- Packages to use for this step. The key is the name of the package
533
- and the value is the version to use.
534
- libraries : Dict[str, str], default {}
535
- Supported for backward compatibility. When used with packages, packages will take precedence.
536
- python : str, optional, default None
537
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
538
- that the version used will correspond to the version of the Python interpreter used to start the run.
539
- disabled : bool, default False
540
- If set to True, disables @conda.
500
+ type : str, default 'default'
501
+ Card type.
502
+ id : str, optional, default None
503
+ If multiple cards are present, use this id to identify this card.
504
+ options : Dict[str, Any], default {}
505
+ Options passed to the card. The contents depend on the card type.
506
+ timeout : int, default 45
507
+ Interrupt reporting if it takes more than this many seconds.
541
508
  """
542
509
  ...
543
510
 
544
511
  @typing.overload
545
- def conda(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
512
+ def card(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
546
513
  ...
547
514
 
548
515
  @typing.overload
549
- def conda(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
516
+ def card(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
550
517
  ...
551
518
 
552
- def conda(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False):
519
+ def card(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, type: str = 'default', id: typing.Optional[str] = None, options: typing.Dict[str, typing.Any] = {}, timeout: int = 45):
553
520
  """
554
- Specifies the Conda environment for the step.
521
+ Creates a human-readable report, a Metaflow Card, after this step completes.
555
522
 
556
- Information in this decorator will augment any
557
- attributes set in the `@conda_base` flow-level decorator. Hence,
558
- you can use `@conda_base` to set packages required by all
559
- steps and use `@conda` to specify step-specific overrides.
523
+ Note that you may add multiple `@card` decorators in a step with different parameters.
560
524
 
561
525
 
562
526
  Parameters
563
527
  ----------
564
- packages : Dict[str, str], default {}
565
- Packages to use for this step. The key is the name of the package
566
- and the value is the version to use.
567
- libraries : Dict[str, str], default {}
568
- Supported for backward compatibility. When used with packages, packages will take precedence.
569
- python : str, optional, default None
570
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
571
- that the version used will correspond to the version of the Python interpreter used to start the run.
572
- disabled : bool, default False
573
- If set to True, disables @conda.
528
+ type : str, default 'default'
529
+ Card type.
530
+ id : str, optional, default None
531
+ If multiple cards are present, use this id to identify this card.
532
+ options : Dict[str, Any], default {}
533
+ Options passed to the card. The contents depend on the card type.
534
+ timeout : int, default 45
535
+ Interrupt reporting if it takes more than this many seconds.
574
536
  """
575
537
  ...
576
538
 
577
- def ollama(*, models: list, backend: str, force_pull: bool, cache_update_policy: str, force_cache_update: bool, debug: bool, circuit_breaker_config: dict, timeout_config: dict) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
539
+ def vllm(*, model: str, backend: str, openai_api_server: bool, debug: bool, card_refresh_interval: int, max_retries: int, retry_alert_frequency: int, engine_args: dict) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
578
540
  """
579
- This decorator is used to run Ollama APIs as Metaflow task sidecars.
541
+ This decorator is used to run vllm APIs as Metaflow task sidecars.
580
542
 
581
543
  User code call
582
544
  --------------
583
- @ollama(
584
- models=[...],
545
+ @vllm(
546
+ model="...",
585
547
  ...
586
548
  )
587
549
 
588
550
  Valid backend options
589
551
  ---------------------
590
552
  - 'local': Run as a separate process on the local task machine.
591
- - (TODO) 'managed': Outerbounds hosts and selects compute provider.
592
- - (TODO) 'remote': Spin up separate instance to serve Ollama models.
593
553
 
594
554
  Valid model options
595
555
  -------------------
596
- Any model here https://ollama.com/search, e.g. 'llama3.2', 'llama3.3'
556
+ Any HuggingFace model identifier, e.g. 'meta-llama/Llama-3.2-1B'
557
+
558
+ NOTE: vLLM's OpenAI-compatible server serves ONE model per server instance.
559
+ If you need multiple models, you must create multiple @vllm decorators.
597
560
 
598
561
 
599
562
  Parameters
600
563
  ----------
601
- models: list[str]
602
- List of Ollama containers running models in sidecars.
564
+ model: str
565
+ HuggingFace model identifier to be served by vLLM.
603
566
  backend: str
604
- Determines where and how to run the Ollama process.
605
- force_pull: bool
606
- Whether to run `ollama pull` no matter what, or first check the remote cache in Metaflow datastore for this model key.
607
- cache_update_policy: str
608
- Cache update policy: "auto", "force", or "never".
609
- force_cache_update: bool
610
- Simple override for "force" cache update policy.
567
+ Determines where and how to run the vLLM process.
568
+ openai_api_server: bool
569
+ Whether to use OpenAI-compatible API server mode (subprocess) instead of native engine.
570
+ Default is False (uses native engine).
571
+ Set to True for backward compatibility with existing code.
611
572
  debug: bool
612
573
  Whether to turn on verbose debugging logs.
613
- circuit_breaker_config: dict
614
- Configuration for circuit breaker protection. Keys: failure_threshold, recovery_timeout, reset_timeout.
615
- timeout_config: dict
616
- Configuration for various operation timeouts. Keys: pull, stop, health_check, install, server_startup.
617
- """
618
- ...
619
-
620
- @typing.overload
621
- def parallel(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
622
- """
623
- Decorator prototype for all step decorators. This function gets specialized
624
- and imported for all decorators types by _import_plugin_decorators().
625
- """
626
- ...
627
-
628
- @typing.overload
629
- def parallel(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
630
- ...
631
-
632
- def parallel(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
633
- """
634
- Decorator prototype for all step decorators. This function gets specialized
635
- and imported for all decorators types by _import_plugin_decorators().
574
+ card_refresh_interval: int
575
+ Interval in seconds for refreshing the vLLM status card.
576
+ Only used when openai_api_server=True.
577
+ max_retries: int
578
+ Maximum number of retries checking for vLLM server startup.
579
+ Only used when openai_api_server=True.
580
+ retry_alert_frequency: int
581
+ Frequency of alert logs for vLLM server startup retries.
582
+ Only used when openai_api_server=True.
583
+ engine_args : dict
584
+ Additional keyword arguments to pass to the vLLM engine.
585
+ For example, `tensor_parallel_size=2`.
636
586
  """
637
587
  ...
638
588
 
639
589
  @typing.overload
640
- def retry(*, times: int = 3, minutes_between_retries: int = 2) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
590
+ def timeout(*, seconds: int = 0, minutes: int = 0, hours: int = 0) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
641
591
  """
642
- Specifies the number of times the task corresponding
643
- to a step needs to be retried.
592
+ Specifies a timeout for your step.
644
593
 
645
- This decorator is useful for handling transient errors, such as networking issues.
646
- If your task contains operations that can't be retried safely, e.g. database updates,
647
- it is advisable to annotate it with `@retry(times=0)`.
594
+ This decorator is useful if this step may hang indefinitely.
648
595
 
649
- This can be used in conjunction with the `@catch` decorator. The `@catch`
650
- decorator will execute a no-op task after all retries have been exhausted,
651
- ensuring that the flow execution can continue.
596
+ This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
597
+ A timeout is considered to be an exception thrown by the step. It will cause the step to be
598
+ retried if needed and the exception will be caught by the `@catch` decorator, if present.
599
+
600
+ Note that all the values specified in parameters are added together so if you specify
601
+ 60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.
652
602
 
653
603
 
654
604
  Parameters
655
605
  ----------
656
- times : int, default 3
657
- Number of times to retry this task.
658
- minutes_between_retries : int, default 2
659
- Number of minutes between retries.
606
+ seconds : int, default 0
607
+ Number of seconds to wait prior to timing out.
608
+ minutes : int, default 0
609
+ Number of minutes to wait prior to timing out.
610
+ hours : int, default 0
611
+ Number of hours to wait prior to timing out.
660
612
  """
661
613
  ...
662
614
 
663
615
  @typing.overload
664
- def retry(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
616
+ def timeout(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
665
617
  ...
666
618
 
667
619
  @typing.overload
668
- def retry(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
620
+ def timeout(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
669
621
  ...
670
622
 
671
- def retry(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, times: int = 3, minutes_between_retries: int = 2):
623
+ def timeout(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, seconds: int = 0, minutes: int = 0, hours: int = 0):
672
624
  """
673
- Specifies the number of times the task corresponding
674
- to a step needs to be retried.
625
+ Specifies a timeout for your step.
675
626
 
676
- This decorator is useful for handling transient errors, such as networking issues.
677
- If your task contains operations that can't be retried safely, e.g. database updates,
678
- it is advisable to annotate it with `@retry(times=0)`.
627
+ This decorator is useful if this step may hang indefinitely.
679
628
 
680
- This can be used in conjunction with the `@catch` decorator. The `@catch`
681
- decorator will execute a no-op task after all retries have been exhausted,
682
- ensuring that the flow execution can continue.
629
+ This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
630
+ A timeout is considered to be an exception thrown by the step. It will cause the step to be
631
+ retried if needed and the exception will be caught by the `@catch` decorator, if present.
632
+
633
+ Note that all the values specified in parameters are added together so if you specify
634
+ 60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.
683
635
 
684
636
 
685
637
  Parameters
686
638
  ----------
687
- times : int, default 3
688
- Number of times to retry this task.
689
- minutes_between_retries : int, default 2
690
- Number of minutes between retries.
639
+ seconds : int, default 0
640
+ Number of seconds to wait prior to timing out.
641
+ minutes : int, default 0
642
+ Number of minutes to wait prior to timing out.
643
+ hours : int, default 0
644
+ Number of hours to wait prior to timing out.
691
645
  """
692
646
  ...
693
647
 
694
- def nvidia(*, gpu: int, gpu_type: str, queue_timeout: int) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
648
+ @typing.overload
649
+ def fast_bakery_internal(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
695
650
  """
696
- Specifies that this step should execute on DGX cloud.
651
+ Internal decorator to support Fast bakery
652
+ """
653
+ ...
654
+
655
+ @typing.overload
656
+ def fast_bakery_internal(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
657
+ ...
658
+
659
+ def fast_bakery_internal(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
660
+ """
661
+ Internal decorator to support Fast bakery
662
+ """
663
+ ...
664
+
665
+ @typing.overload
666
+ def resources(*, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
667
+ """
668
+ Specifies the resources needed when executing this step.
669
+
670
+ Use `@resources` to specify the resource requirements
671
+ independently of the specific compute layer (`@batch`, `@kubernetes`).
672
+
673
+ You can choose the compute layer on the command line by executing e.g.
674
+ ```
675
+ python myflow.py run --with batch
676
+ ```
677
+ or
678
+ ```
679
+ python myflow.py run --with kubernetes
680
+ ```
681
+ which executes the flow on the desired system using the
682
+ requirements specified in `@resources`.
697
683
 
698
684
 
699
685
  Parameters
700
686
  ----------
701
- gpu : int
702
- Number of GPUs to use.
703
- gpu_type : str
704
- Type of Nvidia GPU to use.
705
- queue_timeout : int
706
- Time to keep the job in NVCF's queue.
687
+ cpu : int, default 1
688
+ Number of CPUs required for this step.
689
+ gpu : int, optional, default None
690
+ Number of GPUs required for this step.
691
+ disk : int, optional, default None
692
+ Disk size (in MB) required for this step. Only applies on Kubernetes.
693
+ memory : int, default 4096
694
+ Memory size (in MB) required for this step.
695
+ shared_memory : int, optional, default None
696
+ The value for the size (in MiB) of the /dev/shm volume for this step.
697
+ This parameter maps to the `--shm-size` option in Docker.
698
+ """
699
+ ...
700
+
701
+ @typing.overload
702
+ def resources(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
703
+ ...
704
+
705
+ @typing.overload
706
+ def resources(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
707
+ ...
708
+
709
+ def resources(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None):
710
+ """
711
+ Specifies the resources needed when executing this step.
712
+
713
+ Use `@resources` to specify the resource requirements
714
+ independently of the specific compute layer (`@batch`, `@kubernetes`).
715
+
716
+ You can choose the compute layer on the command line by executing e.g.
717
+ ```
718
+ python myflow.py run --with batch
719
+ ```
720
+ or
721
+ ```
722
+ python myflow.py run --with kubernetes
723
+ ```
724
+ which executes the flow on the desired system using the
725
+ requirements specified in `@resources`.
726
+
727
+
728
+ Parameters
729
+ ----------
730
+ cpu : int, default 1
731
+ Number of CPUs required for this step.
732
+ gpu : int, optional, default None
733
+ Number of GPUs required for this step.
734
+ disk : int, optional, default None
735
+ Disk size (in MB) required for this step. Only applies on Kubernetes.
736
+ memory : int, default 4096
737
+ Memory size (in MB) required for this step.
738
+ shared_memory : int, optional, default None
739
+ The value for the size (in MiB) of the /dev/shm volume for this step.
740
+ This parameter maps to the `--shm-size` option in Docker.
741
+ """
742
+ ...
743
+
744
+ @typing.overload
745
+ def conda(*, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
746
+ """
747
+ Specifies the Conda environment for the step.
748
+
749
+ Information in this decorator will augment any
750
+ attributes set in the `@conda_base` flow-level decorator. Hence,
751
+ you can use `@conda_base` to set packages required by all
752
+ steps and use `@conda` to specify step-specific overrides.
753
+
754
+
755
+ Parameters
756
+ ----------
757
+ packages : Dict[str, str], default {}
758
+ Packages to use for this step. The key is the name of the package
759
+ and the value is the version to use.
760
+ libraries : Dict[str, str], default {}
761
+ Supported for backward compatibility. When used with packages, packages will take precedence.
762
+ python : str, optional, default None
763
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
764
+ that the version used will correspond to the version of the Python interpreter used to start the run.
765
+ disabled : bool, default False
766
+ If set to True, disables @conda.
767
+ """
768
+ ...
769
+
770
+ @typing.overload
771
+ def conda(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
772
+ ...
773
+
774
+ @typing.overload
775
+ def conda(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
776
+ ...
777
+
778
+ def conda(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False):
779
+ """
780
+ Specifies the Conda environment for the step.
781
+
782
+ Information in this decorator will augment any
783
+ attributes set in the `@conda_base` flow-level decorator. Hence,
784
+ you can use `@conda_base` to set packages required by all
785
+ steps and use `@conda` to specify step-specific overrides.
786
+
787
+
788
+ Parameters
789
+ ----------
790
+ packages : Dict[str, str], default {}
791
+ Packages to use for this step. The key is the name of the package
792
+ and the value is the version to use.
793
+ libraries : Dict[str, str], default {}
794
+ Supported for backward compatibility. When used with packages, packages will take precedence.
795
+ python : str, optional, default None
796
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
797
+ that the version used will correspond to the version of the Python interpreter used to start the run.
798
+ disabled : bool, default False
799
+ If set to True, disables @conda.
707
800
  """
708
801
  ...
709
802
 
@@ -758,405 +851,306 @@ def pypi(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typ
758
851
  """
759
852
  ...
760
853
 
761
- @typing.overload
762
- def environment(*, vars: typing.Dict[str, str] = {}) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
854
+ def nvct(*, gpu: int, gpu_type: str) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
763
855
  """
764
- Specifies environment variables to be set prior to the execution of a step.
856
+ Specifies that this step should execute on DGX cloud.
765
857
 
766
858
 
767
859
  Parameters
768
860
  ----------
769
- vars : Dict[str, str], default {}
770
- Dictionary of environment variables to set.
861
+ gpu : int
862
+ Number of GPUs to use.
863
+ gpu_type : str
864
+ Type of Nvidia GPU to use.
771
865
  """
772
866
  ...
773
867
 
774
868
  @typing.overload
775
- def environment(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
869
+ def test_append_card(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
870
+ """
871
+ A simple decorator that demonstrates using CardDecoratorInjector
872
+ to inject a card and render simple markdown content.
873
+ """
776
874
  ...
777
875
 
778
876
  @typing.overload
779
- def environment(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
877
+ def test_append_card(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
780
878
  ...
781
879
 
782
- def environment(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, vars: typing.Dict[str, str] = {}):
880
+ def test_append_card(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
783
881
  """
784
- Specifies environment variables to be set prior to the execution of a step.
785
-
786
-
787
- Parameters
788
- ----------
789
- vars : Dict[str, str], default {}
790
- Dictionary of environment variables to set.
882
+ A simple decorator that demonstrates using CardDecoratorInjector
883
+ to inject a card and render simple markdown content.
791
884
  """
792
885
  ...
793
886
 
794
887
  @typing.overload
795
- def catch(*, var: typing.Optional[str] = None, print_exception: bool = True) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
888
+ def app_deploy(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
796
889
  """
797
- Specifies that the step will succeed under all circumstances.
798
-
799
- The decorator will create an optional artifact, specified by `var`, which
800
- contains the exception raised. You can use it to detect the presence
801
- of errors, indicating that all happy-path artifacts produced by the step
802
- are missing.
803
-
804
-
805
- Parameters
806
- ----------
807
- var : str, optional, default None
808
- Name of the artifact in which to store the caught exception.
809
- If not specified, the exception is not stored.
810
- print_exception : bool, default True
811
- Determines whether or not the exception is printed to
812
- stdout when caught.
890
+ Decorator prototype for all step decorators. This function gets specialized
891
+ and imported for all decorators types by _import_plugin_decorators().
813
892
  """
814
893
  ...
815
894
 
816
895
  @typing.overload
817
- def catch(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
896
+ def app_deploy(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
818
897
  ...
819
898
 
820
- @typing.overload
821
- def catch(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
899
+ def app_deploy(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
900
+ """
901
+ Decorator prototype for all step decorators. This function gets specialized
902
+ and imported for all decorators types by _import_plugin_decorators().
903
+ """
822
904
  ...
823
905
 
824
- def catch(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, var: typing.Optional[str] = None, print_exception: bool = True):
906
+ @typing.overload
907
+ def retry(*, times: int = 3, minutes_between_retries: int = 2) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
825
908
  """
826
- Specifies that the step will succeed under all circumstances.
909
+ Specifies the number of times the task corresponding
910
+ to a step needs to be retried.
827
911
 
828
- The decorator will create an optional artifact, specified by `var`, which
829
- contains the exception raised. You can use it to detect the presence
830
- of errors, indicating that all happy-path artifacts produced by the step
831
- are missing.
912
+ This decorator is useful for handling transient errors, such as networking issues.
913
+ If your task contains operations that can't be retried safely, e.g. database updates,
914
+ it is advisable to annotate it with `@retry(times=0)`.
915
+
916
+ This can be used in conjunction with the `@catch` decorator. The `@catch`
917
+ decorator will execute a no-op task after all retries have been exhausted,
918
+ ensuring that the flow execution can continue.
832
919
 
833
920
 
834
921
  Parameters
835
922
  ----------
836
- var : str, optional, default None
837
- Name of the artifact in which to store the caught exception.
838
- If not specified, the exception is not stored.
839
- print_exception : bool, default True
840
- Determines whether or not the exception is printed to
841
- stdout when caught.
842
- """
843
- ...
844
-
845
- @typing.overload
846
- def model(*, load: typing.Union[typing.List[str], str, typing.List[typing.Tuple[str, typing.Optional[str]]]] = None, temp_dir_root: str = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
847
- """
848
- Enables loading / saving of models within a step.
849
-
850
- > Examples
851
- - Saving Models
852
- ```python
853
- @model
854
- @step
855
- def train(self):
856
- # current.model.save returns a dictionary reference to the model saved
857
- self.my_model = current.model.save(
858
- path_to_my_model,
859
- label="my_model",
860
- metadata={
861
- "epochs": 10,
862
- "batch-size": 32,
863
- "learning-rate": 0.001,
864
- }
865
- )
866
- self.next(self.test)
867
-
868
- @model(load="my_model")
869
- @step
870
- def test(self):
871
- # `current.model.loaded` returns a dictionary of the loaded models
872
- # where the key is the name of the artifact and the value is the path to the model
873
- print(os.listdir(current.model.loaded["my_model"]))
874
- self.next(self.end)
875
- ```
876
-
877
- - Loading models
878
- ```python
879
- @step
880
- def train(self):
881
- # current.model.load returns the path to the model loaded
882
- checkpoint_path = current.model.load(
883
- self.checkpoint_key,
884
- )
885
- model_path = current.model.load(
886
- self.model,
887
- )
888
- self.next(self.test)
889
- ```
890
-
891
-
892
- Parameters
893
- ----------
894
- load : Union[List[str],str,List[Tuple[str,Union[str,None]]]], default: None
895
- Artifact name/s referencing the models/checkpoints to load. Artifact names refer to the names of the instance variables set to `self`.
896
- The artifact names given to `load` can be reference objects or reference `key` strings from objects created by `current.checkpoint` / `current.model` / `current.huggingface_hub`.
897
- If a list of tuples is provided, the first element is the artifact name and the second element is the path the artifact needs to be unpacked on
898
- the local filesystem. If the second element is None, the artifact will be unpacked in the current working directory.
899
- If a string is provided, then the artifact corresponding to that name will be loaded in the current working directory.
900
-
901
- temp_dir_root : str, default: None
902
- The root directory under which `current.model.loaded` will store loaded models
923
+ times : int, default 3
924
+ Number of times to retry this task.
925
+ minutes_between_retries : int, default 2
926
+ Number of minutes between retries.
903
927
  """
904
928
  ...
905
929
 
906
930
  @typing.overload
907
- def model(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
931
+ def retry(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
908
932
  ...
909
933
 
910
934
  @typing.overload
911
- def model(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
935
+ def retry(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
912
936
  ...
913
937
 
914
- def model(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, load: typing.Union[typing.List[str], str, typing.List[typing.Tuple[str, typing.Optional[str]]]] = None, temp_dir_root: str = None):
938
+ def retry(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, times: int = 3, minutes_between_retries: int = 2):
915
939
  """
916
- Enables loading / saving of models within a step.
917
-
918
- > Examples
919
- - Saving Models
920
- ```python
921
- @model
922
- @step
923
- def train(self):
924
- # current.model.save returns a dictionary reference to the model saved
925
- self.my_model = current.model.save(
926
- path_to_my_model,
927
- label="my_model",
928
- metadata={
929
- "epochs": 10,
930
- "batch-size": 32,
931
- "learning-rate": 0.001,
932
- }
933
- )
934
- self.next(self.test)
940
+ Specifies the number of times the task corresponding
941
+ to a step needs to be retried.
935
942
 
936
- @model(load="my_model")
937
- @step
938
- def test(self):
939
- # `current.model.loaded` returns a dictionary of the loaded models
940
- # where the key is the name of the artifact and the value is the path to the model
941
- print(os.listdir(current.model.loaded["my_model"]))
942
- self.next(self.end)
943
- ```
943
+ This decorator is useful for handling transient errors, such as networking issues.
944
+ If your task contains operations that can't be retried safely, e.g. database updates,
945
+ it is advisable to annotate it with `@retry(times=0)`.
944
946
 
945
- - Loading models
946
- ```python
947
- @step
948
- def train(self):
949
- # current.model.load returns the path to the model loaded
950
- checkpoint_path = current.model.load(
951
- self.checkpoint_key,
952
- )
953
- model_path = current.model.load(
954
- self.model,
955
- )
956
- self.next(self.test)
957
- ```
947
+ This can be used in conjunction with the `@catch` decorator. The `@catch`
948
+ decorator will execute a no-op task after all retries have been exhausted,
949
+ ensuring that the flow execution can continue.
958
950
 
959
951
 
960
952
  Parameters
961
953
  ----------
962
- load : Union[List[str],str,List[Tuple[str,Union[str,None]]]], default: None
963
- Artifact name/s referencing the models/checkpoints to load. Artifact names refer to the names of the instance variables set to `self`.
964
- The artifact names given to `load` can be reference objects or reference `key` strings from objects created by `current.checkpoint` / `current.model` / `current.huggingface_hub`.
965
- If a list of tuples is provided, the first element is the artifact name and the second element is the path the artifact needs to be unpacked on
966
- the local filesystem. If the second element is None, the artifact will be unpacked in the current working directory.
967
- If a string is provided, then the artifact corresponding to that name will be loaded in the current working directory.
968
-
969
- temp_dir_root : str, default: None
970
- The root directory under which `current.model.loaded` will store loaded models
954
+ times : int, default 3
955
+ Number of times to retry this task.
956
+ minutes_between_retries : int, default 2
957
+ Number of minutes between retries.
971
958
  """
972
959
  ...
973
960
 
974
961
  @typing.overload
975
- def fast_bakery_internal(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
962
+ def environment(*, vars: typing.Dict[str, str] = {}) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
976
963
  """
977
- Internal decorator to support Fast bakery
964
+ Specifies environment variables to be set prior to the execution of a step.
965
+
966
+
967
+ Parameters
968
+ ----------
969
+ vars : Dict[str, str], default {}
970
+ Dictionary of environment variables to set.
978
971
  """
979
972
  ...
980
973
 
981
974
  @typing.overload
982
- def fast_bakery_internal(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
975
+ def environment(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
983
976
  ...
984
977
 
985
- def fast_bakery_internal(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
986
- """
987
- Internal decorator to support Fast bakery
988
- """
978
+ @typing.overload
979
+ def environment(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
989
980
  ...
990
981
 
991
- def vllm(*, model: str, backend: str, openai_api_server: bool, debug: bool, card_refresh_interval: int, max_retries: int, retry_alert_frequency: int, engine_args: dict) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
982
+ def environment(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, vars: typing.Dict[str, str] = {}):
992
983
  """
993
- This decorator is used to run vllm APIs as Metaflow task sidecars.
994
-
995
- User code call
996
- --------------
997
- @vllm(
998
- model="...",
999
- ...
1000
- )
1001
-
1002
- Valid backend options
1003
- ---------------------
1004
- - 'local': Run as a separate process on the local task machine.
1005
-
1006
- Valid model options
1007
- -------------------
1008
- Any HuggingFace model identifier, e.g. 'meta-llama/Llama-3.2-1B'
1009
-
1010
- NOTE: vLLM's OpenAI-compatible server serves ONE model per server instance.
1011
- If you need multiple models, you must create multiple @vllm decorators.
984
+ Specifies environment variables to be set prior to the execution of a step.
1012
985
 
1013
986
 
1014
987
  Parameters
1015
988
  ----------
1016
- model: str
1017
- HuggingFace model identifier to be served by vLLM.
1018
- backend: str
1019
- Determines where and how to run the vLLM process.
1020
- openai_api_server: bool
1021
- Whether to use OpenAI-compatible API server mode (subprocess) instead of native engine.
1022
- Default is False (uses native engine).
1023
- Set to True for backward compatibility with existing code.
1024
- debug: bool
1025
- Whether to turn on verbose debugging logs.
1026
- card_refresh_interval: int
1027
- Interval in seconds for refreshing the vLLM status card.
1028
- Only used when openai_api_server=True.
1029
- max_retries: int
1030
- Maximum number of retries checking for vLLM server startup.
1031
- Only used when openai_api_server=True.
1032
- retry_alert_frequency: int
1033
- Frequency of alert logs for vLLM server startup retries.
1034
- Only used when openai_api_server=True.
1035
- engine_args : dict
1036
- Additional keyword arguments to pass to the vLLM engine.
1037
- For example, `tensor_parallel_size=2`.
989
+ vars : Dict[str, str], default {}
990
+ Dictionary of environment variables to set.
1038
991
  """
1039
992
  ...
1040
993
 
1041
- def nvct(*, gpu: int, gpu_type: str) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
994
+ @typing.overload
995
+ def checkpoint(*, load_policy: str = 'fresh', temp_dir_root: str = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1042
996
  """
1043
- Specifies that this step should execute on DGX cloud.
997
+ Enables checkpointing for a step.
1044
998
 
999
+ > Examples
1045
1000
 
1046
- Parameters
1047
- ----------
1048
- gpu : int
1049
- Number of GPUs to use.
1050
- gpu_type : str
1051
- Type of Nvidia GPU to use.
1052
- """
1053
- ...
1054
-
1055
- @typing.overload
1056
- def card(*, type: str = 'default', id: typing.Optional[str] = None, options: typing.Dict[str, typing.Any] = {}, timeout: int = 45) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1057
- """
1058
- Creates a human-readable report, a Metaflow Card, after this step completes.
1001
+ - Saving Checkpoints
1059
1002
 
1060
- Note that you may add multiple `@card` decorators in a step with different parameters.
1003
+ ```python
1004
+ @checkpoint
1005
+ @step
1006
+ def train(self):
1007
+ model = create_model(self.parameters, checkpoint_path = None)
1008
+ for i in range(self.epochs):
1009
+ # some training logic
1010
+ loss = model.train(self.dataset)
1011
+ if i % 10 == 0:
1012
+ model.save(
1013
+ current.checkpoint.directory,
1014
+ )
1015
+ # saves the contents of the `current.checkpoint.directory` as a checkpoint
1016
+ # and returns a reference dictionary to the checkpoint saved in the datastore
1017
+ self.latest_checkpoint = current.checkpoint.save(
1018
+ name="epoch_checkpoint",
1019
+ metadata={
1020
+ "epoch": i,
1021
+ "loss": loss,
1022
+ }
1023
+ )
1024
+ ```
1025
+
1026
+ - Using Loaded Checkpoints
1027
+
1028
+ ```python
1029
+ @retry(times=3)
1030
+ @checkpoint
1031
+ @step
1032
+ def train(self):
1033
+ # Assume that the task has restarted and the previous attempt of the task
1034
+ # saved a checkpoint
1035
+ checkpoint_path = None
1036
+ if current.checkpoint.is_loaded: # Check if a checkpoint is loaded
1037
+ print("Loaded checkpoint from the previous attempt")
1038
+ checkpoint_path = current.checkpoint.directory
1039
+
1040
+ model = create_model(self.parameters, checkpoint_path = checkpoint_path)
1041
+ for i in range(self.epochs):
1042
+ ...
1043
+ ```
1061
1044
 
1062
1045
 
1063
1046
  Parameters
1064
1047
  ----------
1065
- type : str, default 'default'
1066
- Card type.
1067
- id : str, optional, default None
1068
- If multiple cards are present, use this id to identify this card.
1069
- options : Dict[str, Any], default {}
1070
- Options passed to the card. The contents depend on the card type.
1071
- timeout : int, default 45
1072
- Interrupt reporting if it takes more than this many seconds.
1048
+ load_policy : str, default: "fresh"
1049
+ The policy for loading the checkpoint. The following policies are supported:
1050
+ - "eager": Loads the latest available checkpoint within the namespace.
1051
+ With this mode, the latest checkpoint written by any previous task (can be even a different run) of the step
1052
+ will be loaded at the start of the task.
1053
+ - "none": Do not load any checkpoint
1054
+ - "fresh": Loads the latest checkpoint created within the running Task.
1055
+ This mode helps loading checkpoints across various retry attempts of the same task.
1056
+ With this mode, no checkpoint will be loaded at the start of a task but any checkpoints
1057
+ created within the task will be loaded when the task retries execution on failure.
1058
+
1059
+ temp_dir_root : str, default: None
1060
+ The root directory under which `current.checkpoint.directory` will be created.
1073
1061
  """
1074
1062
  ...
1075
1063
 
1076
1064
  @typing.overload
1077
- def card(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1065
+ def checkpoint(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1078
1066
  ...
1079
1067
 
1080
1068
  @typing.overload
1081
- def card(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1069
+ def checkpoint(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1082
1070
  ...
1083
1071
 
1084
- def card(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, type: str = 'default', id: typing.Optional[str] = None, options: typing.Dict[str, typing.Any] = {}, timeout: int = 45):
1072
+ def checkpoint(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, load_policy: str = 'fresh', temp_dir_root: str = None):
1085
1073
  """
1086
- Creates a human-readable report, a Metaflow Card, after this step completes.
1074
+ Enables checkpointing for a step.
1087
1075
 
1088
- Note that you may add multiple `@card` decorators in a step with different parameters.
1076
+ > Examples
1089
1077
 
1078
+ - Saving Checkpoints
1090
1079
 
1091
- Parameters
1092
- ----------
1093
- type : str, default 'default'
1094
- Card type.
1095
- id : str, optional, default None
1096
- If multiple cards are present, use this id to identify this card.
1097
- options : Dict[str, Any], default {}
1098
- Options passed to the card. The contents depend on the card type.
1099
- timeout : int, default 45
1100
- Interrupt reporting if it takes more than this many seconds.
1101
- """
1102
- ...
1103
-
1104
- @typing.overload
1105
- def timeout(*, seconds: int = 0, minutes: int = 0, hours: int = 0) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1106
- """
1107
- Specifies a timeout for your step.
1080
+ ```python
1081
+ @checkpoint
1082
+ @step
1083
+ def train(self):
1084
+ model = create_model(self.parameters, checkpoint_path = None)
1085
+ for i in range(self.epochs):
1086
+ # some training logic
1087
+ loss = model.train(self.dataset)
1088
+ if i % 10 == 0:
1089
+ model.save(
1090
+ current.checkpoint.directory,
1091
+ )
1092
+ # saves the contents of the `current.checkpoint.directory` as a checkpoint
1093
+ # and returns a reference dictionary to the checkpoint saved in the datastore
1094
+ self.latest_checkpoint = current.checkpoint.save(
1095
+ name="epoch_checkpoint",
1096
+ metadata={
1097
+ "epoch": i,
1098
+ "loss": loss,
1099
+ }
1100
+ )
1101
+ ```
1108
1102
 
1109
- This decorator is useful if this step may hang indefinitely.
1103
+ - Using Loaded Checkpoints
1110
1104
 
1111
- This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
1112
- A timeout is considered to be an exception thrown by the step. It will cause the step to be
1113
- retried if needed and the exception will be caught by the `@catch` decorator, if present.
1105
+ ```python
1106
+ @retry(times=3)
1107
+ @checkpoint
1108
+ @step
1109
+ def train(self):
1110
+ # Assume that the task has restarted and the previous attempt of the task
1111
+ # saved a checkpoint
1112
+ checkpoint_path = None
1113
+ if current.checkpoint.is_loaded: # Check if a checkpoint is loaded
1114
+ print("Loaded checkpoint from the previous attempt")
1115
+ checkpoint_path = current.checkpoint.directory
1114
1116
 
1115
- Note that all the values specified in parameters are added together so if you specify
1116
- 60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.
1117
+ model = create_model(self.parameters, checkpoint_path = checkpoint_path)
1118
+ for i in range(self.epochs):
1119
+ ...
1120
+ ```
1117
1121
 
1118
1122
 
1119
1123
  Parameters
1120
1124
  ----------
1121
- seconds : int, default 0
1122
- Number of seconds to wait prior to timing out.
1123
- minutes : int, default 0
1124
- Number of minutes to wait prior to timing out.
1125
- hours : int, default 0
1126
- Number of hours to wait prior to timing out.
1125
+ load_policy : str, default: "fresh"
1126
+ The policy for loading the checkpoint. The following policies are supported:
1127
+ - "eager": Loads the latest available checkpoint within the namespace.
1128
+ With this mode, the latest checkpoint written by any previous task (can be even a different run) of the step
1129
+ will be loaded at the start of the task.
1130
+ - "none": Do not load any checkpoint
1131
+ - "fresh": Loads the latest checkpoint created within the running Task.
1132
+ This mode helps loading checkpoints across various retry attempts of the same task.
1133
+ With this mode, no checkpoint will be loaded at the start of a task but any checkpoints
1134
+ created within the task will be loaded when the task retries execution on failure.
1135
+
1136
+ temp_dir_root : str, default: None
1137
+ The root directory under which `current.checkpoint.directory` will be created.
1127
1138
  """
1128
1139
  ...
1129
1140
 
1130
- @typing.overload
1131
- def timeout(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1132
- ...
1133
-
1134
- @typing.overload
1135
- def timeout(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1136
- ...
1137
-
1138
- def timeout(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, seconds: int = 0, minutes: int = 0, hours: int = 0):
1141
+ def nvidia(*, gpu: int, gpu_type: str, queue_timeout: int) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1139
1142
  """
1140
- Specifies a timeout for your step.
1141
-
1142
- This decorator is useful if this step may hang indefinitely.
1143
-
1144
- This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
1145
- A timeout is considered to be an exception thrown by the step. It will cause the step to be
1146
- retried if needed and the exception will be caught by the `@catch` decorator, if present.
1147
-
1148
- Note that all the values specified in parameters are added together so if you specify
1149
- 60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.
1143
+ Specifies that this step should execute on DGX cloud.
1150
1144
 
1151
1145
 
1152
1146
  Parameters
1153
1147
  ----------
1154
- seconds : int, default 0
1155
- Number of seconds to wait prior to timing out.
1156
- minutes : int, default 0
1157
- Number of minutes to wait prior to timing out.
1158
- hours : int, default 0
1159
- Number of hours to wait prior to timing out.
1148
+ gpu : int
1149
+ Number of GPUs to use.
1150
+ gpu_type : str
1151
+ Type of Nvidia GPU to use.
1152
+ queue_timeout : int
1153
+ Time to keep the job in NVCF's queue.
1160
1154
  """
1161
1155
  ...
1162
1156
 
@@ -1206,284 +1200,76 @@ def huggingface_hub(*, temp_dir_root: typing.Optional[str] = None, load: typing.
1206
1200
  # except for `local_dir`
1207
1201
  @huggingface_hub(load=[
1208
1202
  {
1209
- "repo_id": "mistralai/Mistral-7B-Instruct-v0.1",
1210
- },
1211
- {
1212
- "repo_id": "myorg/mistral-lora",
1213
- "repo_type": "model",
1214
- },
1215
- ])
1216
- @step
1217
- def finetune_model(self):
1218
- path_to_model = current.huggingface_hub.loaded["mistralai/Mistral-7B-Instruct-v0.1"]
1219
- # path_to_model will be /my-directory
1220
- ```
1221
-
1222
-
1223
- Parameters
1224
- ----------
1225
- temp_dir_root : str, optional
1226
- The root directory that will hold the temporary directory where objects will be downloaded.
1227
-
1228
- load: Union[List[str], List[Tuple[Dict, str]], List[Tuple[str, str]], List[Dict], None]
1229
- The list of repos (models/datasets) to load.
1230
-
1231
- Loaded repos can be accessed via `current.huggingface_hub.loaded`. If load is set, then the following happens:
1232
-
1233
- - If repo (model/dataset) is not found in the datastore:
1234
- - Downloads the repo from Hugging Face Hub to a temporary directory (or uses specified path) for local access
1235
- - Stores it in Metaflow's datastore (s3/gcs/azure etc.) with a unique name based on repo_type/repo_id
1236
- - All HF models loaded for a `@step` will be cached separately under flow/step/namespace.
1237
-
1238
- - If repo is found in the datastore:
1239
- - Loads it directly from datastore to local path (can be temporary directory or specified path)
1240
- """
1241
- ...
1242
-
1243
- @typing.overload
1244
- def secrets(*, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = [], role: typing.Optional[str] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1245
- """
1246
- Specifies secrets to be retrieved and injected as environment variables prior to
1247
- the execution of a step.
1248
-
1249
-
1250
- Parameters
1251
- ----------
1252
- sources : List[Union[str, Dict[str, Any]]], default: []
1253
- List of secret specs, defining how the secrets are to be retrieved
1254
- role : str, optional, default: None
1255
- Role to use for fetching secrets
1256
- """
1257
- ...
1258
-
1259
- @typing.overload
1260
- def secrets(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1261
- ...
1262
-
1263
- @typing.overload
1264
- def secrets(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1265
- ...
1266
-
1267
- def secrets(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = [], role: typing.Optional[str] = None):
1268
- """
1269
- Specifies secrets to be retrieved and injected as environment variables prior to
1270
- the execution of a step.
1271
-
1272
-
1273
- Parameters
1274
- ----------
1275
- sources : List[Union[str, Dict[str, Any]]], default: []
1276
- List of secret specs, defining how the secrets are to be retrieved
1277
- role : str, optional, default: None
1278
- Role to use for fetching secrets
1279
- """
1280
- ...
1281
-
1282
- def with_artifact_store(f: typing.Optional[typing.Type[FlowSpecDerived]] = None):
1283
- """
1284
- Allows setting external datastores to save data for the
1285
- `@checkpoint`/`@model`/`@huggingface_hub` decorators.
1286
-
1287
- This decorator is useful when users wish to save data to a different datastore
1288
- than what is configured in Metaflow. This can be for variety of reasons:
1289
-
1290
- 1. Data security: The objects needs to be stored in a bucket (object storage) that is not accessible by other flows.
1291
- 2. Data Locality: The location where the task is executing is not located in the same region as the datastore.
1292
- - Example: Metaflow datastore lives in US East, but the task is executing in Finland datacenters.
1293
- 3. Data Lifecycle Policies: The objects need to be archived / managed separately from the Metaflow managed objects.
1294
- - Example: Flow is training very large models that need to be stored separately and will be deleted more aggressively than the Metaflow managed objects.
1295
-
1296
- Usage:
1297
- ----------
1298
-
1299
- - Using a custom IAM role to access the datastore.
1300
-
1301
- ```python
1302
- @with_artifact_store(
1303
- type="s3",
1304
- config=lambda: {
1305
- "root": "s3://my-bucket-foo/path/to/root",
1306
- "role_arn": ROLE,
1307
- },
1308
- )
1309
- class MyFlow(FlowSpec):
1310
-
1311
- @checkpoint
1312
- @step
1313
- def start(self):
1314
- with open("my_file.txt", "w") as f:
1315
- f.write("Hello, World!")
1316
- self.external_bucket_checkpoint = current.checkpoint.save("my_file.txt")
1317
- self.next(self.end)
1318
-
1319
- ```
1320
-
1321
- - Using credentials to access the s3-compatible datastore.
1322
-
1323
- ```python
1324
- @with_artifact_store(
1325
- type="s3",
1326
- config=lambda: {
1327
- "root": "s3://my-bucket-foo/path/to/root",
1328
- "client_params": {
1329
- "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1330
- "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1331
- },
1332
- },
1333
- )
1334
- class MyFlow(FlowSpec):
1335
-
1336
- @checkpoint
1337
- @step
1338
- def start(self):
1339
- with open("my_file.txt", "w") as f:
1340
- f.write("Hello, World!")
1341
- self.external_bucket_checkpoint = current.checkpoint.save("my_file.txt")
1342
- self.next(self.end)
1203
+ "repo_id": "mistralai/Mistral-7B-Instruct-v0.1",
1204
+ },
1205
+ {
1206
+ "repo_id": "myorg/mistral-lora",
1207
+ "repo_type": "model",
1208
+ },
1209
+ ])
1210
+ @step
1211
+ def finetune_model(self):
1212
+ path_to_model = current.huggingface_hub.loaded["mistralai/Mistral-7B-Instruct-v0.1"]
1213
+ # path_to_model will be /my-directory
1214
+ ```
1343
1215
 
1344
- ```
1345
1216
 
1346
- - Accessing objects stored in external datastores after task execution.
1217
+ Parameters
1218
+ ----------
1219
+ temp_dir_root : str, optional
1220
+ The root directory that will hold the temporary directory where objects will be downloaded.
1347
1221
 
1348
- ```python
1349
- run = Run("CheckpointsTestsFlow/8992")
1350
- with artifact_store_from(run=run, config={
1351
- "client_params": {
1352
- "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1353
- "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1354
- },
1355
- }):
1356
- with Checkpoint() as cp:
1357
- latest = cp.list(
1358
- task=run["start"].task
1359
- )[0]
1360
- print(latest)
1361
- cp.load(
1362
- latest,
1363
- "test-checkpoints"
1364
- )
1222
+ load: Union[List[str], List[Tuple[Dict, str]], List[Tuple[str, str]], List[Dict], None]
1223
+ The list of repos (models/datasets) to load.
1365
1224
 
1366
- task = Task("TorchTuneFlow/8484/train/53673")
1367
- with artifact_store_from(run=run, config={
1368
- "client_params": {
1369
- "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1370
- "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1371
- },
1372
- }):
1373
- load_model(
1374
- task.data.model_ref,
1375
- "test-models"
1376
- )
1377
- ```
1378
- Parameters:
1379
- ----------
1225
+ Loaded repos can be accessed via `current.huggingface_hub.loaded`. If load is set, then the following happens:
1380
1226
 
1381
- type: str
1382
- The type of the datastore. Can be one of 's3', 'gcs', 'azure' or any other supported metaflow Datastore.
1227
+ - If repo (model/dataset) is not found in the datastore:
1228
+ - Downloads the repo from Hugging Face Hub to a temporary directory (or uses specified path) for local access
1229
+ - Stores it in Metaflow's datastore (s3/gcs/azure etc.) with a unique name based on repo_type/repo_id
1230
+ - All HF models loaded for a `@step` will be cached separately under flow/step/namespace.
1383
1231
 
1384
- config: dict or Callable
1385
- Dictionary of configuration options for the datastore. The following keys are required:
1386
- - root: The root path in the datastore where the data will be saved. (needs to be in the format expected by the datastore)
1387
- - example: 's3://bucket-name/path/to/root'
1388
- - example: 'gs://bucket-name/path/to/root'
1389
- - example: 'https://myblockacc.blob.core.windows.net/metaflow/'
1390
- - role_arn (optional): AWS IAM role to access s3 bucket (only when `type` is 's3')
1391
- - session_vars (optional): AWS session variables to access s3 bucket (only when `type` is 's3')
1392
- - client_params (optional): AWS client parameters to access s3 bucket (only when `type` is 's3')
1232
+ - If repo is found in the datastore:
1233
+ - Loads it directly from datastore to local path (can be temporary directory or specified path)
1393
1234
  """
1394
1235
  ...
1395
1236
 
1396
1237
  @typing.overload
1397
- def conda_base(*, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1238
+ def secrets(*, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = [], role: typing.Optional[str] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1398
1239
  """
1399
- Specifies the Conda environment for all steps of the flow.
1400
-
1401
- Use `@conda_base` to set common libraries required by all
1402
- steps and use `@conda` to specify step-specific additions.
1240
+ Specifies secrets to be retrieved and injected as environment variables prior to
1241
+ the execution of a step.
1403
1242
 
1404
1243
 
1405
1244
  Parameters
1406
1245
  ----------
1407
- packages : Dict[str, str], default {}
1408
- Packages to use for this flow. The key is the name of the package
1409
- and the value is the version to use.
1410
- libraries : Dict[str, str], default {}
1411
- Supported for backward compatibility. When used with packages, packages will take precedence.
1412
- python : str, optional, default None
1413
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
1414
- that the version used will correspond to the version of the Python interpreter used to start the run.
1415
- disabled : bool, default False
1416
- If set to True, disables Conda.
1246
+ sources : List[Union[str, Dict[str, Any]]], default: []
1247
+ List of secret specs, defining how the secrets are to be retrieved
1248
+ role : str, optional, default: None
1249
+ Role to use for fetching secrets
1417
1250
  """
1418
1251
  ...
1419
1252
 
1420
1253
  @typing.overload
1421
- def conda_base(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1254
+ def secrets(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1422
1255
  ...
1423
1256
 
1424
- def conda_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False):
1425
- """
1426
- Specifies the Conda environment for all steps of the flow.
1427
-
1428
- Use `@conda_base` to set common libraries required by all
1429
- steps and use `@conda` to specify step-specific additions.
1430
-
1431
-
1432
- Parameters
1433
- ----------
1434
- packages : Dict[str, str], default {}
1435
- Packages to use for this flow. The key is the name of the package
1436
- and the value is the version to use.
1437
- libraries : Dict[str, str], default {}
1438
- Supported for backward compatibility. When used with packages, packages will take precedence.
1439
- python : str, optional, default None
1440
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
1441
- that the version used will correspond to the version of the Python interpreter used to start the run.
1442
- disabled : bool, default False
1443
- If set to True, disables Conda.
1444
- """
1257
+ @typing.overload
1258
+ def secrets(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1445
1259
  ...
1446
1260
 
1447
- def airflow_s3_key_sensor(*, timeout: int, poke_interval: int, mode: str, exponential_backoff: bool, pool: str, soft_fail: bool, name: str, description: str, bucket_key: typing.Union[str, typing.List[str]], bucket_name: str, wildcard_match: bool, aws_conn_id: str, verify: bool) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1261
+ def secrets(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = [], role: typing.Optional[str] = None):
1448
1262
  """
1449
- The `@airflow_s3_key_sensor` decorator attaches a Airflow [S3KeySensor](https://airflow.apache.org/docs/apache-airflow-providers-amazon/stable/_api/airflow/providers/amazon/aws/sensors/s3/index.html#airflow.providers.amazon.aws.sensors.s3.S3KeySensor)
1450
- before the start step of the flow. This decorator only works when a flow is scheduled on Airflow
1451
- and is compiled using `airflow create`. More than one `@airflow_s3_key_sensor` can be
1452
- added as a flow decorators. Adding more than one decorator will ensure that `start` step
1453
- starts only after all sensors finish.
1263
+ Specifies secrets to be retrieved and injected as environment variables prior to
1264
+ the execution of a step.
1454
1265
 
1455
1266
 
1456
1267
  Parameters
1457
1268
  ----------
1458
- timeout : int
1459
- Time, in seconds before the task times out and fails. (Default: 3600)
1460
- poke_interval : int
1461
- Time in seconds that the job should wait in between each try. (Default: 60)
1462
- mode : str
1463
- How the sensor operates. Options are: { poke | reschedule }. (Default: "poke")
1464
- exponential_backoff : bool
1465
- allow progressive longer waits between pokes by using exponential backoff algorithm. (Default: True)
1466
- pool : str
1467
- the slot pool this task should run in,
1468
- slot pools are a way to limit concurrency for certain tasks. (Default:None)
1469
- soft_fail : bool
1470
- Set to true to mark the task as SKIPPED on failure. (Default: False)
1471
- name : str
1472
- Name of the sensor on Airflow
1473
- description : str
1474
- Description of sensor in the Airflow UI
1475
- bucket_key : Union[str, List[str]]
1476
- The key(s) being waited on. Supports full s3:// style url or relative path from root level.
1477
- When it's specified as a full s3:// url, please leave `bucket_name` as None
1478
- bucket_name : str
1479
- Name of the S3 bucket. Only needed when bucket_key is not provided as a full s3:// url.
1480
- When specified, all the keys passed to bucket_key refers to this bucket. (Default:None)
1481
- wildcard_match : bool
1482
- whether the bucket_key should be interpreted as a Unix wildcard pattern. (Default: False)
1483
- aws_conn_id : str
1484
- a reference to the s3 connection on Airflow. (Default: None)
1485
- verify : bool
1486
- Whether or not to verify SSL certificates for S3 connection. (Default: None)
1269
+ sources : List[Union[str, Dict[str, Any]]], default: []
1270
+ List of secret specs, defining how the secrets are to be retrieved
1271
+ role : str, optional, default: None
1272
+ Role to use for fetching secrets
1487
1273
  """
1488
1274
  ...
1489
1275
 
@@ -1580,6 +1366,84 @@ def trigger(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, event: t
1580
1366
  """
1581
1367
  ...
1582
1368
 
1369
+ def project(*, name: str, branch: typing.Optional[str] = None, production: bool = False) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1370
+ """
1371
+ Specifies what flows belong to the same project.
1372
+
1373
+ A project-specific namespace is created for all flows that
1374
+ use the same `@project(name)`.
1375
+
1376
+
1377
+ Parameters
1378
+ ----------
1379
+ name : str
1380
+ Project name. Make sure that the name is unique amongst all
1381
+ projects that use the same production scheduler. The name may
1382
+ contain only lowercase alphanumeric characters and underscores.
1383
+
1384
+ branch : Optional[str], default None
1385
+ The branch to use. If not specified, the branch is set to
1386
+ `user.<username>` unless `production` is set to `True`. This can
1387
+ also be set on the command line using `--branch` as a top-level option.
1388
+ It is an error to specify `branch` in the decorator and on the command line.
1389
+
1390
+ production : bool, default False
1391
+ Whether or not the branch is the production branch. This can also be set on the
1392
+ command line using `--production` as a top-level option. It is an error to specify
1393
+ `production` in the decorator and on the command line.
1394
+ The project branch name will be:
1395
+ - if `branch` is specified:
1396
+ - if `production` is True: `prod.<branch>`
1397
+ - if `production` is False: `test.<branch>`
1398
+ - if `branch` is not specified:
1399
+ - if `production` is True: `prod`
1400
+ - if `production` is False: `user.<username>`
1401
+ """
1402
+ ...
1403
+
1404
+ def airflow_s3_key_sensor(*, timeout: int, poke_interval: int, mode: str, exponential_backoff: bool, pool: str, soft_fail: bool, name: str, description: str, bucket_key: typing.Union[str, typing.List[str]], bucket_name: str, wildcard_match: bool, aws_conn_id: str, verify: bool) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1405
+ """
1406
+ The `@airflow_s3_key_sensor` decorator attaches an Airflow [S3KeySensor](https://airflow.apache.org/docs/apache-airflow-providers-amazon/stable/_api/airflow/providers/amazon/aws/sensors/s3/index.html#airflow.providers.amazon.aws.sensors.s3.S3KeySensor)
1407
+ before the start step of the flow. This decorator only works when a flow is scheduled on Airflow
1408
+ and is compiled using `airflow create`. More than one `@airflow_s3_key_sensor` can be
1409
+ added as flow decorators. Adding more than one decorator will ensure that the `start` step
1410
+ starts only after all sensors finish.
1411
+
1412
+
1413
+ Parameters
1414
+ ----------
1415
+ timeout : int
1416
+ Time, in seconds before the task times out and fails. (Default: 3600)
1417
+ poke_interval : int
1418
+ Time in seconds that the job should wait in between each try. (Default: 60)
1419
+ mode : str
1420
+ How the sensor operates. Options are: { poke | reschedule }. (Default: "poke")
1421
+ exponential_backoff : bool
1422
+ allow progressively longer waits between pokes by using exponential backoff algorithm. (Default: True)
1423
+ pool : str
1424
+ the slot pool this task should run in,
1425
+ slot pools are a way to limit concurrency for certain tasks. (Default:None)
1426
+ soft_fail : bool
1427
+ Set to true to mark the task as SKIPPED on failure. (Default: False)
1428
+ name : str
1429
+ Name of the sensor on Airflow
1430
+ description : str
1431
+ Description of sensor in the Airflow UI
1432
+ bucket_key : Union[str, List[str]]
1433
+ The key(s) being waited on. Supports full s3:// style url or relative path from root level.
1434
+ When it's specified as a full s3:// url, please leave `bucket_name` as None
1435
+ bucket_name : str
1436
+ Name of the S3 bucket. Only needed when bucket_key is not provided as a full s3:// url.
1437
+ When specified, all the keys passed to bucket_key refer to this bucket. (Default:None)
1438
+ wildcard_match : bool
1439
+ whether the bucket_key should be interpreted as a Unix wildcard pattern. (Default: False)
1440
+ aws_conn_id : str
1441
+ a reference to the s3 connection on Airflow. (Default: None)
1442
+ verify : bool
1443
+ Whether or not to verify SSL certificates for S3 connection. (Default: None)
1444
+ """
1445
+ ...
1446
+
1583
1447
  @typing.overload
1584
1448
  def schedule(*, hourly: bool = False, daily: bool = True, weekly: bool = False, cron: typing.Optional[str] = None, timezone: typing.Optional[str] = None) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1585
1449
  """
@@ -1631,44 +1495,97 @@ def schedule(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, hourly:
1631
1495
  """
1632
1496
  ...
1633
1497
 
1498
+ def airflow_external_task_sensor(*, timeout: int, poke_interval: int, mode: str, exponential_backoff: bool, pool: str, soft_fail: bool, name: str, description: str, external_dag_id: str, external_task_ids: typing.List[str], allowed_states: typing.List[str], failed_states: typing.List[str], execution_delta: "datetime.timedelta", check_existence: bool) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1499
+ """
1500
+ The `@airflow_external_task_sensor` decorator attaches an Airflow [ExternalTaskSensor](https://airflow.apache.org/docs/apache-airflow/stable/_api/airflow/sensors/external_task/index.html#airflow.sensors.external_task.ExternalTaskSensor) before the start step of the flow.
1501
+ This decorator only works when a flow is scheduled on Airflow and is compiled using `airflow create`. More than one `@airflow_external_task_sensor` can be added as flow decorators. Adding more than one decorator will ensure that the `start` step starts only after all sensors finish.
1502
+
1503
+
1504
+ Parameters
1505
+ ----------
1506
+ timeout : int
1507
+ Time, in seconds before the task times out and fails. (Default: 3600)
1508
+ poke_interval : int
1509
+ Time in seconds that the job should wait in between each try. (Default: 60)
1510
+ mode : str
1511
+ How the sensor operates. Options are: { poke | reschedule }. (Default: "poke")
1512
+ exponential_backoff : bool
1513
+ allow progressively longer waits between pokes by using exponential backoff algorithm. (Default: True)
1514
+ pool : str
1515
+ the slot pool this task should run in,
1516
+ slot pools are a way to limit concurrency for certain tasks. (Default:None)
1517
+ soft_fail : bool
1518
+ Set to true to mark the task as SKIPPED on failure. (Default: False)
1519
+ name : str
1520
+ Name of the sensor on Airflow
1521
+ description : str
1522
+ Description of sensor in the Airflow UI
1523
+ external_dag_id : str
1524
+ The dag_id that contains the task you want to wait for.
1525
+ external_task_ids : List[str]
1526
+ The list of task_ids that you want to wait for.
1527
+ If None (default value) the sensor waits for the DAG. (Default: None)
1528
+ allowed_states : List[str]
1529
+ Iterable of allowed states, (Default: ['success'])
1530
+ failed_states : List[str]
1531
+ Iterable of failed or dis-allowed states. (Default: None)
1532
+ execution_delta : datetime.timedelta
1533
+ time difference with the previous execution to look at,
1534
+ the default is the same logical date as the current task or DAG. (Default: None)
1535
+ check_existence: bool
1536
+ Set to True to check if the external task exists or check if
1537
+ the DAG to wait for exists. (Default: True)
1538
+ """
1539
+ ...
1540
+
1634
1541
  @typing.overload
1635
- def pypi_base(*, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1542
+ def conda_base(*, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1636
1543
  """
1637
- Specifies the PyPI packages for all steps of the flow.
1544
+ Specifies the Conda environment for all steps of the flow.
1545
+
1546
+ Use `@conda_base` to set common libraries required by all
1547
+ steps and use `@conda` to specify step-specific additions.
1638
1548
 
1639
- Use `@pypi_base` to set common packages required by all
1640
- steps and use `@pypi` to specify step-specific overrides.
1641
1549
 
1642
1550
  Parameters
1643
1551
  ----------
1644
- packages : Dict[str, str], default: {}
1552
+ packages : Dict[str, str], default {}
1645
1553
  Packages to use for this flow. The key is the name of the package
1646
1554
  and the value is the version to use.
1647
- python : str, optional, default: None
1555
+ libraries : Dict[str, str], default {}
1556
+ Supported for backward compatibility. When used with packages, packages will take precedence.
1557
+ python : str, optional, default None
1648
1558
  Version of Python to use, e.g. '3.7.4'. A default value of None implies
1649
1559
  that the version used will correspond to the version of the Python interpreter used to start the run.
1560
+ disabled : bool, default False
1561
+ If set to True, disables Conda.
1650
1562
  """
1651
1563
  ...
1652
1564
 
1653
1565
  @typing.overload
1654
- def pypi_base(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1566
+ def conda_base(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1655
1567
  ...
1656
1568
 
1657
- def pypi_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None):
1569
+ def conda_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False):
1658
1570
  """
1659
- Specifies the PyPI packages for all steps of the flow.
1571
+ Specifies the Conda environment for all steps of the flow.
1572
+
1573
+ Use `@conda_base` to set common libraries required by all
1574
+ steps and use `@conda` to specify step-specific additions.
1660
1575
 
1661
- Use `@pypi_base` to set common packages required by all
1662
- steps and use `@pypi` to specify step-specific overrides.
1663
1576
 
1664
1577
  Parameters
1665
1578
  ----------
1666
- packages : Dict[str, str], default: {}
1579
+ packages : Dict[str, str], default {}
1667
1580
  Packages to use for this flow. The key is the name of the package
1668
1581
  and the value is the version to use.
1669
- python : str, optional, default: None
1582
+ libraries : Dict[str, str], default {}
1583
+ Supported for backward compatibility. When used with packages, packages will take precedence.
1584
+ python : str, optional, default None
1670
1585
  Version of Python to use, e.g. '3.7.4'. A default value of None implies
1671
1586
  that the version used will correspond to the version of the Python interpreter used to start the run.
1587
+ disabled : bool, default False
1588
+ If set to True, disables Conda.
1672
1589
  """
1673
1590
  ...
1674
1591
 
@@ -1773,81 +1690,158 @@ def trigger_on_finish(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *
1773
1690
  """
1774
1691
  ...
1775
1692
 
1776
- def project(*, name: str, branch: typing.Optional[str] = None, production: bool = False) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1693
+ def with_artifact_store(f: typing.Optional[typing.Type[FlowSpecDerived]] = None):
1777
1694
  """
1778
- Specifies what flows belong to the same project.
1695
+ Allows setting external datastores to save data for the
1696
+ `@checkpoint`/`@model`/`@huggingface_hub` decorators.
1779
1697
 
1780
- A project-specific namespace is created for all flows that
1781
- use the same `@project(name)`.
1698
+ This decorator is useful when users wish to save data to a different datastore
1699
+ than what is configured in Metaflow. This can be for a variety of reasons:
1782
1700
 
1701
+ 1. Data security: The objects need to be stored in a bucket (object storage) that is not accessible by other flows.
1702
+ 2. Data Locality: The location where the task is executing is not located in the same region as the datastore.
1703
+ - Example: Metaflow datastore lives in US East, but the task is executing in Finland datacenters.
1704
+ 3. Data Lifecycle Policies: The objects need to be archived / managed separately from the Metaflow managed objects.
1705
+ - Example: Flow is training very large models that need to be stored separately and will be deleted more aggressively than the Metaflow managed objects.
1783
1706
 
1784
- Parameters
1707
+ Usage:
1785
1708
  ----------
1786
- name : str
1787
- Project name. Make sure that the name is unique amongst all
1788
- projects that use the same production scheduler. The name may
1789
- contain only lowercase alphanumeric characters and underscores.
1790
1709
 
1791
- branch : Optional[str], default None
1792
- The branch to use. If not specified, the branch is set to
1793
- `user.<username>` unless `production` is set to `True`. This can
1794
- also be set on the command line using `--branch` as a top-level option.
1795
- It is an error to specify `branch` in the decorator and on the command line.
1710
+ - Using a custom IAM role to access the datastore.
1796
1711
 
1797
- production : bool, default False
1798
- Whether or not the branch is the production branch. This can also be set on the
1799
- command line using `--production` as a top-level option. It is an error to specify
1800
- `production` in the decorator and on the command line.
1801
- The project branch name will be:
1802
- - if `branch` is specified:
1803
- - if `production` is True: `prod.<branch>`
1804
- - if `production` is False: `test.<branch>`
1805
- - if `branch` is not specified:
1806
- - if `production` is True: `prod`
1807
- - if `production` is False: `user.<username>`
1712
+ ```python
1713
+ @with_artifact_store(
1714
+ type="s3",
1715
+ config=lambda: {
1716
+ "root": "s3://my-bucket-foo/path/to/root",
1717
+ "role_arn": ROLE,
1718
+ },
1719
+ )
1720
+ class MyFlow(FlowSpec):
1721
+
1722
+ @checkpoint
1723
+ @step
1724
+ def start(self):
1725
+ with open("my_file.txt", "w") as f:
1726
+ f.write("Hello, World!")
1727
+ self.external_bucket_checkpoint = current.checkpoint.save("my_file.txt")
1728
+ self.next(self.end)
1729
+
1730
+ ```
1731
+
1732
+ - Using credentials to access the s3-compatible datastore.
1733
+
1734
+ ```python
1735
+ @with_artifact_store(
1736
+ type="s3",
1737
+ config=lambda: {
1738
+ "root": "s3://my-bucket-foo/path/to/root",
1739
+ "client_params": {
1740
+ "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1741
+ "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1742
+ },
1743
+ },
1744
+ )
1745
+ class MyFlow(FlowSpec):
1746
+
1747
+ @checkpoint
1748
+ @step
1749
+ def start(self):
1750
+ with open("my_file.txt", "w") as f:
1751
+ f.write("Hello, World!")
1752
+ self.external_bucket_checkpoint = current.checkpoint.save("my_file.txt")
1753
+ self.next(self.end)
1754
+
1755
+ ```
1756
+
1757
+ - Accessing objects stored in external datastores after task execution.
1758
+
1759
+ ```python
1760
+ run = Run("CheckpointsTestsFlow/8992")
1761
+ with artifact_store_from(run=run, config={
1762
+ "client_params": {
1763
+ "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1764
+ "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1765
+ },
1766
+ }):
1767
+ with Checkpoint() as cp:
1768
+ latest = cp.list(
1769
+ task=run["start"].task
1770
+ )[0]
1771
+ print(latest)
1772
+ cp.load(
1773
+ latest,
1774
+ "test-checkpoints"
1775
+ )
1776
+
1777
+ task = Task("TorchTuneFlow/8484/train/53673")
1778
+ with artifact_store_from(run=run, config={
1779
+ "client_params": {
1780
+ "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1781
+ "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1782
+ },
1783
+ }):
1784
+ load_model(
1785
+ task.data.model_ref,
1786
+ "test-models"
1787
+ )
1788
+ ```
1789
+ Parameters:
1790
+ ----------
1791
+
1792
+ type: str
1793
+ The type of the datastore. Can be one of 's3', 'gcs', 'azure' or any other supported Metaflow datastore.
1794
+
1795
+ config: dict or Callable
1796
+ Dictionary of configuration options for the datastore. The following keys are required:
1797
+ - root: The root path in the datastore where the data will be saved. (needs to be in the format expected by the datastore)
1798
+ - example: 's3://bucket-name/path/to/root'
1799
+ - example: 'gs://bucket-name/path/to/root'
1800
+ - example: 'https://myblockacc.blob.core.windows.net/metaflow/'
1801
+ - role_arn (optional): AWS IAM role to access s3 bucket (only when `type` is 's3')
1802
+ - session_vars (optional): AWS session variables to access s3 bucket (only when `type` is 's3')
1803
+ - client_params (optional): AWS client parameters to access s3 bucket (only when `type` is 's3')
1808
1804
  """
1809
1805
  ...
1810
1806
 
1811
- def airflow_external_task_sensor(*, timeout: int, poke_interval: int, mode: str, exponential_backoff: bool, pool: str, soft_fail: bool, name: str, description: str, external_dag_id: str, external_task_ids: typing.List[str], allowed_states: typing.List[str], failed_states: typing.List[str], execution_delta: "datetime.timedelta", check_existence: bool) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1807
+ @typing.overload
1808
+ def pypi_base(*, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1812
1809
  """
1813
- The `@airflow_external_task_sensor` decorator attaches an Airflow [ExternalTaskSensor](https://airflow.apache.org/docs/apache-airflow/stable/_api/airflow/sensors/external_task/index.html#airflow.sensors.external_task.ExternalTaskSensor) before the start step of the flow.
1814
- This decorator only works when a flow is scheduled on Airflow and is compiled using `airflow create`. More than one `@airflow_external_task_sensor` can be added as flow decorators. Adding more than one decorator will ensure that the `start` step starts only after all sensors finish.
1810
+ Specifies the PyPI packages for all steps of the flow.
1815
1811
 
1812
+ Use `@pypi_base` to set common packages required by all
1813
+ steps and use `@pypi` to specify step-specific overrides.
1816
1814
 
1817
1815
  Parameters
1818
1816
  ----------
1819
- timeout : int
1820
- Time, in seconds before the task times out and fails. (Default: 3600)
1821
- poke_interval : int
1822
- Time in seconds that the job should wait in between each try. (Default: 60)
1823
- mode : str
1824
- How the sensor operates. Options are: { poke | reschedule }. (Default: "poke")
1825
- exponential_backoff : bool
1826
- Allow progressively longer waits between pokes by using an exponential backoff algorithm. (Default: True)
1827
- pool : str
1828
- the slot pool this task should run in,
1829
- slot pools are a way to limit concurrency for certain tasks. (Default:None)
1830
- soft_fail : bool
1831
- Set to true to mark the task as SKIPPED on failure. (Default: False)
1832
- name : str
1833
- Name of the sensor on Airflow
1834
- description : str
1835
- Description of sensor in the Airflow UI
1836
- external_dag_id : str
1837
- The dag_id that contains the task you want to wait for.
1838
- external_task_ids : List[str]
1839
- The list of task_ids that you want to wait for.
1840
- If None (default value) the sensor waits for the DAG. (Default: None)
1841
- allowed_states : List[str]
1842
- Iterable of allowed states, (Default: ['success'])
1843
- failed_states : List[str]
1844
- Iterable of failed or dis-allowed states. (Default: None)
1845
- execution_delta : datetime.timedelta
1846
- time difference with the previous execution to look at,
1847
- the default is the same logical date as the current task or DAG. (Default: None)
1848
- check_existence: bool
1849
- Set to True to check if the external task exists or check if
1850
- the DAG to wait for exists. (Default: True)
1817
+ packages : Dict[str, str], default: {}
1818
+ Packages to use for this flow. The key is the name of the package
1819
+ and the value is the version to use.
1820
+ python : str, optional, default: None
1821
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
1822
+ that the version used will correspond to the version of the Python interpreter used to start the run.
1823
+ """
1824
+ ...
1825
+
1826
+ @typing.overload
1827
+ def pypi_base(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1828
+ ...
1829
+
1830
+ def pypi_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None):
1831
+ """
1832
+ Specifies the PyPI packages for all steps of the flow.
1833
+
1834
+ Use `@pypi_base` to set common packages required by all
1835
+ steps and use `@pypi` to specify step-specific overrides.
1836
+
1837
+ Parameters
1838
+ ----------
1839
+ packages : Dict[str, str], default: {}
1840
+ Packages to use for this flow. The key is the name of the package
1841
+ and the value is the version to use.
1842
+ python : str, optional, default: None
1843
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
1844
+ that the version used will correspond to the version of the Python interpreter used to start the run.
1851
1845
  """
1852
1846
  ...
1853
1847