ob-metaflow-stubs 6.0.3.188rc2__py2.py3-none-any.whl → 6.0.3.188rc3__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (243)
  1. metaflow-stubs/__init__.pyi +1015 -1010
  2. metaflow-stubs/cards.pyi +1 -1
  3. metaflow-stubs/cli.pyi +1 -1
  4. metaflow-stubs/cli_components/__init__.pyi +1 -1
  5. metaflow-stubs/cli_components/utils.pyi +1 -1
  6. metaflow-stubs/client/__init__.pyi +1 -1
  7. metaflow-stubs/client/core.pyi +4 -4
  8. metaflow-stubs/client/filecache.pyi +2 -2
  9. metaflow-stubs/events.pyi +2 -2
  10. metaflow-stubs/exception.pyi +1 -1
  11. metaflow-stubs/flowspec.pyi +3 -3
  12. metaflow-stubs/generated_for.txt +1 -1
  13. metaflow-stubs/includefile.pyi +2 -2
  14. metaflow-stubs/info_file.pyi +1 -1
  15. metaflow-stubs/metadata_provider/__init__.pyi +1 -1
  16. metaflow-stubs/metadata_provider/heartbeat.pyi +1 -1
  17. metaflow-stubs/metadata_provider/metadata.pyi +1 -1
  18. metaflow-stubs/metadata_provider/util.pyi +1 -1
  19. metaflow-stubs/metaflow_config.pyi +1 -1
  20. metaflow-stubs/metaflow_current.pyi +34 -21
  21. metaflow-stubs/metaflow_git.pyi +1 -1
  22. metaflow-stubs/mf_extensions/__init__.pyi +1 -1
  23. metaflow-stubs/mf_extensions/obcheckpoint/__init__.pyi +1 -1
  24. metaflow-stubs/mf_extensions/obcheckpoint/plugins/__init__.pyi +1 -1
  25. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/__init__.pyi +1 -1
  26. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/__init__.pyi +1 -1
  27. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/async_cards.pyi +2 -2
  28. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/deco_injection_mixin.pyi +1 -1
  29. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/extra_components.pyi +2 -2
  30. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/__init__.pyi +1 -1
  31. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/cards/__init__.pyi +1 -1
  32. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/cards/checkpoint_lister.pyi +3 -3
  33. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/cards/lineage_card.pyi +1 -1
  34. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/checkpoint_storage.pyi +3 -3
  35. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/constructors.pyi +1 -1
  36. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/core.pyi +2 -2
  37. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/decorator.pyi +2 -2
  38. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/exceptions.pyi +1 -1
  39. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/final_api.pyi +1 -1
  40. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/lineage.pyi +1 -1
  41. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/__init__.pyi +1 -1
  42. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/context.pyi +2 -2
  43. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/core.pyi +2 -2
  44. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/decorator.pyi +1 -1
  45. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/exceptions.pyi +1 -1
  46. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/task_utils.pyi +2 -2
  47. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/utils.pyi +1 -1
  48. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastructures.pyi +1 -1
  49. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/exceptions.pyi +1 -1
  50. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/hf_hub/__init__.pyi +1 -1
  51. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/hf_hub/decorator.pyi +1 -1
  52. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/__init__.pyi +1 -1
  53. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/core.pyi +1 -1
  54. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/exceptions.pyi +1 -1
  55. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/model_storage.pyi +2 -2
  56. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/__init__.pyi +1 -1
  57. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/flowspec_utils.pyi +1 -1
  58. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/general.pyi +1 -1
  59. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/identity_utils.pyi +2 -2
  60. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/serialization_handler/__init__.pyi +1 -1
  61. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/serialization_handler/base.pyi +1 -1
  62. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/serialization_handler/tar.pyi +1 -1
  63. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/tar_utils.pyi +1 -1
  64. metaflow-stubs/mf_extensions/outerbounds/__init__.pyi +1 -1
  65. metaflow-stubs/mf_extensions/outerbounds/plugins/__init__.pyi +1 -1
  66. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/__init__.pyi +1 -1
  67. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/__init__.pyi +9 -1
  68. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/_state_machine.pyi +2 -2
  69. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/_vendor/__init__.pyi +1 -1
  70. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/_vendor/spinner/__init__.pyi +1 -1
  71. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/_vendor/spinner/spinners.pyi +1 -1
  72. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/app_cli.pyi +2 -2
  73. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/app_config.pyi +2 -2
  74. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/capsule.pyi +4 -4
  75. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/click_importer.pyi +1 -1
  76. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/code_package/__init__.pyi +1 -1
  77. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/code_package/code_packager.pyi +1 -1
  78. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/__init__.pyi +3 -1
  79. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/cli_generator.pyi +1 -1
  80. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/config_utils.pyi +64 -45
  81. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/schema_export.pyi +1 -1
  82. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/typed_configs.pyi +50 -0
  83. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/unified_config.pyi +28 -1
  84. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/dependencies.pyi +2 -2
  85. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/deployer.pyi +46 -0
  86. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/experimental/__init__.pyi +1 -1
  87. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/perimeters.pyi +1 -1
  88. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/utils.pyi +3 -3
  89. metaflow-stubs/mf_extensions/outerbounds/plugins/aws/__init__.pyi +1 -1
  90. metaflow-stubs/mf_extensions/outerbounds/plugins/aws/assume_role_decorator.pyi +1 -1
  91. metaflow-stubs/mf_extensions/outerbounds/plugins/card_utilities/__init__.pyi +1 -1
  92. metaflow-stubs/mf_extensions/outerbounds/plugins/card_utilities/async_cards.pyi +2 -2
  93. metaflow-stubs/mf_extensions/outerbounds/plugins/card_utilities/injector.pyi +1 -1
  94. metaflow-stubs/mf_extensions/outerbounds/plugins/checkpoint_datastores/__init__.pyi +1 -1
  95. metaflow-stubs/mf_extensions/outerbounds/plugins/checkpoint_datastores/coreweave.pyi +1 -1
  96. metaflow-stubs/mf_extensions/outerbounds/plugins/checkpoint_datastores/nebius.pyi +1 -1
  97. metaflow-stubs/mf_extensions/outerbounds/plugins/fast_bakery/__init__.pyi +1 -1
  98. metaflow-stubs/mf_extensions/outerbounds/plugins/fast_bakery/baker.pyi +2 -2
  99. metaflow-stubs/mf_extensions/outerbounds/plugins/fast_bakery/docker_environment.pyi +1 -1
  100. metaflow-stubs/mf_extensions/outerbounds/plugins/fast_bakery/fast_bakery.pyi +1 -1
  101. metaflow-stubs/mf_extensions/outerbounds/plugins/kubernetes/__init__.pyi +1 -1
  102. metaflow-stubs/mf_extensions/outerbounds/plugins/kubernetes/pod_killer.pyi +1 -1
  103. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/__init__.pyi +1 -1
  104. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/constants.pyi +1 -1
  105. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/exceptions.pyi +1 -1
  106. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/ollama.pyi +1 -1
  107. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/status_card.pyi +1 -1
  108. metaflow-stubs/mf_extensions/outerbounds/plugins/snowflake/__init__.pyi +1 -1
  109. metaflow-stubs/mf_extensions/outerbounds/plugins/snowflake/snowflake.pyi +1 -1
  110. metaflow-stubs/mf_extensions/outerbounds/profilers/__init__.pyi +1 -1
  111. metaflow-stubs/mf_extensions/outerbounds/profilers/gpu.pyi +1 -1
  112. metaflow-stubs/mf_extensions/outerbounds/remote_config.pyi +1 -1
  113. metaflow-stubs/mf_extensions/outerbounds/toplevel/__init__.pyi +1 -1
  114. metaflow-stubs/mf_extensions/outerbounds/toplevel/global_aliases_for_metaflow_package.pyi +2 -1
  115. metaflow-stubs/multicore_utils.pyi +1 -1
  116. metaflow-stubs/ob_internal.pyi +2 -1
  117. metaflow-stubs/parameters.pyi +2 -2
  118. metaflow-stubs/plugins/__init__.pyi +13 -13
  119. metaflow-stubs/plugins/airflow/__init__.pyi +1 -1
  120. metaflow-stubs/plugins/airflow/airflow_utils.pyi +1 -1
  121. metaflow-stubs/plugins/airflow/exception.pyi +1 -1
  122. metaflow-stubs/plugins/airflow/sensors/__init__.pyi +1 -1
  123. metaflow-stubs/plugins/airflow/sensors/base_sensor.pyi +1 -1
  124. metaflow-stubs/plugins/airflow/sensors/external_task_sensor.pyi +1 -1
  125. metaflow-stubs/plugins/airflow/sensors/s3_sensor.pyi +1 -1
  126. metaflow-stubs/plugins/argo/__init__.pyi +1 -1
  127. metaflow-stubs/plugins/argo/argo_client.pyi +1 -1
  128. metaflow-stubs/plugins/argo/argo_events.pyi +1 -1
  129. metaflow-stubs/plugins/argo/argo_workflows.pyi +2 -2
  130. metaflow-stubs/plugins/argo/argo_workflows_decorator.pyi +2 -2
  131. metaflow-stubs/plugins/argo/argo_workflows_deployer.pyi +2 -2
  132. metaflow-stubs/plugins/argo/argo_workflows_deployer_objects.pyi +2 -2
  133. metaflow-stubs/plugins/aws/__init__.pyi +1 -1
  134. metaflow-stubs/plugins/aws/aws_client.pyi +1 -1
  135. metaflow-stubs/plugins/aws/aws_utils.pyi +1 -1
  136. metaflow-stubs/plugins/aws/batch/__init__.pyi +1 -1
  137. metaflow-stubs/plugins/aws/batch/batch.pyi +1 -1
  138. metaflow-stubs/plugins/aws/batch/batch_client.pyi +1 -1
  139. metaflow-stubs/plugins/aws/batch/batch_decorator.pyi +1 -1
  140. metaflow-stubs/plugins/aws/secrets_manager/__init__.pyi +1 -1
  141. metaflow-stubs/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.pyi +2 -2
  142. metaflow-stubs/plugins/aws/step_functions/__init__.pyi +1 -1
  143. metaflow-stubs/plugins/aws/step_functions/event_bridge_client.pyi +1 -1
  144. metaflow-stubs/plugins/aws/step_functions/schedule_decorator.pyi +1 -1
  145. metaflow-stubs/plugins/aws/step_functions/step_functions.pyi +1 -1
  146. metaflow-stubs/plugins/aws/step_functions/step_functions_client.pyi +1 -1
  147. metaflow-stubs/plugins/aws/step_functions/step_functions_deployer.pyi +2 -2
  148. metaflow-stubs/plugins/aws/step_functions/step_functions_deployer_objects.pyi +2 -2
  149. metaflow-stubs/plugins/azure/__init__.pyi +1 -1
  150. metaflow-stubs/plugins/azure/azure_credential.pyi +1 -1
  151. metaflow-stubs/plugins/azure/azure_exceptions.pyi +1 -1
  152. metaflow-stubs/plugins/azure/azure_secret_manager_secrets_provider.pyi +2 -2
  153. metaflow-stubs/plugins/azure/azure_utils.pyi +1 -1
  154. metaflow-stubs/plugins/azure/blob_service_client_factory.pyi +1 -1
  155. metaflow-stubs/plugins/azure/includefile_support.pyi +1 -1
  156. metaflow-stubs/plugins/cards/__init__.pyi +1 -1
  157. metaflow-stubs/plugins/cards/card_client.pyi +1 -1
  158. metaflow-stubs/plugins/cards/card_creator.pyi +1 -1
  159. metaflow-stubs/plugins/cards/card_datastore.pyi +1 -1
  160. metaflow-stubs/plugins/cards/card_decorator.pyi +1 -1
  161. metaflow-stubs/plugins/cards/card_modules/__init__.pyi +1 -1
  162. metaflow-stubs/plugins/cards/card_modules/basic.pyi +2 -2
  163. metaflow-stubs/plugins/cards/card_modules/card.pyi +1 -1
  164. metaflow-stubs/plugins/cards/card_modules/components.pyi +2 -2
  165. metaflow-stubs/plugins/cards/card_modules/convert_to_native_type.pyi +1 -1
  166. metaflow-stubs/plugins/cards/card_modules/renderer_tools.pyi +1 -1
  167. metaflow-stubs/plugins/cards/card_modules/test_cards.pyi +1 -1
  168. metaflow-stubs/plugins/cards/card_resolver.pyi +1 -1
  169. metaflow-stubs/plugins/cards/component_serializer.pyi +1 -1
  170. metaflow-stubs/plugins/cards/exception.pyi +1 -1
  171. metaflow-stubs/plugins/catch_decorator.pyi +2 -2
  172. metaflow-stubs/plugins/datatools/__init__.pyi +1 -1
  173. metaflow-stubs/plugins/datatools/local.pyi +1 -1
  174. metaflow-stubs/plugins/datatools/s3/__init__.pyi +1 -1
  175. metaflow-stubs/plugins/datatools/s3/s3.pyi +2 -2
  176. metaflow-stubs/plugins/datatools/s3/s3tail.pyi +1 -1
  177. metaflow-stubs/plugins/datatools/s3/s3util.pyi +1 -1
  178. metaflow-stubs/plugins/debug_logger.pyi +1 -1
  179. metaflow-stubs/plugins/debug_monitor.pyi +1 -1
  180. metaflow-stubs/plugins/environment_decorator.pyi +1 -1
  181. metaflow-stubs/plugins/events_decorator.pyi +1 -1
  182. metaflow-stubs/plugins/frameworks/__init__.pyi +1 -1
  183. metaflow-stubs/plugins/frameworks/pytorch.pyi +1 -1
  184. metaflow-stubs/plugins/gcp/__init__.pyi +1 -1
  185. metaflow-stubs/plugins/gcp/gcp_secret_manager_secrets_provider.pyi +2 -2
  186. metaflow-stubs/plugins/gcp/gs_exceptions.pyi +1 -1
  187. metaflow-stubs/plugins/gcp/gs_storage_client_factory.pyi +1 -1
  188. metaflow-stubs/plugins/gcp/gs_utils.pyi +1 -1
  189. metaflow-stubs/plugins/gcp/includefile_support.pyi +1 -1
  190. metaflow-stubs/plugins/kubernetes/__init__.pyi +1 -1
  191. metaflow-stubs/plugins/kubernetes/kube_utils.pyi +1 -1
  192. metaflow-stubs/plugins/kubernetes/kubernetes.pyi +1 -1
  193. metaflow-stubs/plugins/kubernetes/kubernetes_client.pyi +1 -1
  194. metaflow-stubs/plugins/kubernetes/kubernetes_decorator.pyi +1 -1
  195. metaflow-stubs/plugins/kubernetes/kubernetes_jobsets.pyi +1 -1
  196. metaflow-stubs/plugins/kubernetes/spot_monitor_sidecar.pyi +1 -1
  197. metaflow-stubs/plugins/ollama/__init__.pyi +1 -1
  198. metaflow-stubs/plugins/parallel_decorator.pyi +1 -1
  199. metaflow-stubs/plugins/perimeters.pyi +1 -1
  200. metaflow-stubs/plugins/project_decorator.pyi +1 -1
  201. metaflow-stubs/plugins/pypi/__init__.pyi +2 -2
  202. metaflow-stubs/plugins/pypi/conda_decorator.pyi +1 -1
  203. metaflow-stubs/plugins/pypi/conda_environment.pyi +2 -2
  204. metaflow-stubs/plugins/pypi/parsers.pyi +1 -1
  205. metaflow-stubs/plugins/pypi/pypi_decorator.pyi +1 -1
  206. metaflow-stubs/plugins/pypi/pypi_environment.pyi +1 -1
  207. metaflow-stubs/plugins/pypi/utils.pyi +1 -1
  208. metaflow-stubs/plugins/resources_decorator.pyi +1 -1
  209. metaflow-stubs/plugins/retry_decorator.pyi +1 -1
  210. metaflow-stubs/plugins/secrets/__init__.pyi +1 -1
  211. metaflow-stubs/plugins/secrets/inline_secrets_provider.pyi +2 -2
  212. metaflow-stubs/plugins/secrets/secrets_decorator.pyi +1 -1
  213. metaflow-stubs/plugins/snowflake/__init__.pyi +1 -1
  214. metaflow-stubs/plugins/storage_executor.pyi +1 -1
  215. metaflow-stubs/plugins/test_unbounded_foreach_decorator.pyi +1 -1
  216. metaflow-stubs/plugins/timeout_decorator.pyi +2 -2
  217. metaflow-stubs/plugins/torchtune/__init__.pyi +1 -1
  218. metaflow-stubs/plugins/uv/__init__.pyi +1 -1
  219. metaflow-stubs/plugins/uv/uv_environment.pyi +1 -1
  220. metaflow-stubs/profilers/__init__.pyi +1 -1
  221. metaflow-stubs/pylint_wrapper.pyi +1 -1
  222. metaflow-stubs/runner/__init__.pyi +1 -1
  223. metaflow-stubs/runner/deployer.pyi +4 -4
  224. metaflow-stubs/runner/deployer_impl.pyi +1 -1
  225. metaflow-stubs/runner/metaflow_runner.pyi +2 -2
  226. metaflow-stubs/runner/nbdeploy.pyi +1 -1
  227. metaflow-stubs/runner/nbrun.pyi +1 -1
  228. metaflow-stubs/runner/subprocess_manager.pyi +1 -1
  229. metaflow-stubs/runner/utils.pyi +1 -1
  230. metaflow-stubs/system/__init__.pyi +1 -1
  231. metaflow-stubs/system/system_logger.pyi +1 -1
  232. metaflow-stubs/system/system_monitor.pyi +1 -1
  233. metaflow-stubs/tagging_util.pyi +1 -1
  234. metaflow-stubs/tuple_util.pyi +1 -1
  235. metaflow-stubs/user_configs/__init__.pyi +1 -1
  236. metaflow-stubs/user_configs/config_decorators.pyi +4 -4
  237. metaflow-stubs/user_configs/config_options.pyi +2 -2
  238. metaflow-stubs/user_configs/config_parameters.pyi +4 -4
  239. {ob_metaflow_stubs-6.0.3.188rc2.dist-info → ob_metaflow_stubs-6.0.3.188rc3.dist-info}/METADATA +1 -1
  240. ob_metaflow_stubs-6.0.3.188rc3.dist-info/RECORD +243 -0
  241. ob_metaflow_stubs-6.0.3.188rc2.dist-info/RECORD +0 -241
  242. {ob_metaflow_stubs-6.0.3.188rc2.dist-info → ob_metaflow_stubs-6.0.3.188rc3.dist-info}/WHEEL +0 -0
  243. {ob_metaflow_stubs-6.0.3.188rc2.dist-info → ob_metaflow_stubs-6.0.3.188rc3.dist-info}/top_level.txt +0 -0
@@ -1,15 +1,15 @@
1
1
  ######################################################################################################
2
2
  # Auto-generated Metaflow stub file #
3
3
  # MF version: 2.15.18.1+obcheckpoint(0.2.4);ob(v1) #
4
- # Generated on 2025-07-09T05:17:09.660228 #
4
+ # Generated on 2025-07-10T08:45:58.339588 #
5
5
  ######################################################################################################
6
6
 
7
7
  from __future__ import annotations
8
8
 
9
9
  import typing
10
10
  if typing.TYPE_CHECKING:
11
- import typing
12
11
  import datetime
12
+ import typing
13
13
  FlowSpecDerived = typing.TypeVar("FlowSpecDerived", bound="FlowSpec", contravariant=False, covariant=False)
14
14
  StepFlag = typing.NewType("StepFlag", bool)
15
15
 
@@ -35,18 +35,18 @@ from .user_configs.config_parameters import ConfigValue as ConfigValue
35
35
  from .user_configs.config_parameters import config_expr as config_expr
36
36
  from .user_configs.config_decorators import CustomFlowDecorator as CustomFlowDecorator
37
37
  from .user_configs.config_decorators import CustomStepDecorator as CustomStepDecorator
38
- from . import metaflow_git as metaflow_git
39
38
  from . import cards as cards
40
- from . import events as events
41
39
  from . import tuple_util as tuple_util
40
+ from . import metaflow_git as metaflow_git
41
+ from . import events as events
42
42
  from . import runner as runner
43
43
  from . import plugins as plugins
44
44
  from .mf_extensions.outerbounds.toplevel.global_aliases_for_metaflow_package import S3 as S3
45
45
  from . import includefile as includefile
46
46
  from .includefile import IncludeFile as IncludeFile
47
+ from .plugins.pypi.parsers import conda_environment_yml_parser as conda_environment_yml_parser
47
48
  from .plugins.pypi.parsers import pyproject_toml_parser as pyproject_toml_parser
48
49
  from .plugins.pypi.parsers import requirements_txt_parser as requirements_txt_parser
49
- from .plugins.pypi.parsers import conda_environment_yml_parser as conda_environment_yml_parser
50
50
  from . import client as client
51
51
  from .client.core import namespace as namespace
52
52
  from .client.core import get_namespace as get_namespace
@@ -73,6 +73,7 @@ from .mf_extensions.outerbounds.plugins.snowflake.snowflake import Snowflake as
73
73
  from .mf_extensions.outerbounds.plugins.checkpoint_datastores.nebius import nebius_checkpoints as nebius_checkpoints
74
74
  from .mf_extensions.outerbounds.plugins.checkpoint_datastores.coreweave import coreweave_checkpoints as coreweave_checkpoints
75
75
  from .mf_extensions.outerbounds.plugins.aws.assume_role_decorator import assume_role as assume_role
76
+ from .mf_extensions.outerbounds.plugins.apps.core.deployer import AppDeployer as AppDeployer
76
77
  from . import cli_components as cli_components
77
78
  from . import system as system
78
79
  from . import pylint_wrapper as pylint_wrapper
@@ -156,314 +157,384 @@ def step(f: typing.Union[typing.Callable[[FlowSpecDerived], None], typing.Callab
156
157
  ...
157
158
 
158
159
  @typing.overload
159
- def resources(*, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
160
+ def retry(*, times: int = 3, minutes_between_retries: int = 2) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
160
161
  """
161
- Specifies the resources needed when executing this step.
162
+ Specifies the number of times the task corresponding
163
+ to a step needs to be retried.
162
164
 
163
- Use `@resources` to specify the resource requirements
164
- independently of the specific compute layer (`@batch`, `@kubernetes`).
165
+ This decorator is useful for handling transient errors, such as networking issues.
166
+ If your task contains operations that can't be retried safely, e.g. database updates,
167
+ it is advisable to annotate it with `@retry(times=0)`.
165
168
 
166
- You can choose the compute layer on the command line by executing e.g.
167
- ```
168
- python myflow.py run --with batch
169
- ```
170
- or
171
- ```
172
- python myflow.py run --with kubernetes
173
- ```
174
- which executes the flow on the desired system using the
175
- requirements specified in `@resources`.
169
+ This can be used in conjunction with the `@catch` decorator. The `@catch`
170
+ decorator will execute a no-op task after all retries have been exhausted,
171
+ ensuring that the flow execution can continue.
176
172
 
177
173
 
178
174
  Parameters
179
175
  ----------
180
- cpu : int, default 1
181
- Number of CPUs required for this step.
182
- gpu : int, optional, default None
183
- Number of GPUs required for this step.
184
- disk : int, optional, default None
185
- Disk size (in MB) required for this step. Only applies on Kubernetes.
186
- memory : int, default 4096
187
- Memory size (in MB) required for this step.
188
- shared_memory : int, optional, default None
189
- The value for the size (in MiB) of the /dev/shm volume for this step.
190
- This parameter maps to the `--shm-size` option in Docker.
176
+ times : int, default 3
177
+ Number of times to retry this task.
178
+ minutes_between_retries : int, default 2
179
+ Number of minutes between retries.
191
180
  """
192
181
  ...
193
182
 
194
183
  @typing.overload
195
- def resources(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
184
+ def retry(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
196
185
  ...
197
186
 
198
187
  @typing.overload
199
- def resources(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
188
+ def retry(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
200
189
  ...
201
190
 
202
- def resources(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None):
191
+ def retry(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, times: int = 3, minutes_between_retries: int = 2):
203
192
  """
204
- Specifies the resources needed when executing this step.
205
-
206
- Use `@resources` to specify the resource requirements
207
- independently of the specific compute layer (`@batch`, `@kubernetes`).
208
-
209
- You can choose the compute layer on the command line by executing e.g.
210
- ```
211
- python myflow.py run --with batch
212
- ```
213
- or
214
- ```
215
- python myflow.py run --with kubernetes
216
- ```
217
- which executes the flow on the desired system using the
218
- requirements specified in `@resources`.
193
+ Specifies the number of times the task corresponding
194
+ to a step needs to be retried.
219
195
 
196
+ This decorator is useful for handling transient errors, such as networking issues.
197
+ If your task contains operations that can't be retried safely, e.g. database updates,
198
+ it is advisable to annotate it with `@retry(times=0)`.
220
199
 
221
- Parameters
222
- ----------
223
- cpu : int, default 1
224
- Number of CPUs required for this step.
225
- gpu : int, optional, default None
226
- Number of GPUs required for this step.
227
- disk : int, optional, default None
228
- Disk size (in MB) required for this step. Only applies on Kubernetes.
229
- memory : int, default 4096
230
- Memory size (in MB) required for this step.
231
- shared_memory : int, optional, default None
232
- The value for the size (in MiB) of the /dev/shm volume for this step.
233
- This parameter maps to the `--shm-size` option in Docker.
234
- """
235
- ...
236
-
237
- def nvidia(*, gpu: int, gpu_type: str, queue_timeout: int) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
238
- """
239
- Specifies that this step should execute on DGX cloud.
200
+ This can be used in conjunction with the `@catch` decorator. The `@catch`
201
+ decorator will execute a no-op task after all retries have been exhausted,
202
+ ensuring that the flow execution can continue.
240
203
 
241
204
 
242
205
  Parameters
243
206
  ----------
244
- gpu : int
245
- Number of GPUs to use.
246
- gpu_type : str
247
- Type of Nvidia GPU to use.
248
- queue_timeout : int
249
- Time to keep the job in NVCF's queue.
207
+ times : int, default 3
208
+ Number of times to retry this task.
209
+ minutes_between_retries : int, default 2
210
+ Number of minutes between retries.
250
211
  """
251
212
  ...
252
213
 
253
214
  @typing.overload
254
- def timeout(*, seconds: int = 0, minutes: int = 0, hours: int = 0) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
215
+ def secrets(*, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = []) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
255
216
  """
256
- Specifies a timeout for your step.
257
-
258
- This decorator is useful if this step may hang indefinitely.
259
-
260
- This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
261
- A timeout is considered to be an exception thrown by the step. It will cause the step to be
262
- retried if needed and the exception will be caught by the `@catch` decorator, if present.
263
-
264
- Note that all the values specified in parameters are added together so if you specify
265
- 60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.
217
+ Specifies secrets to be retrieved and injected as environment variables prior to
218
+ the execution of a step.
266
219
 
267
220
 
268
221
  Parameters
269
222
  ----------
270
- seconds : int, default 0
271
- Number of seconds to wait prior to timing out.
272
- minutes : int, default 0
273
- Number of minutes to wait prior to timing out.
274
- hours : int, default 0
275
- Number of hours to wait prior to timing out.
223
+ sources : List[Union[str, Dict[str, Any]]], default: []
224
+ List of secret specs, defining how the secrets are to be retrieved
276
225
  """
277
226
  ...
278
227
 
279
228
  @typing.overload
280
- def timeout(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
229
+ def secrets(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
281
230
  ...
282
231
 
283
232
  @typing.overload
284
- def timeout(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
233
+ def secrets(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
285
234
  ...
286
235
 
287
- def timeout(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, seconds: int = 0, minutes: int = 0, hours: int = 0):
236
+ def secrets(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = []):
288
237
  """
289
- Specifies a timeout for your step.
290
-
291
- This decorator is useful if this step may hang indefinitely.
292
-
293
- This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
294
- A timeout is considered to be an exception thrown by the step. It will cause the step to be
295
- retried if needed and the exception will be caught by the `@catch` decorator, if present.
296
-
297
- Note that all the values specified in parameters are added together so if you specify
298
- 60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.
238
+ Specifies secrets to be retrieved and injected as environment variables prior to
239
+ the execution of a step.
299
240
 
300
241
 
301
242
  Parameters
302
243
  ----------
303
- seconds : int, default 0
304
- Number of seconds to wait prior to timing out.
305
- minutes : int, default 0
306
- Number of minutes to wait prior to timing out.
307
- hours : int, default 0
308
- Number of hours to wait prior to timing out.
244
+ sources : List[Union[str, Dict[str, Any]]], default: []
245
+ List of secret specs, defining how the secrets are to be retrieved
309
246
  """
310
247
  ...
311
248
 
312
249
  @typing.overload
313
- def pypi(*, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
250
+ def checkpoint(*, load_policy: str = 'fresh', temp_dir_root: str = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
314
251
  """
315
- Specifies the PyPI packages for the step.
252
+ Enables checkpointing for a step.
316
253
 
317
- Information in this decorator will augment any
318
- attributes set in the `@pyi_base` flow-level decorator. Hence,
319
- you can use `@pypi_base` to set packages required by all
320
- steps and use `@pypi` to specify step-specific overrides.
254
+ > Examples
255
+
256
+ - Saving Checkpoints
257
+
258
+ ```python
259
+ @checkpoint
260
+ @step
261
+ def train(self):
262
+ model = create_model(self.parameters, checkpoint_path = None)
263
+ for i in range(self.epochs):
264
+ # some training logic
265
+ loss = model.train(self.dataset)
266
+ if i % 10 == 0:
267
+ model.save(
268
+ current.checkpoint.directory,
269
+ )
270
+ # saves the contents of the `current.checkpoint.directory` as a checkpoint
271
+ # and returns a reference dictionary to the checkpoint saved in the datastore
272
+ self.latest_checkpoint = current.checkpoint.save(
273
+ name="epoch_checkpoint",
274
+ metadata={
275
+ "epoch": i,
276
+ "loss": loss,
277
+ }
278
+ )
279
+ ```
280
+
281
+ - Using Loaded Checkpoints
282
+
283
+ ```python
284
+ @retry(times=3)
285
+ @checkpoint
286
+ @step
287
+ def train(self):
288
+ # Assume that the task has restarted and the previous attempt of the task
289
+ # saved a checkpoint
290
+ checkpoint_path = None
291
+ if current.checkpoint.is_loaded: # Check if a checkpoint is loaded
292
+ print("Loaded checkpoint from the previous attempt")
293
+ checkpoint_path = current.checkpoint.directory
294
+
295
+ model = create_model(self.parameters, checkpoint_path = checkpoint_path)
296
+ for i in range(self.epochs):
297
+ ...
298
+ ```
321
299
 
322
300
 
323
301
  Parameters
324
302
  ----------
325
- packages : Dict[str, str], default: {}
326
- Packages to use for this step. The key is the name of the package
327
- and the value is the version to use.
328
- python : str, optional, default: None
329
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
330
- that the version used will correspond to the version of the Python interpreter used to start the run.
303
+ load_policy : str, default: "fresh"
304
+ The policy for loading the checkpoint. The following policies are supported:
305
+ - "eager": Loads the the latest available checkpoint within the namespace.
306
+ With this mode, the latest checkpoint written by any previous task (can be even a different run) of the step
307
+ will be loaded at the start of the task.
308
+ - "none": Do not load any checkpoint
309
+ - "fresh": Loads the lastest checkpoint created within the running Task.
310
+ This mode helps loading checkpoints across various retry attempts of the same task.
311
+ With this mode, no checkpoint will be loaded at the start of a task but any checkpoints
312
+ created within the task will be loaded when the task is retries execution on failure.
313
+
314
+ temp_dir_root : str, default: None
315
+ The root directory under which `current.checkpoint.directory` will be created.
331
316
  """
332
317
  ...
333
318
 
334
319
  @typing.overload
335
- def pypi(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
320
+ def checkpoint(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
336
321
  ...
337
322
 
338
323
  @typing.overload
339
- def pypi(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
324
+ def checkpoint(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
340
325
  ...
341
326
 
342
- def pypi(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None):
327
+ def checkpoint(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, load_policy: str = 'fresh', temp_dir_root: str = None):
343
328
  """
344
- Specifies the PyPI packages for the step.
345
-
346
- Information in this decorator will augment any
347
- attributes set in the `@pyi_base` flow-level decorator. Hence,
348
- you can use `@pypi_base` to set packages required by all
349
- steps and use `@pypi` to specify step-specific overrides.
350
-
329
+ Enables checkpointing for a step.
351
330
 
352
- Parameters
353
- ----------
354
- packages : Dict[str, str], default: {}
355
- Packages to use for this step. The key is the name of the package
356
- and the value is the version to use.
357
- python : str, optional, default: None
358
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
359
- that the version used will correspond to the version of the Python interpreter used to start the run.
360
- """
361
- ...
362
-
363
- def nvct(*, gpu: int, gpu_type: str) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
364
- """
365
- Specifies that this step should execute on DGX cloud.
331
+ > Examples
366
332
 
333
+ - Saving Checkpoints
367
334
 
368
- Parameters
369
- ----------
370
- gpu : int
371
- Number of GPUs to use.
372
- gpu_type : str
373
- Type of Nvidia GPU to use.
335
+ ```python
336
+ @checkpoint
337
+ @step
338
+ def train(self):
339
+ model = create_model(self.parameters, checkpoint_path = None)
340
+ for i in range(self.epochs):
341
+ # some training logic
342
+ loss = model.train(self.dataset)
343
+ if i % 10 == 0:
344
+ model.save(
345
+ current.checkpoint.directory,
346
+ )
347
+ # saves the contents of the `current.checkpoint.directory` as a checkpoint
348
+ # and returns a reference dictionary to the checkpoint saved in the datastore
349
+ self.latest_checkpoint = current.checkpoint.save(
350
+ name="epoch_checkpoint",
351
+ metadata={
352
+ "epoch": i,
353
+ "loss": loss,
354
+ }
355
+ )
356
+ ```
357
+
358
+ - Using Loaded Checkpoints
359
+
360
+ ```python
361
+ @retry(times=3)
362
+ @checkpoint
363
+ @step
364
+ def train(self):
365
+ # Assume that the task has restarted and the previous attempt of the task
366
+ # saved a checkpoint
367
+ checkpoint_path = None
368
+ if current.checkpoint.is_loaded: # Check if a checkpoint is loaded
369
+ print("Loaded checkpoint from the previous attempt")
370
+ checkpoint_path = current.checkpoint.directory
371
+
372
+ model = create_model(self.parameters, checkpoint_path = checkpoint_path)
373
+ for i in range(self.epochs):
374
+ ...
375
+ ```
376
+
377
+
378
+ Parameters
379
+ ----------
380
+ load_policy : str, default: "fresh"
381
+ The policy for loading the checkpoint. The following policies are supported:
382
+ - "eager": Loads the the latest available checkpoint within the namespace.
383
+ With this mode, the latest checkpoint written by any previous task (can be even a different run) of the step
384
+ will be loaded at the start of the task.
385
+ - "none": Do not load any checkpoint
386
+ - "fresh": Loads the lastest checkpoint created within the running Task.
387
+ This mode helps loading checkpoints across various retry attempts of the same task.
388
+ With this mode, no checkpoint will be loaded at the start of a task but any checkpoints
389
+ created within the task will be loaded when the task is retries execution on failure.
390
+
391
+ temp_dir_root : str, default: None
392
+ The root directory under which `current.checkpoint.directory` will be created.
374
393
  """
375
394
  ...
376
395
 
377
- def ollama(*, models: list, backend: str, force_pull: bool, cache_update_policy: str, force_cache_update: bool, debug: bool, circuit_breaker_config: dict, timeout_config: dict) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
396
+ @typing.overload
397
+ def model(*, load: typing.Union[typing.List[str], str, typing.List[typing.Tuple[str, typing.Optional[str]]]] = None, temp_dir_root: str = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
378
398
  """
379
- This decorator is used to run Ollama APIs as Metaflow task sidecars.
399
+ Enables loading / saving of models within a step.
380
400
 
381
- User code call
382
- --------------
383
- @ollama(
384
- models=[...],
385
- ...
386
- )
401
+ > Examples
402
+ - Saving Models
403
+ ```python
404
+ @model
405
+ @step
406
+ def train(self):
407
+ # current.model.save returns a dictionary reference to the model saved
408
+ self.my_model = current.model.save(
409
+ path_to_my_model,
410
+ label="my_model",
411
+ metadata={
412
+ "epochs": 10,
413
+ "batch-size": 32,
414
+ "learning-rate": 0.001,
415
+ }
416
+ )
417
+ self.next(self.test)
387
418
 
388
- Valid backend options
389
- ---------------------
390
- - 'local': Run as a separate process on the local task machine.
391
- - (TODO) 'managed': Outerbounds hosts and selects compute provider.
392
- - (TODO) 'remote': Spin up separate instance to serve Ollama models.
419
+ @model(load="my_model")
420
+ @step
421
+ def test(self):
422
+ # `current.model.loaded` returns a dictionary of the loaded models
423
+ # where the key is the name of the artifact and the value is the path to the model
424
+ print(os.listdir(current.model.loaded["my_model"]))
425
+ self.next(self.end)
426
+ ```
393
427
 
394
- Valid model options
395
- -------------------
396
- Any model here https://ollama.com/search, e.g. 'llama3.2', 'llama3.3'
428
+ - Loading models
429
+ ```python
430
+ @step
431
+ def train(self):
432
+ # current.model.load returns the path to the model loaded
433
+ checkpoint_path = current.model.load(
434
+ self.checkpoint_key,
435
+ )
436
+ model_path = current.model.load(
437
+ self.model,
438
+ )
439
+ self.next(self.test)
440
+ ```
397
441
 
398
442
 
399
443
  Parameters
400
444
  ----------
401
- models: list[str]
402
- List of Ollama containers running models in sidecars.
403
- backend: str
404
- Determines where and how to run the Ollama process.
405
- force_pull: bool
406
- Whether to run `ollama pull` no matter what, or first check the remote cache in Metaflow datastore for this model key.
407
- cache_update_policy: str
408
- Cache update policy: "auto", "force", or "never".
409
- force_cache_update: bool
410
- Simple override for "force" cache update policy.
411
- debug: bool
412
- Whether to turn on verbose debugging logs.
413
- circuit_breaker_config: dict
414
- Configuration for circuit breaker protection. Keys: failure_threshold, recovery_timeout, reset_timeout.
415
- timeout_config: dict
416
- Configuration for various operation timeouts. Keys: pull, stop, health_check, install, server_startup.
445
+ load : Union[List[str],str,List[Tuple[str,Union[str,None]]]], default: None
446
+ Artifact name/s referencing the models/checkpoints to load. Artifact names refer to the names of the instance variables set to `self`.
447
+ These artifact names give to `load` be reference objects or reference `key` string's from objects created by `current.checkpoint` / `current.model` / `current.huggingface_hub`.
448
+ If a list of tuples is provided, the first element is the artifact name and the second element is the path the artifact needs be unpacked on
449
+ the local filesystem. If the second element is None, the artifact will be unpacked in the current working directory.
450
+ If a string is provided, then the artifact corresponding to that name will be loaded in the current working directory.
451
+
452
+ temp_dir_root : str, default: None
453
+ The root directory under which `current.model.loaded` will store loaded models
417
454
  """
418
455
  ...
419
456
 
420
- def vllm(*, model: str, backend: str, openai_api_server: bool, debug: bool, card_refresh_interval: int, max_retries: int, retry_alert_frequency: int, engine_args: dict) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
457
+ @typing.overload
458
+ def model(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
459
+ ...
460
+
461
+ @typing.overload
462
+ def model(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
463
+ ...
464
+
465
+ def model(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, load: typing.Union[typing.List[str], str, typing.List[typing.Tuple[str, typing.Optional[str]]]] = None, temp_dir_root: str = None):
421
466
  """
422
- This decorator is used to run vllm APIs as Metaflow task sidecars.
467
+ Enables loading / saving of models within a step.
423
468
 
424
- User code call
425
- --------------
426
- @vllm(
427
- model="...",
428
- ...
429
- )
469
+ > Examples
470
+ - Saving Models
471
+ ```python
472
+ @model
473
+ @step
474
+ def train(self):
475
+ # current.model.save returns a dictionary reference to the model saved
476
+ self.my_model = current.model.save(
477
+ path_to_my_model,
478
+ label="my_model",
479
+ metadata={
480
+ "epochs": 10,
481
+ "batch-size": 32,
482
+ "learning-rate": 0.001,
483
+ }
484
+ )
485
+ self.next(self.test)
430
486
 
431
- Valid backend options
432
- ---------------------
433
- - 'local': Run as a separate process on the local task machine.
487
+ @model(load="my_model")
488
+ @step
489
+ def test(self):
490
+ # `current.model.loaded` returns a dictionary of the loaded models
491
+ # where the key is the name of the artifact and the value is the path to the model
492
+ print(os.listdir(current.model.loaded["my_model"]))
493
+ self.next(self.end)
494
+ ```
434
495
 
435
- Valid model options
436
- -------------------
437
- Any HuggingFace model identifier, e.g. 'meta-llama/Llama-3.2-1B'
496
+ - Loading models
497
+ ```python
498
+ @step
499
+ def train(self):
500
+ # current.model.load returns the path to the model loaded
501
+ checkpoint_path = current.model.load(
502
+ self.checkpoint_key,
503
+ )
504
+ model_path = current.model.load(
505
+ self.model,
506
+ )
507
+ self.next(self.test)
508
+ ```
438
509
 
439
- NOTE: vLLM's OpenAI-compatible server serves ONE model per server instance.
440
- If you need multiple models, you must create multiple @vllm decorators.
510
+
511
+ Parameters
512
+ ----------
513
+ load : Union[List[str],str,List[Tuple[str,Union[str,None]]]], default: None
514
+ Artifact name/s referencing the models/checkpoints to load. Artifact names refer to the names of the instance variables set to `self`.
515
+ These artifact names give to `load` be reference objects or reference `key` string's from objects created by `current.checkpoint` / `current.model` / `current.huggingface_hub`.
516
+ If a list of tuples is provided, the first element is the artifact name and the second element is the path the artifact needs be unpacked on
517
+ the local filesystem. If the second element is None, the artifact will be unpacked in the current working directory.
518
+ If a string is provided, then the artifact corresponding to that name will be loaded in the current working directory.
519
+
520
+ temp_dir_root : str, default: None
521
+ The root directory under which `current.model.loaded` will store loaded models
522
+ """
523
+ ...
524
+
525
+ def nvidia(*, gpu: int, gpu_type: str, queue_timeout: int) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
526
+ """
527
+ Specifies that this step should execute on DGX cloud.
441
528
 
442
529
 
443
530
  Parameters
444
531
  ----------
445
- model: str
446
- HuggingFace model identifier to be served by vLLM.
447
- backend: str
448
- Determines where and how to run the vLLM process.
449
- openai_api_server: bool
450
- Whether to use OpenAI-compatible API server mode (subprocess) instead of native engine.
451
- Default is False (uses native engine).
452
- Set to True for backward compatibility with existing code.
453
- debug: bool
454
- Whether to turn on verbose debugging logs.
455
- card_refresh_interval: int
456
- Interval in seconds for refreshing the vLLM status card.
457
- Only used when openai_api_server=True.
458
- max_retries: int
459
- Maximum number of retries checking for vLLM server startup.
460
- Only used when openai_api_server=True.
461
- retry_alert_frequency: int
462
- Frequency of alert logs for vLLM server startup retries.
463
- Only used when openai_api_server=True.
464
- engine_args : dict
465
- Additional keyword arguments to pass to the vLLM engine.
466
- For example, `tensor_parallel_size=2`.
532
+ gpu : int
533
+ Number of GPUs to use.
534
+ gpu_type : str
535
+ Type of Nvidia GPU to use.
536
+ queue_timeout : int
537
+ Time to keep the job in NVCF's queue.
467
538
  """
468
539
  ...
469
540
 
@@ -527,7 +598,58 @@ def conda(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], ty
527
598
  ...
528
599
 
529
600
  @typing.overload
530
- def catch(*, var: typing.Optional[str] = None, print_exception: bool = True) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
601
+ def pypi(*, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
602
+ """
603
+ Specifies the PyPI packages for the step.
604
+
605
+ Information in this decorator will augment any
606
+ attributes set in the `@pyi_base` flow-level decorator. Hence,
607
+ you can use `@pypi_base` to set packages required by all
608
+ steps and use `@pypi` to specify step-specific overrides.
609
+
610
+
611
+ Parameters
612
+ ----------
613
+ packages : Dict[str, str], default: {}
614
+ Packages to use for this step. The key is the name of the package
615
+ and the value is the version to use.
616
+ python : str, optional, default: None
617
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
618
+ that the version used will correspond to the version of the Python interpreter used to start the run.
619
+ """
620
+ ...
621
+
622
+ @typing.overload
623
+ def pypi(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
624
+ ...
625
+
626
+ @typing.overload
627
+ def pypi(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
628
+ ...
629
+
630
+ def pypi(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None):
631
+ """
632
+ Specifies the PyPI packages for the step.
633
+
634
+ Information in this decorator will augment any
635
+ attributes set in the `@pyi_base` flow-level decorator. Hence,
636
+ you can use `@pypi_base` to set packages required by all
637
+ steps and use `@pypi` to specify step-specific overrides.
638
+
639
+
640
+ Parameters
641
+ ----------
642
+ packages : Dict[str, str], default: {}
643
+ Packages to use for this step. The key is the name of the package
644
+ and the value is the version to use.
645
+ python : str, optional, default: None
646
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
647
+ that the version used will correspond to the version of the Python interpreter used to start the run.
648
+ """
649
+ ...
650
+
651
+ @typing.overload
652
+ def catch(*, var: typing.Optional[str] = None, print_exception: bool = True) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
531
653
  """
532
654
  Specifies that the step will success under all circumstances.
533
655
 
@@ -657,174 +779,342 @@ def huggingface_hub(*, temp_dir_root: typing.Optional[str] = None, load: typing.
657
779
  """
658
780
  ...
659
781
 
660
- @typing.overload
661
- def model(*, load: typing.Union[typing.List[str], str, typing.List[typing.Tuple[str, typing.Optional[str]]]] = None, temp_dir_root: str = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
782
+ def ollama(*, models: list, backend: str, force_pull: bool, cache_update_policy: str, force_cache_update: bool, debug: bool, circuit_breaker_config: dict, timeout_config: dict) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
662
783
  """
663
- Enables loading / saving of models within a step.
784
+ This decorator is used to run Ollama APIs as Metaflow task sidecars.
664
785
 
665
- > Examples
666
- - Saving Models
667
- ```python
668
- @model
669
- @step
670
- def train(self):
671
- # current.model.save returns a dictionary reference to the model saved
672
- self.my_model = current.model.save(
673
- path_to_my_model,
674
- label="my_model",
675
- metadata={
676
- "epochs": 10,
677
- "batch-size": 32,
678
- "learning-rate": 0.001,
679
- }
680
- )
681
- self.next(self.test)
786
+ User code call
787
+ --------------
788
+ @ollama(
789
+ models=[...],
790
+ ...
791
+ )
682
792
 
683
- @model(load="my_model")
684
- @step
685
- def test(self):
686
- # `current.model.loaded` returns a dictionary of the loaded models
687
- # where the key is the name of the artifact and the value is the path to the model
688
- print(os.listdir(current.model.loaded["my_model"]))
689
- self.next(self.end)
690
- ```
793
+ Valid backend options
794
+ ---------------------
795
+ - 'local': Run as a separate process on the local task machine.
796
+ - (TODO) 'managed': Outerbounds hosts and selects compute provider.
797
+ - (TODO) 'remote': Spin up separate instance to serve Ollama models.
691
798
 
692
- - Loading models
693
- ```python
694
- @step
695
- def train(self):
696
- # current.model.load returns the path to the model loaded
697
- checkpoint_path = current.model.load(
698
- self.checkpoint_key,
699
- )
700
- model_path = current.model.load(
701
- self.model,
702
- )
703
- self.next(self.test)
704
- ```
799
+ Valid model options
800
+ -------------------
801
+ Any model here https://ollama.com/search, e.g. 'llama3.2', 'llama3.3'
705
802
 
706
803
 
707
804
  Parameters
708
805
  ----------
709
- load : Union[List[str],str,List[Tuple[str,Union[str,None]]]], default: None
710
- Artifact name/s referencing the models/checkpoints to load. Artifact names refer to the names of the instance variables set to `self`.
711
- These artifact names give to `load` be reference objects or reference `key` string's from objects created by `current.checkpoint` / `current.model` / `current.huggingface_hub`.
712
- If a list of tuples is provided, the first element is the artifact name and the second element is the path the artifact needs be unpacked on
713
- the local filesystem. If the second element is None, the artifact will be unpacked in the current working directory.
714
- If a string is provided, then the artifact corresponding to that name will be loaded in the current working directory.
715
-
716
- temp_dir_root : str, default: None
717
- The root directory under which `current.model.loaded` will store loaded models
806
+ models: list[str]
807
+ List of Ollama containers running models in sidecars.
808
+ backend: str
809
+ Determines where and how to run the Ollama process.
810
+ force_pull: bool
811
+ Whether to run `ollama pull` no matter what, or first check the remote cache in Metaflow datastore for this model key.
812
+ cache_update_policy: str
813
+ Cache update policy: "auto", "force", or "never".
814
+ force_cache_update: bool
815
+ Simple override for "force" cache update policy.
816
+ debug: bool
817
+ Whether to turn on verbose debugging logs.
818
+ circuit_breaker_config: dict
819
+ Configuration for circuit breaker protection. Keys: failure_threshold, recovery_timeout, reset_timeout.
820
+ timeout_config: dict
821
+ Configuration for various operation timeouts. Keys: pull, stop, health_check, install, server_startup.
718
822
  """
719
823
  ...
720
824
 
721
- @typing.overload
722
- def model(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
723
- ...
724
-
725
- @typing.overload
726
- def model(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
727
- ...
728
-
729
- def model(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, load: typing.Union[typing.List[str], str, typing.List[typing.Tuple[str, typing.Optional[str]]]] = None, temp_dir_root: str = None):
825
+ def nvct(*, gpu: int, gpu_type: str) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
730
826
  """
731
- Enables loading / saving of models within a step.
732
-
733
- > Examples
734
- - Saving Models
735
- ```python
736
- @model
737
- @step
738
- def train(self):
739
- # current.model.save returns a dictionary reference to the model saved
740
- self.my_model = current.model.save(
741
- path_to_my_model,
742
- label="my_model",
743
- metadata={
744
- "epochs": 10,
745
- "batch-size": 32,
746
- "learning-rate": 0.001,
747
- }
748
- )
749
- self.next(self.test)
750
-
751
- @model(load="my_model")
752
- @step
753
- def test(self):
754
- # `current.model.loaded` returns a dictionary of the loaded models
755
- # where the key is the name of the artifact and the value is the path to the model
756
- print(os.listdir(current.model.loaded["my_model"]))
757
- self.next(self.end)
758
- ```
759
-
760
- - Loading models
761
- ```python
762
- @step
763
- def train(self):
764
- # current.model.load returns the path to the model loaded
765
- checkpoint_path = current.model.load(
766
- self.checkpoint_key,
767
- )
768
- model_path = current.model.load(
769
- self.model,
770
- )
771
- self.next(self.test)
772
- ```
827
+ Specifies that this step should execute on DGX cloud.
773
828
 
774
829
 
775
830
  Parameters
776
831
  ----------
777
- load : Union[List[str],str,List[Tuple[str,Union[str,None]]]], default: None
778
- Artifact name/s referencing the models/checkpoints to load. Artifact names refer to the names of the instance variables set to `self`.
779
- These artifact names give to `load` be reference objects or reference `key` string's from objects created by `current.checkpoint` / `current.model` / `current.huggingface_hub`.
780
- If a list of tuples is provided, the first element is the artifact name and the second element is the path the artifact needs be unpacked on
781
- the local filesystem. If the second element is None, the artifact will be unpacked in the current working directory.
782
- If a string is provided, then the artifact corresponding to that name will be loaded in the current working directory.
783
-
784
- temp_dir_root : str, default: None
785
- The root directory under which `current.model.loaded` will store loaded models
832
+ gpu : int
833
+ Number of GPUs to use.
834
+ gpu_type : str
835
+ Type of Nvidia GPU to use.
786
836
  """
787
837
  ...
788
838
 
789
839
  @typing.overload
790
- def environment(*, vars: typing.Dict[str, str] = {}) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
840
+ def timeout(*, seconds: int = 0, minutes: int = 0, hours: int = 0) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
791
841
  """
792
- Specifies environment variables to be set prior to the execution of a step.
842
+ Specifies a timeout for your step.
843
+
844
+ This decorator is useful if this step may hang indefinitely.
845
+
846
+ This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
847
+ A timeout is considered to be an exception thrown by the step. It will cause the step to be
848
+ retried if needed and the exception will be caught by the `@catch` decorator, if present.
849
+
850
+ Note that all the values specified in parameters are added together so if you specify
851
+ 60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.
793
852
 
794
853
 
795
854
  Parameters
796
855
  ----------
797
- vars : Dict[str, str], default {}
798
- Dictionary of environment variables to set.
856
+ seconds : int, default 0
857
+ Number of seconds to wait prior to timing out.
858
+ minutes : int, default 0
859
+ Number of minutes to wait prior to timing out.
860
+ hours : int, default 0
861
+ Number of hours to wait prior to timing out.
799
862
  """
800
863
  ...
801
864
 
802
865
  @typing.overload
803
- def environment(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
866
+ def timeout(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
804
867
  ...
805
868
 
806
869
  @typing.overload
807
- def environment(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
870
+ def timeout(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
808
871
  ...
809
872
 
810
- def environment(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, vars: typing.Dict[str, str] = {}):
873
+ def timeout(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, seconds: int = 0, minutes: int = 0, hours: int = 0):
811
874
  """
812
- Specifies environment variables to be set prior to the execution of a step.
875
+ Specifies a timeout for your step.
876
+
877
+ This decorator is useful if this step may hang indefinitely.
878
+
879
+ This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
880
+ A timeout is considered to be an exception thrown by the step. It will cause the step to be
881
+ retried if needed and the exception will be caught by the `@catch` decorator, if present.
882
+
883
+ Note that all the values specified in parameters are added together so if you specify
884
+ 60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.
813
885
 
814
886
 
815
887
  Parameters
816
888
  ----------
817
- vars : Dict[str, str], default {}
818
- Dictionary of environment variables to set.
889
+ seconds : int, default 0
890
+ Number of seconds to wait prior to timing out.
891
+ minutes : int, default 0
892
+ Number of minutes to wait prior to timing out.
893
+ hours : int, default 0
894
+ Number of hours to wait prior to timing out.
819
895
  """
820
896
  ...
821
897
 
822
- def kubernetes(*, cpu: int = 1, memory: int = 4096, disk: int = 10240, image: typing.Optional[str] = None, image_pull_policy: str = 'KUBERNETES_IMAGE_PULL_POLICY', image_pull_secrets: typing.List[str] = [], service_account: str = 'METAFLOW_KUBERNETES_SERVICE_ACCOUNT', secrets: typing.Optional[typing.List[str]] = None, node_selector: typing.Union[typing.Dict[str, str], str, None] = None, namespace: str = 'METAFLOW_KUBERNETES_NAMESPACE', gpu: typing.Optional[int] = None, gpu_vendor: str = 'KUBERNETES_GPU_VENDOR', tolerations: typing.List[str] = [], labels: typing.Dict[str, str] = 'METAFLOW_KUBERNETES_LABELS', annotations: typing.Dict[str, str] = 'METAFLOW_KUBERNETES_ANNOTATIONS', use_tmpfs: bool = False, tmpfs_tempdir: bool = True, tmpfs_size: typing.Optional[int] = None, tmpfs_path: typing.Optional[str] = '/metaflow_temp', persistent_volume_claims: typing.Optional[typing.Dict[str, str]] = None, shared_memory: typing.Optional[int] = None, port: typing.Optional[int] = None, compute_pool: typing.Optional[str] = None, hostname_resolution_timeout: int = 600, qos: str = 'Burstable', security_context: typing.Optional[typing.Dict[str, typing.Any]] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
898
+ def vllm(*, model: str, backend: str, openai_api_server: bool, debug: bool, card_refresh_interval: int, max_retries: int, retry_alert_frequency: int, engine_args: dict) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
823
899
  """
824
- Specifies that this step should execute on Kubernetes.
900
+ This decorator is used to run vllm APIs as Metaflow task sidecars.
825
901
 
902
+ User code call
903
+ --------------
904
+ @vllm(
905
+ model="...",
906
+ ...
907
+ )
826
908
 
827
- Parameters
909
+ Valid backend options
910
+ ---------------------
911
+ - 'local': Run as a separate process on the local task machine.
912
+
913
+ Valid model options
914
+ -------------------
915
+ Any HuggingFace model identifier, e.g. 'meta-llama/Llama-3.2-1B'
916
+
917
+ NOTE: vLLM's OpenAI-compatible server serves ONE model per server instance.
918
+ If you need multiple models, you must create multiple @vllm decorators.
919
+
920
+
921
+ Parameters
922
+ ----------
923
+ model: str
924
+ HuggingFace model identifier to be served by vLLM.
925
+ backend: str
926
+ Determines where and how to run the vLLM process.
927
+ openai_api_server: bool
928
+ Whether to use OpenAI-compatible API server mode (subprocess) instead of native engine.
929
+ Default is False (uses native engine).
930
+ Set to True for backward compatibility with existing code.
931
+ debug: bool
932
+ Whether to turn on verbose debugging logs.
933
+ card_refresh_interval: int
934
+ Interval in seconds for refreshing the vLLM status card.
935
+ Only used when openai_api_server=True.
936
+ max_retries: int
937
+ Maximum number of retries checking for vLLM server startup.
938
+ Only used when openai_api_server=True.
939
+ retry_alert_frequency: int
940
+ Frequency of alert logs for vLLM server startup retries.
941
+ Only used when openai_api_server=True.
942
+ engine_args : dict
943
+ Additional keyword arguments to pass to the vLLM engine.
944
+ For example, `tensor_parallel_size=2`.
945
+ """
946
+ ...
947
+
948
+ @typing.overload
949
+ def parallel(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
950
+ """
951
+ Decorator prototype for all step decorators. This function gets specialized
952
+ and imported for all decorators types by _import_plugin_decorators().
953
+ """
954
+ ...
955
+
956
+ @typing.overload
957
+ def parallel(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
958
+ ...
959
+
960
+ def parallel(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
961
+ """
962
+ Decorator prototype for all step decorators. This function gets specialized
963
+ and imported for all decorators types by _import_plugin_decorators().
964
+ """
965
+ ...
966
+
967
+ @typing.overload
968
+ def card(*, type: str = 'default', id: typing.Optional[str] = None, options: typing.Dict[str, typing.Any] = {}, timeout: int = 45) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
969
+ """
970
+ Creates a human-readable report, a Metaflow Card, after this step completes.
971
+
972
+ Note that you may add multiple `@card` decorators in a step with different parameters.
973
+
974
+
975
+ Parameters
976
+ ----------
977
+ type : str, default 'default'
978
+ Card type.
979
+ id : str, optional, default None
980
+ If multiple cards are present, use this id to identify this card.
981
+ options : Dict[str, Any], default {}
982
+ Options passed to the card. The contents depend on the card type.
983
+ timeout : int, default 45
984
+ Interrupt reporting if it takes more than this many seconds.
985
+ """
986
+ ...
987
+
988
+ @typing.overload
989
+ def card(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
990
+ ...
991
+
992
+ @typing.overload
993
+ def card(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
994
+ ...
995
+
996
+ def card(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, type: str = 'default', id: typing.Optional[str] = None, options: typing.Dict[str, typing.Any] = {}, timeout: int = 45):
997
+ """
998
+ Creates a human-readable report, a Metaflow Card, after this step completes.
999
+
1000
+ Note that you may add multiple `@card` decorators in a step with different parameters.
1001
+
1002
+
1003
+ Parameters
1004
+ ----------
1005
+ type : str, default 'default'
1006
+ Card type.
1007
+ id : str, optional, default None
1008
+ If multiple cards are present, use this id to identify this card.
1009
+ options : Dict[str, Any], default {}
1010
+ Options passed to the card. The contents depend on the card type.
1011
+ timeout : int, default 45
1012
+ Interrupt reporting if it takes more than this many seconds.
1013
+ """
1014
+ ...
1015
+
1016
+ @typing.overload
1017
+ def resources(*, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1018
+ """
1019
+ Specifies the resources needed when executing this step.
1020
+
1021
+ Use `@resources` to specify the resource requirements
1022
+ independently of the specific compute layer (`@batch`, `@kubernetes`).
1023
+
1024
+ You can choose the compute layer on the command line by executing e.g.
1025
+ ```
1026
+ python myflow.py run --with batch
1027
+ ```
1028
+ or
1029
+ ```
1030
+ python myflow.py run --with kubernetes
1031
+ ```
1032
+ which executes the flow on the desired system using the
1033
+ requirements specified in `@resources`.
1034
+
1035
+
1036
+ Parameters
1037
+ ----------
1038
+ cpu : int, default 1
1039
+ Number of CPUs required for this step.
1040
+ gpu : int, optional, default None
1041
+ Number of GPUs required for this step.
1042
+ disk : int, optional, default None
1043
+ Disk size (in MB) required for this step. Only applies on Kubernetes.
1044
+ memory : int, default 4096
1045
+ Memory size (in MB) required for this step.
1046
+ shared_memory : int, optional, default None
1047
+ The value for the size (in MiB) of the /dev/shm volume for this step.
1048
+ This parameter maps to the `--shm-size` option in Docker.
1049
+ """
1050
+ ...
1051
+
1052
+ @typing.overload
1053
+ def resources(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1054
+ ...
1055
+
1056
+ @typing.overload
1057
+ def resources(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1058
+ ...
1059
+
1060
+ def resources(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None):
1061
+ """
1062
+ Specifies the resources needed when executing this step.
1063
+
1064
+ Use `@resources` to specify the resource requirements
1065
+ independently of the specific compute layer (`@batch`, `@kubernetes`).
1066
+
1067
+ You can choose the compute layer on the command line by executing e.g.
1068
+ ```
1069
+ python myflow.py run --with batch
1070
+ ```
1071
+ or
1072
+ ```
1073
+ python myflow.py run --with kubernetes
1074
+ ```
1075
+ which executes the flow on the desired system using the
1076
+ requirements specified in `@resources`.
1077
+
1078
+
1079
+ Parameters
1080
+ ----------
1081
+ cpu : int, default 1
1082
+ Number of CPUs required for this step.
1083
+ gpu : int, optional, default None
1084
+ Number of GPUs required for this step.
1085
+ disk : int, optional, default None
1086
+ Disk size (in MB) required for this step. Only applies on Kubernetes.
1087
+ memory : int, default 4096
1088
+ Memory size (in MB) required for this step.
1089
+ shared_memory : int, optional, default None
1090
+ The value for the size (in MiB) of the /dev/shm volume for this step.
1091
+ This parameter maps to the `--shm-size` option in Docker.
1092
+ """
1093
+ ...
1094
+
1095
+ @typing.overload
1096
+ def fast_bakery_internal(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1097
+ """
1098
+ Internal decorator to support Fast bakery
1099
+ """
1100
+ ...
1101
+
1102
+ @typing.overload
1103
+ def fast_bakery_internal(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1104
+ ...
1105
+
1106
+ def fast_bakery_internal(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
1107
+ """
1108
+ Internal decorator to support Fast bakery
1109
+ """
1110
+ ...
1111
+
1112
+ def kubernetes(*, cpu: int = 1, memory: int = 4096, disk: int = 10240, image: typing.Optional[str] = None, image_pull_policy: str = 'KUBERNETES_IMAGE_PULL_POLICY', image_pull_secrets: typing.List[str] = [], service_account: str = 'METAFLOW_KUBERNETES_SERVICE_ACCOUNT', secrets: typing.Optional[typing.List[str]] = None, node_selector: typing.Union[typing.Dict[str, str], str, None] = None, namespace: str = 'METAFLOW_KUBERNETES_NAMESPACE', gpu: typing.Optional[int] = None, gpu_vendor: str = 'KUBERNETES_GPU_VENDOR', tolerations: typing.List[str] = [], labels: typing.Dict[str, str] = 'METAFLOW_KUBERNETES_LABELS', annotations: typing.Dict[str, str] = 'METAFLOW_KUBERNETES_ANNOTATIONS', use_tmpfs: bool = False, tmpfs_tempdir: bool = True, tmpfs_size: typing.Optional[int] = None, tmpfs_path: typing.Optional[str] = '/metaflow_temp', persistent_volume_claims: typing.Optional[typing.Dict[str, str]] = None, shared_memory: typing.Optional[int] = None, port: typing.Optional[int] = None, compute_pool: typing.Optional[str] = None, hostname_resolution_timeout: int = 600, qos: str = 'Burstable', security_context: typing.Optional[typing.Dict[str, typing.Any]] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1113
+ """
1114
+ Specifies that this step should execute on Kubernetes.
1115
+
1116
+
1117
+ Parameters
828
1118
  ----------
829
1119
  cpu : int, default 1
830
1120
  Number of CPUs required for this step. If `@resources` is
@@ -909,580 +1199,54 @@ def kubernetes(*, cpu: int = 1, memory: int = 4096, disk: int = 10240, image: ty
909
1199
  ...
910
1200
 
911
1201
  @typing.overload
912
- def checkpoint(*, load_policy: str = 'fresh', temp_dir_root: str = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1202
+ def app_deploy(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
913
1203
  """
914
- Enables checkpointing for a step.
1204
+ Decorator prototype for all step decorators. This function gets specialized
1205
+ and imported for all decorators types by _import_plugin_decorators().
1206
+ """
1207
+ ...
1208
+
1209
+ @typing.overload
1210
+ def app_deploy(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1211
+ ...
1212
+
1213
+ def app_deploy(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
1214
+ """
1215
+ Decorator prototype for all step decorators. This function gets specialized
1216
+ and imported for all decorators types by _import_plugin_decorators().
1217
+ """
1218
+ ...
1219
+
1220
+ @typing.overload
1221
+ def environment(*, vars: typing.Dict[str, str] = {}) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1222
+ """
1223
+ Specifies environment variables to be set prior to the execution of a step.
915
1224
 
916
- > Examples
917
1225
 
918
- - Saving Checkpoints
919
-
920
- ```python
921
- @checkpoint
922
- @step
923
- def train(self):
924
- model = create_model(self.parameters, checkpoint_path = None)
925
- for i in range(self.epochs):
926
- # some training logic
927
- loss = model.train(self.dataset)
928
- if i % 10 == 0:
929
- model.save(
930
- current.checkpoint.directory,
931
- )
932
- # saves the contents of the `current.checkpoint.directory` as a checkpoint
933
- # and returns a reference dictionary to the checkpoint saved in the datastore
934
- self.latest_checkpoint = current.checkpoint.save(
935
- name="epoch_checkpoint",
936
- metadata={
937
- "epoch": i,
938
- "loss": loss,
939
- }
940
- )
941
- ```
942
-
943
- - Using Loaded Checkpoints
944
-
945
- ```python
946
- @retry(times=3)
947
- @checkpoint
948
- @step
949
- def train(self):
950
- # Assume that the task has restarted and the previous attempt of the task
951
- # saved a checkpoint
952
- checkpoint_path = None
953
- if current.checkpoint.is_loaded: # Check if a checkpoint is loaded
954
- print("Loaded checkpoint from the previous attempt")
955
- checkpoint_path = current.checkpoint.directory
956
-
957
- model = create_model(self.parameters, checkpoint_path = checkpoint_path)
958
- for i in range(self.epochs):
959
- ...
960
- ```
961
-
962
-
963
- Parameters
964
- ----------
965
- load_policy : str, default: "fresh"
966
- The policy for loading the checkpoint. The following policies are supported:
967
- - "eager": Loads the the latest available checkpoint within the namespace.
968
- With this mode, the latest checkpoint written by any previous task (can be even a different run) of the step
969
- will be loaded at the start of the task.
970
- - "none": Do not load any checkpoint
971
- - "fresh": Loads the lastest checkpoint created within the running Task.
972
- This mode helps loading checkpoints across various retry attempts of the same task.
973
- With this mode, no checkpoint will be loaded at the start of a task but any checkpoints
974
- created within the task will be loaded when the task is retries execution on failure.
975
-
976
- temp_dir_root : str, default: None
977
- The root directory under which `current.checkpoint.directory` will be created.
978
- """
979
- ...
980
-
981
- @typing.overload
982
- def checkpoint(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
983
- ...
984
-
985
- @typing.overload
986
- def checkpoint(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
987
- ...
988
-
989
- def checkpoint(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, load_policy: str = 'fresh', temp_dir_root: str = None):
990
- """
991
- Enables checkpointing for a step.
992
-
993
- > Examples
994
-
995
- - Saving Checkpoints
996
-
997
- ```python
998
- @checkpoint
999
- @step
1000
- def train(self):
1001
- model = create_model(self.parameters, checkpoint_path = None)
1002
- for i in range(self.epochs):
1003
- # some training logic
1004
- loss = model.train(self.dataset)
1005
- if i % 10 == 0:
1006
- model.save(
1007
- current.checkpoint.directory,
1008
- )
1009
- # saves the contents of the `current.checkpoint.directory` as a checkpoint
1010
- # and returns a reference dictionary to the checkpoint saved in the datastore
1011
- self.latest_checkpoint = current.checkpoint.save(
1012
- name="epoch_checkpoint",
1013
- metadata={
1014
- "epoch": i,
1015
- "loss": loss,
1016
- }
1017
- )
1018
- ```
1019
-
1020
- - Using Loaded Checkpoints
1021
-
1022
- ```python
1023
- @retry(times=3)
1024
- @checkpoint
1025
- @step
1026
- def train(self):
1027
- # Assume that the task has restarted and the previous attempt of the task
1028
- # saved a checkpoint
1029
- checkpoint_path = None
1030
- if current.checkpoint.is_loaded: # Check if a checkpoint is loaded
1031
- print("Loaded checkpoint from the previous attempt")
1032
- checkpoint_path = current.checkpoint.directory
1033
-
1034
- model = create_model(self.parameters, checkpoint_path = checkpoint_path)
1035
- for i in range(self.epochs):
1036
- ...
1037
- ```
1038
-
1039
-
1040
- Parameters
1041
- ----------
1042
- load_policy : str, default: "fresh"
1043
- The policy for loading the checkpoint. The following policies are supported:
1044
- - "eager": Loads the the latest available checkpoint within the namespace.
1045
- With this mode, the latest checkpoint written by any previous task (can be even a different run) of the step
1046
- will be loaded at the start of the task.
1047
- - "none": Do not load any checkpoint
1048
- - "fresh": Loads the lastest checkpoint created within the running Task.
1049
- This mode helps loading checkpoints across various retry attempts of the same task.
1050
- With this mode, no checkpoint will be loaded at the start of a task but any checkpoints
1051
- created within the task will be loaded when the task is retries execution on failure.
1052
-
1053
- temp_dir_root : str, default: None
1054
- The root directory under which `current.checkpoint.directory` will be created.
1055
- """
1056
- ...
1057
-
1058
- @typing.overload
1059
- def retry(*, times: int = 3, minutes_between_retries: int = 2) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1060
- """
1061
- Specifies the number of times the task corresponding
1062
- to a step needs to be retried.
1063
-
1064
- This decorator is useful for handling transient errors, such as networking issues.
1065
- If your task contains operations that can't be retried safely, e.g. database updates,
1066
- it is advisable to annotate it with `@retry(times=0)`.
1067
-
1068
- This can be used in conjunction with the `@catch` decorator. The `@catch`
1069
- decorator will execute a no-op task after all retries have been exhausted,
1070
- ensuring that the flow execution can continue.
1071
-
1072
-
1073
- Parameters
1074
- ----------
1075
- times : int, default 3
1076
- Number of times to retry this task.
1077
- minutes_between_retries : int, default 2
1078
- Number of minutes between retries.
1079
- """
1080
- ...
1081
-
1082
- @typing.overload
1083
- def retry(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1084
- ...
1085
-
1086
- @typing.overload
1087
- def retry(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1088
- ...
1089
-
1090
- def retry(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, times: int = 3, minutes_between_retries: int = 2):
1091
- """
1092
- Specifies the number of times the task corresponding
1093
- to a step needs to be retried.
1094
-
1095
- This decorator is useful for handling transient errors, such as networking issues.
1096
- If your task contains operations that can't be retried safely, e.g. database updates,
1097
- it is advisable to annotate it with `@retry(times=0)`.
1098
-
1099
- This can be used in conjunction with the `@catch` decorator. The `@catch`
1100
- decorator will execute a no-op task after all retries have been exhausted,
1101
- ensuring that the flow execution can continue.
1102
-
1103
-
1104
- Parameters
1105
- ----------
1106
- times : int, default 3
1107
- Number of times to retry this task.
1108
- minutes_between_retries : int, default 2
1109
- Number of minutes between retries.
1110
- """
1111
- ...
1112
-
1113
- @typing.overload
1114
- def secrets(*, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = []) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1115
- """
1116
- Specifies secrets to be retrieved and injected as environment variables prior to
1117
- the execution of a step.
1118
-
1119
-
1120
- Parameters
1121
- ----------
1122
- sources : List[Union[str, Dict[str, Any]]], default: []
1123
- List of secret specs, defining how the secrets are to be retrieved
1124
- """
1125
- ...
1126
-
1127
- @typing.overload
1128
- def secrets(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1129
- ...
1130
-
1131
- @typing.overload
1132
- def secrets(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1133
- ...
1134
-
1135
- def secrets(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = []):
1136
- """
1137
- Specifies secrets to be retrieved and injected as environment variables prior to
1138
- the execution of a step.
1139
-
1140
-
1141
- Parameters
1142
- ----------
1143
- sources : List[Union[str, Dict[str, Any]]], default: []
1144
- List of secret specs, defining how the secrets are to be retrieved
1145
- """
1146
- ...
1147
-
1148
- @typing.overload
1149
- def fast_bakery_internal(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1150
- """
1151
- Internal decorator to support Fast bakery
1152
- """
1153
- ...
1154
-
1155
- @typing.overload
1156
- def fast_bakery_internal(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1157
- ...
1158
-
1159
- def fast_bakery_internal(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
1160
- """
1161
- Internal decorator to support Fast bakery
1162
- """
1163
- ...
1164
-
1165
- def app_deploy(*, app_port: int, app_name: str) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1166
- """
1167
- Specifies that this step is used to deploy an instance of the app.
1168
- Requires that self.app_name, self.app_port, self.entrypoint and self.deployDir is set.
1169
-
1170
-
1171
- Parameters
1172
- ----------
1173
- app_port : int
1174
- Number of GPUs to use.
1175
- app_name : str
1176
- Name of the app to deploy.
1177
- """
1178
- ...
1179
-
1180
- @typing.overload
1181
- def parallel(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1182
- """
1183
- Decorator prototype for all step decorators. This function gets specialized
1184
- and imported for all decorators types by _import_plugin_decorators().
1185
- """
1186
- ...
1187
-
1188
- @typing.overload
1189
- def parallel(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1190
- ...
1191
-
1192
- def parallel(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
1193
- """
1194
- Decorator prototype for all step decorators. This function gets specialized
1195
- and imported for all decorators types by _import_plugin_decorators().
1196
- """
1197
- ...
1198
-
1199
- @typing.overload
1200
- def card(*, type: str = 'default', id: typing.Optional[str] = None, options: typing.Dict[str, typing.Any] = {}, timeout: int = 45) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1201
- """
1202
- Creates a human-readable report, a Metaflow Card, after this step completes.
1203
-
1204
- Note that you may add multiple `@card` decorators in a step with different parameters.
1205
-
1206
-
1207
- Parameters
1208
- ----------
1209
- type : str, default 'default'
1210
- Card type.
1211
- id : str, optional, default None
1212
- If multiple cards are present, use this id to identify this card.
1213
- options : Dict[str, Any], default {}
1214
- Options passed to the card. The contents depend on the card type.
1215
- timeout : int, default 45
1216
- Interrupt reporting if it takes more than this many seconds.
1217
- """
1218
- ...
1219
-
1220
- @typing.overload
1221
- def card(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1222
- ...
1223
-
1224
- @typing.overload
1225
- def card(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1226
- ...
1227
-
1228
- def card(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, type: str = 'default', id: typing.Optional[str] = None, options: typing.Dict[str, typing.Any] = {}, timeout: int = 45):
1229
- """
1230
- Creates a human-readable report, a Metaflow Card, after this step completes.
1231
-
1232
- Note that you may add multiple `@card` decorators in a step with different parameters.
1233
-
1234
-
1235
- Parameters
1236
- ----------
1237
- type : str, default 'default'
1238
- Card type.
1239
- id : str, optional, default None
1240
- If multiple cards are present, use this id to identify this card.
1241
- options : Dict[str, Any], default {}
1242
- Options passed to the card. The contents depend on the card type.
1243
- timeout : int, default 45
1244
- Interrupt reporting if it takes more than this many seconds.
1245
- """
1246
- ...
1247
-
1248
- @typing.overload
1249
- def pypi_base(*, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1250
- """
1251
- Specifies the PyPI packages for all steps of the flow.
1252
-
1253
- Use `@pypi_base` to set common packages required by all
1254
- steps and use `@pypi` to specify step-specific overrides.
1255
-
1256
- Parameters
1257
- ----------
1258
- packages : Dict[str, str], default: {}
1259
- Packages to use for this flow. The key is the name of the package
1260
- and the value is the version to use.
1261
- python : str, optional, default: None
1262
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
1263
- that the version used will correspond to the version of the Python interpreter used to start the run.
1264
- """
1265
- ...
1266
-
1267
- @typing.overload
1268
- def pypi_base(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1269
- ...
1270
-
1271
- def pypi_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None):
1272
- """
1273
- Specifies the PyPI packages for all steps of the flow.
1274
-
1275
- Use `@pypi_base` to set common packages required by all
1276
- steps and use `@pypi` to specify step-specific overrides.
1277
-
1278
- Parameters
1279
- ----------
1280
- packages : Dict[str, str], default: {}
1281
- Packages to use for this flow. The key is the name of the package
1282
- and the value is the version to use.
1283
- python : str, optional, default: None
1284
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
1285
- that the version used will correspond to the version of the Python interpreter used to start the run.
1286
- """
1287
- ...
1288
-
1289
- def with_artifact_store(f: typing.Optional[typing.Type[FlowSpecDerived]] = None):
1290
- """
1291
- Allows setting external datastores to save data for the
1292
- `@checkpoint`/`@model`/`@huggingface_hub` decorators.
1293
-
1294
- This decorator is useful when users wish to save data to a different datastore
1295
- than what is configured in Metaflow. This can be for variety of reasons:
1296
-
1297
- 1. Data security: The objects needs to be stored in a bucket (object storage) that is not accessible by other flows.
1298
- 2. Data Locality: The location where the task is executing is not located in the same region as the datastore.
1299
- - Example: Metaflow datastore lives in US East, but the task is executing in Finland datacenters.
1300
- 3. Data Lifecycle Policies: The objects need to be archived / managed separately from the Metaflow managed objects.
1301
- - Example: Flow is training very large models that need to be stored separately and will be deleted more aggressively than the Metaflow managed objects.
1302
-
1303
- Usage:
1304
- ----------
1305
-
1306
- - Using a custom IAM role to access the datastore.
1307
-
1308
- ```python
1309
- @with_artifact_store(
1310
- type="s3",
1311
- config=lambda: {
1312
- "root": "s3://my-bucket-foo/path/to/root",
1313
- "role_arn": ROLE,
1314
- },
1315
- )
1316
- class MyFlow(FlowSpec):
1317
-
1318
- @checkpoint
1319
- @step
1320
- def start(self):
1321
- with open("my_file.txt", "w") as f:
1322
- f.write("Hello, World!")
1323
- self.external_bucket_checkpoint = current.checkpoint.save("my_file.txt")
1324
- self.next(self.end)
1325
-
1326
- ```
1327
-
1328
- - Using credentials to access the s3-compatible datastore.
1329
-
1330
- ```python
1331
- @with_artifact_store(
1332
- type="s3",
1333
- config=lambda: {
1334
- "root": "s3://my-bucket-foo/path/to/root",
1335
- "client_params": {
1336
- "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1337
- "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1338
- },
1339
- },
1340
- )
1341
- class MyFlow(FlowSpec):
1342
-
1343
- @checkpoint
1344
- @step
1345
- def start(self):
1346
- with open("my_file.txt", "w") as f:
1347
- f.write("Hello, World!")
1348
- self.external_bucket_checkpoint = current.checkpoint.save("my_file.txt")
1349
- self.next(self.end)
1350
-
1351
- ```
1352
-
1353
- - Accessing objects stored in external datastores after task execution.
1354
-
1355
- ```python
1356
- run = Run("CheckpointsTestsFlow/8992")
1357
- with artifact_store_from(run=run, config={
1358
- "client_params": {
1359
- "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1360
- "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1361
- },
1362
- }):
1363
- with Checkpoint() as cp:
1364
- latest = cp.list(
1365
- task=run["start"].task
1366
- )[0]
1367
- print(latest)
1368
- cp.load(
1369
- latest,
1370
- "test-checkpoints"
1371
- )
1372
-
1373
- task = Task("TorchTuneFlow/8484/train/53673")
1374
- with artifact_store_from(run=run, config={
1375
- "client_params": {
1376
- "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1377
- "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1378
- },
1379
- }):
1380
- load_model(
1381
- task.data.model_ref,
1382
- "test-models"
1383
- )
1384
- ```
1385
- Parameters:
1386
- ----------
1387
-
1388
- type: str
1389
- The type of the datastore. Can be one of 's3', 'gcs', 'azure' or any other supported metaflow Datastore.
1390
-
1391
- config: dict or Callable
1392
- Dictionary of configuration options for the datastore. The following keys are required:
1393
- - root: The root path in the datastore where the data will be saved. (needs to be in the format expected by the datastore)
1394
- - example: 's3://bucket-name/path/to/root'
1395
- - example: 'gs://bucket-name/path/to/root'
1396
- - example: 'https://myblockacc.blob.core.windows.net/metaflow/'
1397
- - role_arn (optional): AWS IAM role to access s3 bucket (only when `type` is 's3')
1398
- - session_vars (optional): AWS session variables to access s3 bucket (only when `type` is 's3')
1399
- - client_params (optional): AWS client parameters to access s3 bucket (only when `type` is 's3')
1400
- """
1401
- ...
1402
-
1403
- def airflow_s3_key_sensor(*, timeout: int, poke_interval: int, mode: str, exponential_backoff: bool, pool: str, soft_fail: bool, name: str, description: str, bucket_key: typing.Union[str, typing.List[str]], bucket_name: str, wildcard_match: bool, aws_conn_id: str, verify: bool) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1404
- """
1405
- The `@airflow_s3_key_sensor` decorator attaches a Airflow [S3KeySensor](https://airflow.apache.org/docs/apache-airflow-providers-amazon/stable/_api/airflow/providers/amazon/aws/sensors/s3/index.html#airflow.providers.amazon.aws.sensors.s3.S3KeySensor)
1406
- before the start step of the flow. This decorator only works when a flow is scheduled on Airflow
1407
- and is compiled using `airflow create`. More than one `@airflow_s3_key_sensor` can be
1408
- added as a flow decorators. Adding more than one decorator will ensure that `start` step
1409
- starts only after all sensors finish.
1410
-
1411
-
1412
- Parameters
1413
- ----------
1414
- timeout : int
1415
- Time, in seconds before the task times out and fails. (Default: 3600)
1416
- poke_interval : int
1417
- Time in seconds that the job should wait in between each try. (Default: 60)
1418
- mode : str
1419
- How the sensor operates. Options are: { poke | reschedule }. (Default: "poke")
1420
- exponential_backoff : bool
1421
- allow progressive longer waits between pokes by using exponential backoff algorithm. (Default: True)
1422
- pool : str
1423
- the slot pool this task should run in,
1424
- slot pools are a way to limit concurrency for certain tasks. (Default:None)
1425
- soft_fail : bool
1426
- Set to true to mark the task as SKIPPED on failure. (Default: False)
1427
- name : str
1428
- Name of the sensor on Airflow
1429
- description : str
1430
- Description of sensor in the Airflow UI
1431
- bucket_key : Union[str, List[str]]
1432
- The key(s) being waited on. Supports full s3:// style url or relative path from root level.
1433
- When it's specified as a full s3:// url, please leave `bucket_name` as None
1434
- bucket_name : str
1435
- Name of the S3 bucket. Only needed when bucket_key is not provided as a full s3:// url.
1436
- When specified, all the keys passed to bucket_key refers to this bucket. (Default:None)
1437
- wildcard_match : bool
1438
- whether the bucket_key should be interpreted as a Unix wildcard pattern. (Default: False)
1439
- aws_conn_id : str
1440
- a reference to the s3 connection on Airflow. (Default: None)
1441
- verify : bool
1442
- Whether or not to verify SSL certificates for S3 connection. (Default: None)
1443
- """
1444
- ...
1445
-
1446
- def airflow_external_task_sensor(*, timeout: int, poke_interval: int, mode: str, exponential_backoff: bool, pool: str, soft_fail: bool, name: str, description: str, external_dag_id: str, external_task_ids: typing.List[str], allowed_states: typing.List[str], failed_states: typing.List[str], execution_delta: "datetime.timedelta", check_existence: bool) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1447
- """
1448
- The `@airflow_external_task_sensor` decorator attaches a Airflow [ExternalTaskSensor](https://airflow.apache.org/docs/apache-airflow/stable/_api/airflow/sensors/external_task/index.html#airflow.sensors.external_task.ExternalTaskSensor) before the start step of the flow.
1449
- This decorator only works when a flow is scheduled on Airflow and is compiled using `airflow create`. More than one `@airflow_external_task_sensor` can be added as a flow decorators. Adding more than one decorator will ensure that `start` step starts only after all sensors finish.
1226
+ Parameters
1227
+ ----------
1228
+ vars : Dict[str, str], default {}
1229
+ Dictionary of environment variables to set.
1230
+ """
1231
+ ...
1232
+
1233
+ @typing.overload
1234
+ def environment(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1235
+ ...
1236
+
1237
+ @typing.overload
1238
+ def environment(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1239
+ ...
1240
+
1241
+ def environment(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, vars: typing.Dict[str, str] = {}):
1242
+ """
1243
+ Specifies environment variables to be set prior to the execution of a step.
1450
1244
 
1451
1245
 
1452
1246
  Parameters
1453
1247
  ----------
1454
- timeout : int
1455
- Time, in seconds before the task times out and fails. (Default: 3600)
1456
- poke_interval : int
1457
- Time in seconds that the job should wait in between each try. (Default: 60)
1458
- mode : str
1459
- How the sensor operates. Options are: { poke | reschedule }. (Default: "poke")
1460
- exponential_backoff : bool
1461
- allow progressive longer waits between pokes by using exponential backoff algorithm. (Default: True)
1462
- pool : str
1463
- the slot pool this task should run in,
1464
- slot pools are a way to limit concurrency for certain tasks. (Default:None)
1465
- soft_fail : bool
1466
- Set to true to mark the task as SKIPPED on failure. (Default: False)
1467
- name : str
1468
- Name of the sensor on Airflow
1469
- description : str
1470
- Description of sensor in the Airflow UI
1471
- external_dag_id : str
1472
- The dag_id that contains the task you want to wait for.
1473
- external_task_ids : List[str]
1474
- The list of task_ids that you want to wait for.
1475
- If None (default value) the sensor waits for the DAG. (Default: None)
1476
- allowed_states : List[str]
1477
- Iterable of allowed states, (Default: ['success'])
1478
- failed_states : List[str]
1479
- Iterable of failed or dis-allowed states. (Default: None)
1480
- execution_delta : datetime.timedelta
1481
- time difference with the previous execution to look at,
1482
- the default is the same logical date as the current task or DAG. (Default: None)
1483
- check_existence: bool
1484
- Set to True to check if the external task exists or check if
1485
- the DAG to wait for exists. (Default: True)
1248
+ vars : Dict[str, str], default {}
1249
+ Dictionary of environment variables to set.
1486
1250
  """
1487
1251
  ...
1488
1252
 
@@ -1579,6 +1343,125 @@ def trigger(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, event: t
1579
1343
  """
1580
1344
  ...
1581
1345
 
1346
+ @typing.overload
1347
+ def pypi_base(*, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1348
+ """
1349
+ Specifies the PyPI packages for all steps of the flow.
1350
+
1351
+ Use `@pypi_base` to set common packages required by all
1352
+ steps and use `@pypi` to specify step-specific overrides.
1353
+
1354
+ Parameters
1355
+ ----------
1356
+ packages : Dict[str, str], default: {}
1357
+ Packages to use for this flow. The key is the name of the package
1358
+ and the value is the version to use.
1359
+ python : str, optional, default: None
1360
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
1361
+ that the version used will correspond to the version of the Python interpreter used to start the run.
1362
+ """
1363
+ ...
1364
+
1365
+ @typing.overload
1366
+ def pypi_base(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1367
+ ...
1368
+
1369
+ def pypi_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None):
1370
+ """
1371
+ Specifies the PyPI packages for all steps of the flow.
1372
+
1373
+ Use `@pypi_base` to set common packages required by all
1374
+ steps and use `@pypi` to specify step-specific overrides.
1375
+
1376
+ Parameters
1377
+ ----------
1378
+ packages : Dict[str, str], default: {}
1379
+ Packages to use for this flow. The key is the name of the package
1380
+ and the value is the version to use.
1381
+ python : str, optional, default: None
1382
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
1383
+ that the version used will correspond to the version of the Python interpreter used to start the run.
1384
+ """
1385
+ ...
1386
+
1387
+ def airflow_s3_key_sensor(*, timeout: int, poke_interval: int, mode: str, exponential_backoff: bool, pool: str, soft_fail: bool, name: str, description: str, bucket_key: typing.Union[str, typing.List[str]], bucket_name: str, wildcard_match: bool, aws_conn_id: str, verify: bool) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1388
+ """
1389
+ The `@airflow_s3_key_sensor` decorator attaches a Airflow [S3KeySensor](https://airflow.apache.org/docs/apache-airflow-providers-amazon/stable/_api/airflow/providers/amazon/aws/sensors/s3/index.html#airflow.providers.amazon.aws.sensors.s3.S3KeySensor)
1390
+ before the start step of the flow. This decorator only works when a flow is scheduled on Airflow
1391
+ and is compiled using `airflow create`. More than one `@airflow_s3_key_sensor` can be
1392
+ added as a flow decorators. Adding more than one decorator will ensure that `start` step
1393
+ starts only after all sensors finish.
1394
+
1395
+
1396
+ Parameters
1397
+ ----------
1398
+ timeout : int
1399
+ Time, in seconds before the task times out and fails. (Default: 3600)
1400
+ poke_interval : int
1401
+ Time in seconds that the job should wait in between each try. (Default: 60)
1402
+ mode : str
1403
+ How the sensor operates. Options are: { poke | reschedule }. (Default: "poke")
1404
+ exponential_backoff : bool
1405
+ allow progressive longer waits between pokes by using exponential backoff algorithm. (Default: True)
1406
+ pool : str
1407
+ the slot pool this task should run in,
1408
+ slot pools are a way to limit concurrency for certain tasks. (Default:None)
1409
+ soft_fail : bool
1410
+ Set to true to mark the task as SKIPPED on failure. (Default: False)
1411
+ name : str
1412
+ Name of the sensor on Airflow
1413
+ description : str
1414
+ Description of sensor in the Airflow UI
1415
+ bucket_key : Union[str, List[str]]
1416
+ The key(s) being waited on. Supports full s3:// style url or relative path from root level.
1417
+ When it's specified as a full s3:// url, please leave `bucket_name` as None
1418
+ bucket_name : str
1419
+ Name of the S3 bucket. Only needed when bucket_key is not provided as a full s3:// url.
1420
+ When specified, all the keys passed to bucket_key refers to this bucket. (Default:None)
1421
+ wildcard_match : bool
1422
+ whether the bucket_key should be interpreted as a Unix wildcard pattern. (Default: False)
1423
+ aws_conn_id : str
1424
+ a reference to the s3 connection on Airflow. (Default: None)
1425
+ verify : bool
1426
+ Whether or not to verify SSL certificates for S3 connection. (Default: None)
1427
+ """
1428
+ ...
1429
+
1430
+ def project(*, name: str, branch: typing.Optional[str] = None, production: bool = False) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1431
+ """
1432
+ Specifies what flows belong to the same project.
1433
+
1434
+ A project-specific namespace is created for all flows that
1435
+ use the same `@project(name)`.
1436
+
1437
+
1438
+ Parameters
1439
+ ----------
1440
+ name : str
1441
+ Project name. Make sure that the name is unique amongst all
1442
+ projects that use the same production scheduler. The name may
1443
+ contain only lowercase alphanumeric characters and underscores.
1444
+
1445
+ branch : Optional[str], default None
1446
+ The branch to use. If not specified, the branch is set to
1447
+ `user.<username>` unless `production` is set to `True`. This can
1448
+ also be set on the command line using `--branch` as a top-level option.
1449
+ It is an error to specify `branch` in the decorator and on the command line.
1450
+
1451
+ production : bool, default False
1452
+ Whether or not the branch is the production branch. This can also be set on the
1453
+ command line using `--production` as a top-level option. It is an error to specify
1454
+ `production` in the decorator and on the command line.
1455
+ The project branch name will be:
1456
+ - if `branch` is specified:
1457
+ - if `production` is True: `prod.<branch>`
1458
+ - if `production` is False: `test.<branch>`
1459
+ - if `branch` is not specified:
1460
+ - if `production` is True: `prod`
1461
+ - if `production` is False: `user.<username>`
1462
+ """
1463
+ ...
1464
+
1582
1465
  @typing.overload
1583
1466
  def trigger_on_finish(*, flow: typing.Union[typing.Dict[str, str], str, None] = None, flows: typing.List[typing.Union[str, typing.Dict[str, str]]] = [], options: typing.Dict[str, typing.Any] = {}) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1584
1467
  """
@@ -1650,84 +1533,33 @@ def trigger_on_finish(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *
1650
1533
  by specifying the fully qualified project_flow_name.
1651
1534
  ```
1652
1535
  @trigger_on_finish(flow='my_project.branch.my_branch.FooFlow')
1653
- ```
1654
- or
1655
- ```
1656
- @trigger_on_finish(flows=['my_project.branch.my_branch.FooFlow', 'BarFlow'])
1657
- ```
1658
-
1659
- You can also specify just the project or project branch (other values will be
1660
- inferred from the current project or project branch):
1661
- ```
1662
- @trigger_on_finish(flow={"name": "FooFlow", "project": "my_project", "project_branch": "branch"})
1663
- ```
1664
-
1665
- Note that `branch` is typically one of:
1666
- - `prod`
1667
- - `user.bob`
1668
- - `test.my_experiment`
1669
- - `prod.staging`
1670
-
1671
-
1672
- Parameters
1673
- ----------
1674
- flow : Union[str, Dict[str, str]], optional, default None
1675
- Upstream flow dependency for this flow.
1676
- flows : List[Union[str, Dict[str, str]]], default []
1677
- Upstream flow dependencies for this flow.
1678
- options : Dict[str, Any], default {}
1679
- Backend-specific configuration for tuning eventing behavior.
1680
- """
1681
- ...
1682
-
1683
- @typing.overload
1684
- def schedule(*, hourly: bool = False, daily: bool = True, weekly: bool = False, cron: typing.Optional[str] = None, timezone: typing.Optional[str] = None) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1685
- """
1686
- Specifies the times when the flow should be run when running on a
1687
- production scheduler.
1688
-
1689
-
1690
- Parameters
1691
- ----------
1692
- hourly : bool, default False
1693
- Run the workflow hourly.
1694
- daily : bool, default True
1695
- Run the workflow daily.
1696
- weekly : bool, default False
1697
- Run the workflow weekly.
1698
- cron : str, optional, default None
1699
- Run the workflow at [a custom Cron schedule](https://docs.aws.amazon.com/eventbridge/latest/userguide/scheduled-events.html#cron-expressions)
1700
- specified by this expression.
1701
- timezone : str, optional, default None
1702
- Timezone on which the schedule runs (default: None). Currently supported only for Argo workflows,
1703
- which accepts timezones in [IANA format](https://nodatime.org/TimeZones).
1704
- """
1705
- ...
1706
-
1707
- @typing.overload
1708
- def schedule(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1709
- ...
1710
-
1711
- def schedule(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, hourly: bool = False, daily: bool = True, weekly: bool = False, cron: typing.Optional[str] = None, timezone: typing.Optional[str] = None):
1712
- """
1713
- Specifies the times when the flow should be run when running on a
1714
- production scheduler.
1536
+ ```
1537
+ or
1538
+ ```
1539
+ @trigger_on_finish(flows=['my_project.branch.my_branch.FooFlow', 'BarFlow'])
1540
+ ```
1541
+
1542
+ You can also specify just the project or project branch (other values will be
1543
+ inferred from the current project or project branch):
1544
+ ```
1545
+ @trigger_on_finish(flow={"name": "FooFlow", "project": "my_project", "project_branch": "branch"})
1546
+ ```
1547
+
1548
+ Note that `branch` is typically one of:
1549
+ - `prod`
1550
+ - `user.bob`
1551
+ - `test.my_experiment`
1552
+ - `prod.staging`
1715
1553
 
1716
1554
 
1717
1555
  Parameters
1718
1556
  ----------
1719
- hourly : bool, default False
1720
- Run the workflow hourly.
1721
- daily : bool, default True
1722
- Run the workflow daily.
1723
- weekly : bool, default False
1724
- Run the workflow weekly.
1725
- cron : str, optional, default None
1726
- Run the workflow at [a custom Cron schedule](https://docs.aws.amazon.com/eventbridge/latest/userguide/scheduled-events.html#cron-expressions)
1727
- specified by this expression.
1728
- timezone : str, optional, default None
1729
- Timezone on which the schedule runs (default: None). Currently supported only for Argo workflows,
1730
- which accepts timezones in [IANA format](https://nodatime.org/TimeZones).
1557
+ flow : Union[str, Dict[str, str]], optional, default None
1558
+ Upstream flow dependency for this flow.
1559
+ flows : List[Union[str, Dict[str, str]]], default []
1560
+ Upstream flow dependencies for this flow.
1561
+ options : Dict[str, Any], default {}
1562
+ Backend-specific configuration for tuning eventing behavior.
1731
1563
  """
1732
1564
  ...
1733
1565
 
@@ -1782,38 +1614,211 @@ def conda_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packa
1782
1614
  """
1783
1615
  ...
1784
1616
 
1785
- def project(*, name: str, branch: typing.Optional[str] = None, production: bool = False) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1617
+ def airflow_external_task_sensor(*, timeout: int, poke_interval: int, mode: str, exponential_backoff: bool, pool: str, soft_fail: bool, name: str, description: str, external_dag_id: str, external_task_ids: typing.List[str], allowed_states: typing.List[str], failed_states: typing.List[str], execution_delta: "datetime.timedelta", check_existence: bool) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1786
1618
  """
1787
- Specifies what flows belong to the same project.
1788
-
1789
- A project-specific namespace is created for all flows that
1790
- use the same `@project(name)`.
1619
+ The `@airflow_external_task_sensor` decorator attaches a Airflow [ExternalTaskSensor](https://airflow.apache.org/docs/apache-airflow/stable/_api/airflow/sensors/external_task/index.html#airflow.sensors.external_task.ExternalTaskSensor) before the start step of the flow.
1620
+ This decorator only works when a flow is scheduled on Airflow and is compiled using `airflow create`. More than one `@airflow_external_task_sensor` can be added as a flow decorators. Adding more than one decorator will ensure that `start` step starts only after all sensors finish.
1791
1621
 
1792
1622
 
1793
1623
  Parameters
1794
1624
  ----------
1625
+ timeout : int
1626
+ Time, in seconds before the task times out and fails. (Default: 3600)
1627
+ poke_interval : int
1628
+ Time in seconds that the job should wait in between each try. (Default: 60)
1629
+ mode : str
1630
+ How the sensor operates. Options are: { poke | reschedule }. (Default: "poke")
1631
+ exponential_backoff : bool
1632
+ allow progressive longer waits between pokes by using exponential backoff algorithm. (Default: True)
1633
+ pool : str
1634
+ the slot pool this task should run in,
1635
+ slot pools are a way to limit concurrency for certain tasks. (Default:None)
1636
+ soft_fail : bool
1637
+ Set to true to mark the task as SKIPPED on failure. (Default: False)
1795
1638
  name : str
1796
- Project name. Make sure that the name is unique amongst all
1797
- projects that use the same production scheduler. The name may
1798
- contain only lowercase alphanumeric characters and underscores.
1639
+ Name of the sensor on Airflow
1640
+ description : str
1641
+ Description of sensor in the Airflow UI
1642
+ external_dag_id : str
1643
+ The dag_id that contains the task you want to wait for.
1644
+ external_task_ids : List[str]
1645
+ The list of task_ids that you want to wait for.
1646
+ If None (default value) the sensor waits for the DAG. (Default: None)
1647
+ allowed_states : List[str]
1648
+ Iterable of allowed states, (Default: ['success'])
1649
+ failed_states : List[str]
1650
+ Iterable of failed or dis-allowed states. (Default: None)
1651
+ execution_delta : datetime.timedelta
1652
+ time difference with the previous execution to look at,
1653
+ the default is the same logical date as the current task or DAG. (Default: None)
1654
+ check_existence: bool
1655
+ Set to True to check if the external task exists or check if
1656
+ the DAG to wait for exists. (Default: True)
1657
+ """
1658
+ ...
1659
+
1660
+ def with_artifact_store(f: typing.Optional[typing.Type[FlowSpecDerived]] = None):
1661
+ """
1662
+ Allows setting external datastores to save data for the
1663
+ `@checkpoint`/`@model`/`@huggingface_hub` decorators.
1799
1664
 
1800
- branch : Optional[str], default None
1801
- The branch to use. If not specified, the branch is set to
1802
- `user.<username>` unless `production` is set to `True`. This can
1803
- also be set on the command line using `--branch` as a top-level option.
1804
- It is an error to specify `branch` in the decorator and on the command line.
1665
+ This decorator is useful when users wish to save data to a different datastore
1666
+ than what is configured in Metaflow. This can be for variety of reasons:
1805
1667
 
1806
- production : bool, default False
1807
- Whether or not the branch is the production branch. This can also be set on the
1808
- command line using `--production` as a top-level option. It is an error to specify
1809
- `production` in the decorator and on the command line.
1810
- The project branch name will be:
1811
- - if `branch` is specified:
1812
- - if `production` is True: `prod.<branch>`
1813
- - if `production` is False: `test.<branch>`
1814
- - if `branch` is not specified:
1815
- - if `production` is True: `prod`
1816
- - if `production` is False: `user.<username>`
1668
+ 1. Data security: The objects needs to be stored in a bucket (object storage) that is not accessible by other flows.
1669
+ 2. Data Locality: The location where the task is executing is not located in the same region as the datastore.
1670
+ - Example: Metaflow datastore lives in US East, but the task is executing in Finland datacenters.
1671
+ 3. Data Lifecycle Policies: The objects need to be archived / managed separately from the Metaflow managed objects.
1672
+ - Example: Flow is training very large models that need to be stored separately and will be deleted more aggressively than the Metaflow managed objects.
1673
+
1674
+ Usage:
1675
+ ----------
1676
+
1677
+ - Using a custom IAM role to access the datastore.
1678
+
1679
+ ```python
1680
+ @with_artifact_store(
1681
+ type="s3",
1682
+ config=lambda: {
1683
+ "root": "s3://my-bucket-foo/path/to/root",
1684
+ "role_arn": ROLE,
1685
+ },
1686
+ )
1687
+ class MyFlow(FlowSpec):
1688
+
1689
+ @checkpoint
1690
+ @step
1691
+ def start(self):
1692
+ with open("my_file.txt", "w") as f:
1693
+ f.write("Hello, World!")
1694
+ self.external_bucket_checkpoint = current.checkpoint.save("my_file.txt")
1695
+ self.next(self.end)
1696
+
1697
+ ```
1698
+
1699
+ - Using credentials to access the s3-compatible datastore.
1700
+
1701
+ ```python
1702
+ @with_artifact_store(
1703
+ type="s3",
1704
+ config=lambda: {
1705
+ "root": "s3://my-bucket-foo/path/to/root",
1706
+ "client_params": {
1707
+ "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1708
+ "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1709
+ },
1710
+ },
1711
+ )
1712
+ class MyFlow(FlowSpec):
1713
+
1714
+ @checkpoint
1715
+ @step
1716
+ def start(self):
1717
+ with open("my_file.txt", "w") as f:
1718
+ f.write("Hello, World!")
1719
+ self.external_bucket_checkpoint = current.checkpoint.save("my_file.txt")
1720
+ self.next(self.end)
1721
+
1722
+ ```
1723
+
1724
+ - Accessing objects stored in external datastores after task execution.
1725
+
1726
+ ```python
1727
+ run = Run("CheckpointsTestsFlow/8992")
1728
+ with artifact_store_from(run=run, config={
1729
+ "client_params": {
1730
+ "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1731
+ "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1732
+ },
1733
+ }):
1734
+ with Checkpoint() as cp:
1735
+ latest = cp.list(
1736
+ task=run["start"].task
1737
+ )[0]
1738
+ print(latest)
1739
+ cp.load(
1740
+ latest,
1741
+ "test-checkpoints"
1742
+ )
1743
+
1744
+ task = Task("TorchTuneFlow/8484/train/53673")
1745
+ with artifact_store_from(run=run, config={
1746
+ "client_params": {
1747
+ "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1748
+ "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1749
+ },
1750
+ }):
1751
+ load_model(
1752
+ task.data.model_ref,
1753
+ "test-models"
1754
+ )
1755
+ ```
1756
+ Parameters:
1757
+ ----------
1758
+
1759
+ type: str
1760
+ The type of the datastore. Can be one of 's3', 'gcs', 'azure' or any other supported metaflow Datastore.
1761
+
1762
+ config: dict or Callable
1763
+ Dictionary of configuration options for the datastore. The following keys are required:
1764
+ - root: The root path in the datastore where the data will be saved. (needs to be in the format expected by the datastore)
1765
+ - example: 's3://bucket-name/path/to/root'
1766
+ - example: 'gs://bucket-name/path/to/root'
1767
+ - example: 'https://myblockacc.blob.core.windows.net/metaflow/'
1768
+ - role_arn (optional): AWS IAM role to access s3 bucket (only when `type` is 's3')
1769
+ - session_vars (optional): AWS session variables to access s3 bucket (only when `type` is 's3')
1770
+ - client_params (optional): AWS client parameters to access s3 bucket (only when `type` is 's3')
1771
+ """
1772
+ ...
1773
+
1774
+ @typing.overload
1775
+ def schedule(*, hourly: bool = False, daily: bool = True, weekly: bool = False, cron: typing.Optional[str] = None, timezone: typing.Optional[str] = None) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1776
+ """
1777
+ Specifies the times when the flow should be run when running on a
1778
+ production scheduler.
1779
+
1780
+
1781
+ Parameters
1782
+ ----------
1783
+ hourly : bool, default False
1784
+ Run the workflow hourly.
1785
+ daily : bool, default True
1786
+ Run the workflow daily.
1787
+ weekly : bool, default False
1788
+ Run the workflow weekly.
1789
+ cron : str, optional, default None
1790
+ Run the workflow at [a custom Cron schedule](https://docs.aws.amazon.com/eventbridge/latest/userguide/scheduled-events.html#cron-expressions)
1791
+ specified by this expression.
1792
+ timezone : str, optional, default None
1793
+ Timezone on which the schedule runs (default: None). Currently supported only for Argo workflows,
1794
+ which accepts timezones in [IANA format](https://nodatime.org/TimeZones).
1795
+ """
1796
+ ...
1797
+
1798
+ @typing.overload
1799
+ def schedule(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1800
+ ...
1801
+
1802
+ def schedule(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, hourly: bool = False, daily: bool = True, weekly: bool = False, cron: typing.Optional[str] = None, timezone: typing.Optional[str] = None):
1803
+ """
1804
+ Specifies the times when the flow should be run when running on a
1805
+ production scheduler.
1806
+
1807
+
1808
+ Parameters
1809
+ ----------
1810
+ hourly : bool, default False
1811
+ Run the workflow hourly.
1812
+ daily : bool, default True
1813
+ Run the workflow daily.
1814
+ weekly : bool, default False
1815
+ Run the workflow weekly.
1816
+ cron : str, optional, default None
1817
+ Run the workflow at [a custom Cron schedule](https://docs.aws.amazon.com/eventbridge/latest/userguide/scheduled-events.html#cron-expressions)
1818
+ specified by this expression.
1819
+ timezone : str, optional, default None
1820
+ Timezone on which the schedule runs (default: None). Currently supported only for Argo workflows,
1821
+ which accepts timezones in [IANA format](https://nodatime.org/TimeZones).
1817
1822
  """
1818
1823
  ...
1819
1824