ob-metaflow-stubs 6.0.3.176rc4__py2.py3-none-any.whl → 6.0.3.176rc6__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
Files changed (218)
  1. metaflow-stubs/__init__.pyi +676 -676
  2. metaflow-stubs/cards.pyi +1 -1
  3. metaflow-stubs/cli.pyi +1 -1
  4. metaflow-stubs/cli_components/__init__.pyi +1 -1
  5. metaflow-stubs/cli_components/utils.pyi +1 -1
  6. metaflow-stubs/client/__init__.pyi +1 -1
  7. metaflow-stubs/client/core.pyi +4 -4
  8. metaflow-stubs/client/filecache.pyi +2 -2
  9. metaflow-stubs/events.pyi +1 -1
  10. metaflow-stubs/exception.pyi +1 -1
  11. metaflow-stubs/flowspec.pyi +4 -4
  12. metaflow-stubs/generated_for.txt +1 -1
  13. metaflow-stubs/includefile.pyi +1 -1
  14. metaflow-stubs/info_file.pyi +1 -1
  15. metaflow-stubs/metadata_provider/__init__.pyi +1 -1
  16. metaflow-stubs/metadata_provider/heartbeat.pyi +1 -1
  17. metaflow-stubs/metadata_provider/metadata.pyi +1 -1
  18. metaflow-stubs/metadata_provider/util.pyi +1 -1
  19. metaflow-stubs/metaflow_config.pyi +1 -1
  20. metaflow-stubs/metaflow_current.pyi +52 -52
  21. metaflow-stubs/metaflow_git.pyi +1 -1
  22. metaflow-stubs/mf_extensions/__init__.pyi +1 -1
  23. metaflow-stubs/mf_extensions/obcheckpoint/__init__.pyi +1 -1
  24. metaflow-stubs/mf_extensions/obcheckpoint/plugins/__init__.pyi +1 -1
  25. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/__init__.pyi +1 -1
  26. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/__init__.pyi +1 -1
  27. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/async_cards.pyi +1 -1
  28. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/deco_injection_mixin.pyi +1 -1
  29. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/extra_components.pyi +1 -1
  30. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/__init__.pyi +1 -1
  31. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/cards/__init__.pyi +1 -1
  32. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/cards/checkpoint_lister.pyi +2 -2
  33. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/cards/lineage_card.pyi +1 -1
  34. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/checkpoint_storage.pyi +3 -3
  35. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/constructors.pyi +1 -1
  36. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/core.pyi +3 -3
  37. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/decorator.pyi +2 -2
  38. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/exceptions.pyi +1 -1
  39. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/final_api.pyi +1 -1
  40. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/lineage.pyi +1 -1
  41. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/__init__.pyi +1 -1
  42. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/context.pyi +2 -2
  43. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/core.pyi +2 -2
  44. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/decorator.pyi +1 -1
  45. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/exceptions.pyi +1 -1
  46. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/task_utils.pyi +1 -1
  47. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/utils.pyi +1 -1
  48. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastructures.pyi +1 -1
  49. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/exceptions.pyi +1 -1
  50. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/hf_hub/__init__.pyi +1 -1
  51. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/hf_hub/decorator.pyi +1 -1
  52. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/__init__.pyi +1 -1
  53. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/core.pyi +2 -2
  54. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/exceptions.pyi +1 -1
  55. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/model_storage.pyi +2 -2
  56. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/__init__.pyi +1 -1
  57. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/flowspec_utils.pyi +1 -1
  58. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/general.pyi +1 -1
  59. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/identity_utils.pyi +1 -1
  60. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/serialization_handler/__init__.pyi +1 -1
  61. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/serialization_handler/base.pyi +1 -1
  62. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/serialization_handler/tar.pyi +1 -1
  63. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/tar_utils.pyi +1 -1
  64. metaflow-stubs/mf_extensions/outerbounds/__init__.pyi +1 -1
  65. metaflow-stubs/mf_extensions/outerbounds/plugins/__init__.pyi +1 -1
  66. metaflow-stubs/mf_extensions/outerbounds/plugins/card_utilities/__init__.pyi +1 -1
  67. metaflow-stubs/mf_extensions/outerbounds/plugins/card_utilities/async_cards.pyi +2 -2
  68. metaflow-stubs/mf_extensions/outerbounds/plugins/card_utilities/injector.pyi +1 -1
  69. metaflow-stubs/mf_extensions/outerbounds/plugins/checkpoint_datastores/__init__.pyi +1 -1
  70. metaflow-stubs/mf_extensions/outerbounds/plugins/checkpoint_datastores/coreweave.pyi +1 -1
  71. metaflow-stubs/mf_extensions/outerbounds/plugins/checkpoint_datastores/nebius.pyi +1 -1
  72. metaflow-stubs/mf_extensions/outerbounds/plugins/fast_bakery/__init__.pyi +1 -1
  73. metaflow-stubs/mf_extensions/outerbounds/plugins/fast_bakery/baker.pyi +2 -2
  74. metaflow-stubs/mf_extensions/outerbounds/plugins/fast_bakery/docker_environment.pyi +2 -2
  75. metaflow-stubs/mf_extensions/outerbounds/plugins/fast_bakery/fast_bakery.pyi +1 -1
  76. metaflow-stubs/mf_extensions/outerbounds/plugins/kubernetes/__init__.pyi +1 -1
  77. metaflow-stubs/mf_extensions/outerbounds/plugins/kubernetes/pod_killer.pyi +1 -1
  78. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/__init__.pyi +1 -1
  79. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/constants.pyi +1 -1
  80. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/exceptions.pyi +1 -1
  81. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/ollama.pyi +1 -1
  82. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/status_card.pyi +1 -1
  83. metaflow-stubs/mf_extensions/outerbounds/plugins/snowflake/__init__.pyi +1 -1
  84. metaflow-stubs/mf_extensions/outerbounds/plugins/snowflake/snowflake.pyi +1 -1
  85. metaflow-stubs/mf_extensions/outerbounds/profilers/__init__.pyi +1 -1
  86. metaflow-stubs/mf_extensions/outerbounds/profilers/gpu.pyi +1 -1
  87. metaflow-stubs/mf_extensions/outerbounds/remote_config.pyi +1 -1
  88. metaflow-stubs/mf_extensions/outerbounds/toplevel/__init__.pyi +1 -1
  89. metaflow-stubs/mf_extensions/outerbounds/toplevel/global_aliases_for_metaflow_package.pyi +1 -1
  90. metaflow-stubs/multicore_utils.pyi +1 -1
  91. metaflow-stubs/ob_internal.pyi +1 -1
  92. metaflow-stubs/parameters.pyi +1 -1
  93. metaflow-stubs/plugins/__init__.pyi +12 -12
  94. metaflow-stubs/plugins/airflow/__init__.pyi +1 -1
  95. metaflow-stubs/plugins/airflow/airflow_utils.pyi +1 -1
  96. metaflow-stubs/plugins/airflow/exception.pyi +1 -1
  97. metaflow-stubs/plugins/airflow/sensors/__init__.pyi +1 -1
  98. metaflow-stubs/plugins/airflow/sensors/base_sensor.pyi +1 -1
  99. metaflow-stubs/plugins/airflow/sensors/external_task_sensor.pyi +1 -1
  100. metaflow-stubs/plugins/airflow/sensors/s3_sensor.pyi +1 -1
  101. metaflow-stubs/plugins/argo/__init__.pyi +1 -1
  102. metaflow-stubs/plugins/argo/argo_client.pyi +1 -1
  103. metaflow-stubs/plugins/argo/argo_events.pyi +1 -1
  104. metaflow-stubs/plugins/argo/argo_workflows.pyi +1 -1
  105. metaflow-stubs/plugins/argo/argo_workflows_decorator.pyi +3 -3
  106. metaflow-stubs/plugins/argo/argo_workflows_deployer.pyi +1 -1
  107. metaflow-stubs/plugins/argo/argo_workflows_deployer_objects.pyi +1 -1
  108. metaflow-stubs/plugins/aws/__init__.pyi +1 -1
  109. metaflow-stubs/plugins/aws/aws_client.pyi +1 -1
  110. metaflow-stubs/plugins/aws/aws_utils.pyi +1 -1
  111. metaflow-stubs/plugins/aws/batch/__init__.pyi +1 -1
  112. metaflow-stubs/plugins/aws/batch/batch.pyi +1 -1
  113. metaflow-stubs/plugins/aws/batch/batch_client.pyi +1 -1
  114. metaflow-stubs/plugins/aws/batch/batch_decorator.pyi +1 -1
  115. metaflow-stubs/plugins/aws/secrets_manager/__init__.pyi +1 -1
  116. metaflow-stubs/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.pyi +3 -3
  117. metaflow-stubs/plugins/aws/step_functions/__init__.pyi +1 -1
  118. metaflow-stubs/plugins/aws/step_functions/event_bridge_client.pyi +1 -1
  119. metaflow-stubs/plugins/aws/step_functions/schedule_decorator.pyi +1 -1
  120. metaflow-stubs/plugins/aws/step_functions/step_functions.pyi +1 -1
  121. metaflow-stubs/plugins/aws/step_functions/step_functions_client.pyi +1 -1
  122. metaflow-stubs/plugins/aws/step_functions/step_functions_deployer.pyi +1 -1
  123. metaflow-stubs/plugins/aws/step_functions/step_functions_deployer_objects.pyi +1 -1
  124. metaflow-stubs/plugins/azure/__init__.pyi +1 -1
  125. metaflow-stubs/plugins/azure/azure_credential.pyi +1 -1
  126. metaflow-stubs/plugins/azure/azure_exceptions.pyi +1 -1
  127. metaflow-stubs/plugins/azure/azure_secret_manager_secrets_provider.pyi +3 -3
  128. metaflow-stubs/plugins/azure/azure_utils.pyi +1 -1
  129. metaflow-stubs/plugins/azure/blob_service_client_factory.pyi +1 -1
  130. metaflow-stubs/plugins/azure/includefile_support.pyi +1 -1
  131. metaflow-stubs/plugins/cards/__init__.pyi +1 -1
  132. metaflow-stubs/plugins/cards/card_client.pyi +2 -2
  133. metaflow-stubs/plugins/cards/card_creator.pyi +1 -1
  134. metaflow-stubs/plugins/cards/card_datastore.pyi +1 -1
  135. metaflow-stubs/plugins/cards/card_decorator.pyi +1 -1
  136. metaflow-stubs/plugins/cards/card_modules/__init__.pyi +1 -1
  137. metaflow-stubs/plugins/cards/card_modules/basic.pyi +1 -1
  138. metaflow-stubs/plugins/cards/card_modules/card.pyi +1 -1
  139. metaflow-stubs/plugins/cards/card_modules/components.pyi +2 -2
  140. metaflow-stubs/plugins/cards/card_modules/convert_to_native_type.pyi +1 -1
  141. metaflow-stubs/plugins/cards/card_modules/renderer_tools.pyi +1 -1
  142. metaflow-stubs/plugins/cards/card_modules/test_cards.pyi +1 -1
  143. metaflow-stubs/plugins/cards/card_resolver.pyi +1 -1
  144. metaflow-stubs/plugins/cards/component_serializer.pyi +1 -1
  145. metaflow-stubs/plugins/cards/exception.pyi +1 -1
  146. metaflow-stubs/plugins/catch_decorator.pyi +1 -1
  147. metaflow-stubs/plugins/datatools/__init__.pyi +1 -1
  148. metaflow-stubs/plugins/datatools/local.pyi +1 -1
  149. metaflow-stubs/plugins/datatools/s3/__init__.pyi +1 -1
  150. metaflow-stubs/plugins/datatools/s3/s3.pyi +2 -2
  151. metaflow-stubs/plugins/datatools/s3/s3tail.pyi +1 -1
  152. metaflow-stubs/plugins/datatools/s3/s3util.pyi +1 -1
  153. metaflow-stubs/plugins/debug_logger.pyi +1 -1
  154. metaflow-stubs/plugins/debug_monitor.pyi +1 -1
  155. metaflow-stubs/plugins/environment_decorator.pyi +1 -1
  156. metaflow-stubs/plugins/events_decorator.pyi +1 -1
  157. metaflow-stubs/plugins/frameworks/__init__.pyi +1 -1
  158. metaflow-stubs/plugins/frameworks/pytorch.pyi +1 -1
  159. metaflow-stubs/plugins/gcp/__init__.pyi +1 -1
  160. metaflow-stubs/plugins/gcp/gcp_secret_manager_secrets_provider.pyi +3 -3
  161. metaflow-stubs/plugins/gcp/gs_exceptions.pyi +1 -1
  162. metaflow-stubs/plugins/gcp/gs_storage_client_factory.pyi +1 -1
  163. metaflow-stubs/plugins/gcp/gs_utils.pyi +1 -1
  164. metaflow-stubs/plugins/gcp/includefile_support.pyi +1 -1
  165. metaflow-stubs/plugins/kubernetes/__init__.pyi +1 -1
  166. metaflow-stubs/plugins/kubernetes/kube_utils.pyi +1 -1
  167. metaflow-stubs/plugins/kubernetes/kubernetes.pyi +1 -1
  168. metaflow-stubs/plugins/kubernetes/kubernetes_client.pyi +1 -1
  169. metaflow-stubs/plugins/kubernetes/kubernetes_decorator.pyi +1 -1
  170. metaflow-stubs/plugins/kubernetes/kubernetes_jobsets.pyi +1 -1
  171. metaflow-stubs/plugins/kubernetes/spot_monitor_sidecar.pyi +1 -1
  172. metaflow-stubs/plugins/ollama/__init__.pyi +2 -2
  173. metaflow-stubs/plugins/parallel_decorator.pyi +1 -1
  174. metaflow-stubs/plugins/perimeters.pyi +1 -1
  175. metaflow-stubs/plugins/project_decorator.pyi +1 -1
  176. metaflow-stubs/plugins/pypi/__init__.pyi +1 -1
  177. metaflow-stubs/plugins/pypi/conda_decorator.pyi +1 -1
  178. metaflow-stubs/plugins/pypi/conda_environment.pyi +5 -5
  179. metaflow-stubs/plugins/pypi/parsers.pyi +1 -1
  180. metaflow-stubs/plugins/pypi/pypi_decorator.pyi +1 -1
  181. metaflow-stubs/plugins/pypi/pypi_environment.pyi +1 -1
  182. metaflow-stubs/plugins/pypi/utils.pyi +1 -1
  183. metaflow-stubs/plugins/resources_decorator.pyi +1 -1
  184. metaflow-stubs/plugins/retry_decorator.pyi +1 -1
  185. metaflow-stubs/plugins/secrets/__init__.pyi +2 -2
  186. metaflow-stubs/plugins/secrets/inline_secrets_provider.pyi +3 -3
  187. metaflow-stubs/plugins/secrets/secrets_decorator.pyi +1 -1
  188. metaflow-stubs/plugins/snowflake/__init__.pyi +1 -1
  189. metaflow-stubs/plugins/storage_executor.pyi +1 -1
  190. metaflow-stubs/plugins/test_unbounded_foreach_decorator.pyi +2 -2
  191. metaflow-stubs/plugins/timeout_decorator.pyi +1 -1
  192. metaflow-stubs/plugins/torchtune/__init__.pyi +1 -1
  193. metaflow-stubs/plugins/uv/__init__.pyi +1 -1
  194. metaflow-stubs/plugins/uv/uv_environment.pyi +2 -2
  195. metaflow-stubs/profilers/__init__.pyi +1 -1
  196. metaflow-stubs/pylint_wrapper.pyi +1 -1
  197. metaflow-stubs/runner/__init__.pyi +1 -1
  198. metaflow-stubs/runner/deployer.pyi +28 -28
  199. metaflow-stubs/runner/deployer_impl.pyi +1 -1
  200. metaflow-stubs/runner/metaflow_runner.pyi +2 -2
  201. metaflow-stubs/runner/nbdeploy.pyi +1 -1
  202. metaflow-stubs/runner/nbrun.pyi +1 -1
  203. metaflow-stubs/runner/subprocess_manager.pyi +1 -1
  204. metaflow-stubs/runner/utils.pyi +2 -2
  205. metaflow-stubs/system/__init__.pyi +1 -1
  206. metaflow-stubs/system/system_logger.pyi +1 -1
  207. metaflow-stubs/system/system_monitor.pyi +1 -1
  208. metaflow-stubs/tagging_util.pyi +1 -1
  209. metaflow-stubs/tuple_util.pyi +1 -1
  210. metaflow-stubs/user_configs/__init__.pyi +1 -1
  211. metaflow-stubs/user_configs/config_decorators.pyi +6 -6
  212. metaflow-stubs/user_configs/config_options.pyi +1 -1
  213. metaflow-stubs/user_configs/config_parameters.pyi +6 -6
  214. {ob_metaflow_stubs-6.0.3.176rc4.dist-info → ob_metaflow_stubs-6.0.3.176rc6.dist-info}/METADATA +1 -1
  215. ob_metaflow_stubs-6.0.3.176rc6.dist-info/RECORD +218 -0
  216. ob_metaflow_stubs-6.0.3.176rc4.dist-info/RECORD +0 -218
  217. {ob_metaflow_stubs-6.0.3.176rc4.dist-info → ob_metaflow_stubs-6.0.3.176rc6.dist-info}/WHEEL +0 -0
  218. {ob_metaflow_stubs-6.0.3.176rc4.dist-info → ob_metaflow_stubs-6.0.3.176rc6.dist-info}/top_level.txt +0 -0
@@ -1,15 +1,15 @@
  ######################################################################################################
  # Auto-generated Metaflow stub file #
  # MF version: 2.15.14.1+obcheckpoint(0.2.1);ob(v1) #
- # Generated on 2025-06-04T19:55:23.864727 #
+ # Generated on 2025-06-04T22:04:18.327442 #
  ######################################################################################################
 
  from __future__ import annotations
 
  import typing
  if typing.TYPE_CHECKING:
- import typing
  import datetime
+ import typing
  FlowSpecDerived = typing.TypeVar("FlowSpecDerived", bound="FlowSpec", contravariant=False, covariant=False)
  StepFlag = typing.NewType("StepFlag", bool)
 
@@ -35,10 +35,10 @@ from .user_configs.config_parameters import ConfigValue as ConfigValue
  from .user_configs.config_parameters import config_expr as config_expr
  from .user_configs.config_decorators import CustomFlowDecorator as CustomFlowDecorator
  from .user_configs.config_decorators import CustomStepDecorator as CustomStepDecorator
+ from . import tuple_util as tuple_util
  from . import cards as cards
  from . import events as events
  from . import metaflow_git as metaflow_git
- from . import tuple_util as tuple_util
  from . import runner as runner
  from . import plugins as plugins
  from .mf_extensions.outerbounds.toplevel.global_aliases_for_metaflow_package import S3 as S3
@@ -155,57 +155,59 @@ def step(f: typing.Union[typing.Callable[[FlowSpecDerived], None], typing.Callab
  ...
 
  @typing.overload
- def retry(*, times: int = 3, minutes_between_retries: int = 2) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
+ def checkpoint(*, load_policy: str = 'fresh', temp_dir_root: str = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
  """
- Specifies the number of times the task corresponding
- to a step needs to be retried.
-
- This decorator is useful for handling transient errors, such as networking issues.
- If your task contains operations that can't be retried safely, e.g. database updates,
- it is advisable to annotate it with `@retry(times=0)`.
+ Enables checkpointing for a step.
 
- This can be used in conjunction with the `@catch` decorator. The `@catch`
- decorator will execute a no-op task after all retries have been exhausted,
- ensuring that the flow execution can continue.
 
 
  Parameters
  ----------
- times : int, default 3
- Number of times to retry this task.
- minutes_between_retries : int, default 2
- Number of minutes between retries.
+ load_policy : str, default: "fresh"
+ The policy for loading the checkpoint. The following policies are supported:
+ - "eager": Loads the the latest available checkpoint within the namespace.
+ With this mode, the latest checkpoint written by any previous task (can be even a different run) of the step
+ will be loaded at the start of the task.
+ - "none": Do not load any checkpoint
+ - "fresh": Loads the lastest checkpoint created within the running Task.
+ This mode helps loading checkpoints across various retry attempts of the same task.
+ With this mode, no checkpoint will be loaded at the start of a task but any checkpoints
+ created within the task will be loaded when the task is retries execution on failure.
+
+ temp_dir_root : str, default: None
+ The root directory under which `current.checkpoint.directory` will be created.
  """
  ...
 
  @typing.overload
- def retry(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
+ def checkpoint(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
  ...
 
  @typing.overload
- def retry(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
+ def checkpoint(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
  ...
 
- def retry(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, times: int = 3, minutes_between_retries: int = 2):
+ def checkpoint(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, load_policy: str = 'fresh', temp_dir_root: str = None):
  """
- Specifies the number of times the task corresponding
- to a step needs to be retried.
-
- This decorator is useful for handling transient errors, such as networking issues.
- If your task contains operations that can't be retried safely, e.g. database updates,
- it is advisable to annotate it with `@retry(times=0)`.
+ Enables checkpointing for a step.
 
- This can be used in conjunction with the `@catch` decorator. The `@catch`
- decorator will execute a no-op task after all retries have been exhausted,
- ensuring that the flow execution can continue.
 
 
  Parameters
  ----------
- times : int, default 3
- Number of times to retry this task.
- minutes_between_retries : int, default 2
- Number of minutes between retries.
+ load_policy : str, default: "fresh"
+ The policy for loading the checkpoint. The following policies are supported:
+ - "eager": Loads the the latest available checkpoint within the namespace.
+ With this mode, the latest checkpoint written by any previous task (can be even a different run) of the step
+ will be loaded at the start of the task.
+ - "none": Do not load any checkpoint
+ - "fresh": Loads the lastest checkpoint created within the running Task.
+ This mode helps loading checkpoints across various retry attempts of the same task.
+ With this mode, no checkpoint will be loaded at the start of a task but any checkpoints
+ created within the task will be loaded when the task is retries execution on failure.
+
+ temp_dir_root : str, default: None
+ The root directory under which `current.checkpoint.directory` will be created.
  """
  ...
 
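Note: the `@checkpoint` docstring added in the hunk above only documents `load_policy`, `temp_dir_root` and `current.checkpoint.directory`. The following minimal usage sketch is illustrative only and is not part of the diff; the `current.checkpoint.save()` call is an assumption based on the obcheckpoint extension this package wraps.

    from metaflow import FlowSpec, step, checkpoint, current

    class CheckpointSketchFlow(FlowSpec):

        @checkpoint(load_policy="fresh")  # reload checkpoints written by earlier attempts of this task
        @step
        def start(self):
            import os
            # Write task state under the directory documented as current.checkpoint.directory.
            with open(os.path.join(current.checkpoint.directory, "state.txt"), "w") as f:
                f.write("epoch=1")
            current.checkpoint.save()  # assumed API; persists the checkpoint contents to the datastore
            self.next(self.end)

        @step
        def end(self):
            pass

    if __name__ == "__main__":
        CheckpointSketchFlow()
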
@@ -223,150 +225,6 @@ def nvct(*, gpu: int, gpu_type: str) -> typing.Callable[[typing.Union[typing.Cal
  """
  ...
 
- def ollama(*, models: list, backend: str, force_pull: bool, cache_update_policy: str, force_cache_update: bool, debug: bool, circuit_breaker_config: dict, timeout_config: dict) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
- """
- This decorator is used to run Ollama APIs as Metaflow task sidecars.
-
- User code call
- --------------
- @ollama(
- models=[...],
- ...
- )
-
- Valid backend options
- ---------------------
- - 'local': Run as a separate process on the local task machine.
- - (TODO) 'managed': Outerbounds hosts and selects compute provider.
- - (TODO) 'remote': Spin up separate instance to serve Ollama models.
-
- Valid model options
- -------------------
- Any model here https://ollama.com/search, e.g. 'llama3.2', 'llama3.3'
-
-
- Parameters
- ----------
- models: list[str]
- List of Ollama containers running models in sidecars.
- backend: str
- Determines where and how to run the Ollama process.
- force_pull: bool
- Whether to run `ollama pull` no matter what, or first check the remote cache in Metaflow datastore for this model key.
- cache_update_policy: str
- Cache update policy: "auto", "force", or "never".
- force_cache_update: bool
- Simple override for "force" cache update policy.
- debug: bool
- Whether to turn on verbose debugging logs.
- circuit_breaker_config: dict
- Configuration for circuit breaker protection. Keys: failure_threshold, recovery_timeout, reset_timeout.
- timeout_config: dict
- Configuration for various operation timeouts. Keys: pull, stop, health_check, install, server_startup.
- """
- ...
-
- def app_deploy(*, app_port: int, app_name: str) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
- """
- Specifies that this step is used to deploy an instance of the app.
- Requires that self.app_name, self.app_port, self.entrypoint and self.deployDir is set.
-
-
- Parameters
- ----------
- app_port : int
- Number of GPUs to use.
- app_name : str
- Name of the app to deploy.
- """
- ...
-
- @typing.overload
- def secrets(*, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = []) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
- """
- Specifies secrets to be retrieved and injected as environment variables prior to
- the execution of a step.
-
-
- Parameters
- ----------
- sources : List[Union[str, Dict[str, Any]]], default: []
- List of secret specs, defining how the secrets are to be retrieved
- """
- ...
-
- @typing.overload
- def secrets(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
- ...
-
- @typing.overload
- def secrets(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
- ...
-
- def secrets(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = []):
- """
- Specifies secrets to be retrieved and injected as environment variables prior to
- the execution of a step.
-
-
- Parameters
- ----------
- sources : List[Union[str, Dict[str, Any]]], default: []
- List of secret specs, defining how the secrets are to be retrieved
- """
- ...
-
- @typing.overload
- def pypi(*, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
- """
- Specifies the PyPI packages for the step.
-
- Information in this decorator will augment any
- attributes set in the `@pyi_base` flow-level decorator. Hence,
- you can use `@pypi_base` to set packages required by all
- steps and use `@pypi` to specify step-specific overrides.
-
-
- Parameters
- ----------
- packages : Dict[str, str], default: {}
- Packages to use for this step. The key is the name of the package
- and the value is the version to use.
- python : str, optional, default: None
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
- that the version used will correspond to the version of the Python interpreter used to start the run.
- """
- ...
-
- @typing.overload
- def pypi(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
- ...
-
- @typing.overload
- def pypi(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
- ...
-
- def pypi(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None):
- """
- Specifies the PyPI packages for the step.
-
- Information in this decorator will augment any
- attributes set in the `@pyi_base` flow-level decorator. Hence,
- you can use `@pypi_base` to set packages required by all
- steps and use `@pypi` to specify step-specific overrides.
-
-
- Parameters
- ----------
- packages : Dict[str, str], default: {}
- Packages to use for this step. The key is the name of the package
- and the value is the version to use.
- python : str, optional, default: None
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
- that the version used will correspond to the version of the Python interpreter used to start the run.
- """
- ...
-
  @typing.overload
  def resources(*, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
  """
@@ -447,112 +305,163 @@ def resources(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None]
  ...
 
  @typing.overload
- def timeout(*, seconds: int = 0, minutes: int = 0, hours: int = 0) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
+ def environment(*, vars: typing.Dict[str, str] = {}) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
  """
- Specifies a timeout for your step.
-
- This decorator is useful if this step may hang indefinitely.
-
- This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
- A timeout is considered to be an exception thrown by the step. It will cause the step to be
- retried if needed and the exception will be caught by the `@catch` decorator, if present.
-
- Note that all the values specified in parameters are added together so if you specify
- 60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.
+ Specifies environment variables to be set prior to the execution of a step.
 
 
  Parameters
  ----------
- seconds : int, default 0
- Number of seconds to wait prior to timing out.
- minutes : int, default 0
- Number of minutes to wait prior to timing out.
- hours : int, default 0
- Number of hours to wait prior to timing out.
+ vars : Dict[str, str], default {}
+ Dictionary of environment variables to set.
  """
  ...
 
  @typing.overload
- def timeout(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
+ def environment(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
  ...
 
  @typing.overload
- def timeout(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
+ def environment(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
  ...
 
- def timeout(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, seconds: int = 0, minutes: int = 0, hours: int = 0):
+ def environment(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, vars: typing.Dict[str, str] = {}):
  """
- Specifies a timeout for your step.
-
- This decorator is useful if this step may hang indefinitely.
-
- This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
- A timeout is considered to be an exception thrown by the step. It will cause the step to be
- retried if needed and the exception will be caught by the `@catch` decorator, if present.
-
- Note that all the values specified in parameters are added together so if you specify
- 60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.
+ Specifies environment variables to be set prior to the execution of a step.
 
 
  Parameters
  ----------
- seconds : int, default 0
- Number of seconds to wait prior to timing out.
- minutes : int, default 0
- Number of minutes to wait prior to timing out.
- hours : int, default 0
- Number of hours to wait prior to timing out.
+ vars : Dict[str, str], default {}
+ Dictionary of environment variables to set.
  """
  ...
 
- @typing.overload
- def catch(*, var: typing.Optional[str] = None, print_exception: bool = True) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
+ def kubernetes(*, cpu: int = 1, memory: int = 4096, disk: int = 10240, image: typing.Optional[str] = None, image_pull_policy: str = 'KUBERNETES_IMAGE_PULL_POLICY', service_account: str = 'METAFLOW_KUBERNETES_SERVICE_ACCOUNT', secrets: typing.Optional[typing.List[str]] = None, node_selector: typing.Union[typing.Dict[str, str], str, None] = None, namespace: str = 'METAFLOW_KUBERNETES_NAMESPACE', gpu: typing.Optional[int] = None, gpu_vendor: str = 'KUBERNETES_GPU_VENDOR', tolerations: typing.List[str] = [], labels: typing.Dict[str, str] = 'METAFLOW_KUBERNETES_LABELS', annotations: typing.Dict[str, str] = 'METAFLOW_KUBERNETES_ANNOTATIONS', use_tmpfs: bool = False, tmpfs_tempdir: bool = True, tmpfs_size: typing.Optional[int] = None, tmpfs_path: typing.Optional[str] = '/metaflow_temp', persistent_volume_claims: typing.Optional[typing.Dict[str, str]] = None, shared_memory: typing.Optional[int] = None, port: typing.Optional[int] = None, compute_pool: typing.Optional[str] = None, hostname_resolution_timeout: int = 600, qos: str = 'Burstable', security_context: typing.Optional[typing.Dict[str, typing.Any]] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
  """
- Specifies that the step will success under all circumstances.
-
- The decorator will create an optional artifact, specified by `var`, which
- contains the exception raised. You can use it to detect the presence
- of errors, indicating that all happy-path artifacts produced by the step
- are missing.
+ Specifies that this step should execute on Kubernetes.
 
 
  Parameters
  ----------
- var : str, optional, default None
- Name of the artifact in which to store the caught exception.
- If not specified, the exception is not stored.
- print_exception : bool, default True
- Determines whether or not the exception is printed to
- stdout when caught.
+ cpu : int, default 1
+ Number of CPUs required for this step. If `@resources` is
+ also present, the maximum value from all decorators is used.
+ memory : int, default 4096
+ Memory size (in MB) required for this step. If
+ `@resources` is also present, the maximum value from all decorators is
+ used.
+ disk : int, default 10240
+ Disk size (in MB) required for this step. If
+ `@resources` is also present, the maximum value from all decorators is
+ used.
+ image : str, optional, default None
+ Docker image to use when launching on Kubernetes. If not specified, and
+ METAFLOW_KUBERNETES_CONTAINER_IMAGE is specified, that image is used. If
+ not, a default Docker image mapping to the current version of Python is used.
+ image_pull_policy: str, default KUBERNETES_IMAGE_PULL_POLICY
+ If given, the imagePullPolicy to be applied to the Docker image of the step.
+ service_account : str, default METAFLOW_KUBERNETES_SERVICE_ACCOUNT
+ Kubernetes service account to use when launching pod in Kubernetes.
+ secrets : List[str], optional, default None
+ Kubernetes secrets to use when launching pod in Kubernetes. These
+ secrets are in addition to the ones defined in `METAFLOW_KUBERNETES_SECRETS`
+ in Metaflow configuration.
+ node_selector: Union[Dict[str,str], str], optional, default None
+ Kubernetes node selector(s) to apply to the pod running the task.
+ Can be passed in as a comma separated string of values e.g.
+ 'kubernetes.io/os=linux,kubernetes.io/arch=amd64' or as a dictionary
+ {'kubernetes.io/os': 'linux', 'kubernetes.io/arch': 'amd64'}
+ namespace : str, default METAFLOW_KUBERNETES_NAMESPACE
+ Kubernetes namespace to use when launching pod in Kubernetes.
+ gpu : int, optional, default None
+ Number of GPUs required for this step. A value of zero implies that
+ the scheduled node should not have GPUs.
+ gpu_vendor : str, default KUBERNETES_GPU_VENDOR
+ The vendor of the GPUs to be used for this step.
+ tolerations : List[str], default []
+ The default is extracted from METAFLOW_KUBERNETES_TOLERATIONS.
+ Kubernetes tolerations to use when launching pod in Kubernetes.
+ labels: Dict[str, str], default: METAFLOW_KUBERNETES_LABELS
+ Kubernetes labels to use when launching pod in Kubernetes.
+ annotations: Dict[str, str], default: METAFLOW_KUBERNETES_ANNOTATIONS
+ Kubernetes annotations to use when launching pod in Kubernetes.
+ use_tmpfs : bool, default False
+ This enables an explicit tmpfs mount for this step.
+ tmpfs_tempdir : bool, default True
+ sets METAFLOW_TEMPDIR to tmpfs_path if set for this step.
+ tmpfs_size : int, optional, default: None
+ The value for the size (in MiB) of the tmpfs mount for this step.
+ This parameter maps to the `--tmpfs` option in Docker. Defaults to 50% of the
+ memory allocated for this step.
+ tmpfs_path : str, optional, default /metaflow_temp
+ Path to tmpfs mount for this step.
+ persistent_volume_claims : Dict[str, str], optional, default None
+ A map (dictionary) of persistent volumes to be mounted to the pod for this step. The map is from persistent
+ volumes to the path to which the volume is to be mounted, e.g., `{'pvc-name': '/path/to/mount/on'}`.
+ shared_memory: int, optional
+ Shared memory size (in MiB) required for this step
+ port: int, optional
+ Port number to specify in the Kubernetes job object
+ compute_pool : str, optional, default None
+ Compute pool to be used for for this step.
+ If not specified, any accessible compute pool within the perimeter is used.
+ hostname_resolution_timeout: int, default 10 * 60
+ Timeout in seconds for the workers tasks in the gang scheduled cluster to resolve the hostname of control task.
+ Only applicable when @parallel is used.
+ qos: str, default: Burstable
+ Quality of Service class to assign to the pod. Supported values are: Guaranteed, Burstable, BestEffort
+
+ security_context: Dict[str, Any], optional, default None
+ Container security context. Applies to the task container. Allows the following keys:
+ - privileged: bool, optional, default None
+ - allow_privilege_escalation: bool, optional, default None
+ - run_as_user: int, optional, default None
+ - run_as_group: int, optional, default None
+ - run_as_non_root: bool, optional, default None
  """
  ...
 
- @typing.overload
- def catch(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
- ...
-
- @typing.overload
- def catch(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
- ...
-
- def catch(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, var: typing.Optional[str] = None, print_exception: bool = True):
+ def ollama(*, models: list, backend: str, force_pull: bool, cache_update_policy: str, force_cache_update: bool, debug: bool, circuit_breaker_config: dict, timeout_config: dict) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
  """
- Specifies that the step will success under all circumstances.
+ This decorator is used to run Ollama APIs as Metaflow task sidecars.
 
- The decorator will create an optional artifact, specified by `var`, which
- contains the exception raised. You can use it to detect the presence
- of errors, indicating that all happy-path artifacts produced by the step
- are missing.
+ User code call
+ --------------
+ @ollama(
+ models=[...],
+ ...
+ )
+
+ Valid backend options
+ ---------------------
+ - 'local': Run as a separate process on the local task machine.
+ - (TODO) 'managed': Outerbounds hosts and selects compute provider.
+ - (TODO) 'remote': Spin up separate instance to serve Ollama models.
+
+ Valid model options
+ -------------------
+ Any model here https://ollama.com/search, e.g. 'llama3.2', 'llama3.3'
 
 
  Parameters
  ----------
- var : str, optional, default None
- Name of the artifact in which to store the caught exception.
- If not specified, the exception is not stored.
- print_exception : bool, default True
- Determines whether or not the exception is printed to
- stdout when caught.
+ models: list[str]
+ List of Ollama containers running models in sidecars.
+ backend: str
+ Determines where and how to run the Ollama process.
+ force_pull: bool
+ Whether to run `ollama pull` no matter what, or first check the remote cache in Metaflow datastore for this model key.
+ cache_update_policy: str
+ Cache update policy: "auto", "force", or "never".
+ force_cache_update: bool
+ Simple override for "force" cache update policy.
+ debug: bool
+ Whether to turn on verbose debugging logs.
+ circuit_breaker_config: dict
+ Configuration for circuit breaker protection. Keys: failure_threshold, recovery_timeout, reset_timeout.
+ timeout_config: dict
+ Configuration for various operation timeouts. Keys: pull, stop, health_check, install, server_startup.
  """
  ...
 
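Note: as context for the `@environment` and `@kubernetes` definitions that moved in the hunk above, here is a small illustrative sketch of how these step decorators are commonly combined. It is not part of the diff and the parameter values are arbitrary.

    from metaflow import FlowSpec, step, kubernetes, environment

    class K8sSketchFlow(FlowSpec):

        @environment(vars={"MY_SETTING": "1"})       # env vars set prior to the step executing
        @kubernetes(cpu=2, memory=8192, disk=20480)  # pod resources; with @resources the maximum value wins
        @step
        def start(self):
            import os
            print("MY_SETTING =", os.environ.get("MY_SETTING"))
            self.next(self.end)

        @step
        def end(self):
            pass

    if __name__ == "__main__":
        K8sSketchFlow()
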
@@ -606,122 +515,193 @@ def card(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typ
606
515
  ...
607
516
 
608
517
  @typing.overload
609
- def parallel(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
518
+ def timeout(*, seconds: int = 0, minutes: int = 0, hours: int = 0) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
610
519
  """
611
- Decorator prototype for all step decorators. This function gets specialized
612
- and imported for all decorators types by _import_plugin_decorators().
520
+ Specifies a timeout for your step.
521
+
522
+ This decorator is useful if this step may hang indefinitely.
523
+
524
+ This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
525
+ A timeout is considered to be an exception thrown by the step. It will cause the step to be
526
+ retried if needed and the exception will be caught by the `@catch` decorator, if present.
527
+
528
+ Note that all the values specified in parameters are added together so if you specify
529
+ 60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.
530
+
531
+
532
+ Parameters
533
+ ----------
534
+ seconds : int, default 0
535
+ Number of seconds to wait prior to timing out.
536
+ minutes : int, default 0
537
+ Number of minutes to wait prior to timing out.
538
+ hours : int, default 0
539
+ Number of hours to wait prior to timing out.
613
540
  """
614
541
  ...
615
542
 
616
543
  @typing.overload
617
- def parallel(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
544
+ def timeout(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
618
545
  ...
619
546
 
620
- def parallel(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
547
+ @typing.overload
548
+ def timeout(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
549
+ ...
550
+
551
+ def timeout(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, seconds: int = 0, minutes: int = 0, hours: int = 0):
621
552
  """
622
- Decorator prototype for all step decorators. This function gets specialized
623
- and imported for all decorators types by _import_plugin_decorators().
553
+ Specifies a timeout for your step.
554
+
555
+ This decorator is useful if this step may hang indefinitely.
556
+
557
+ This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
558
+ A timeout is considered to be an exception thrown by the step. It will cause the step to be
559
+ retried if needed and the exception will be caught by the `@catch` decorator, if present.
560
+
561
+ Note that all the values specified in parameters are added together so if you specify
562
+ 60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.
563
+
564
+
565
+ Parameters
566
+ ----------
567
+ seconds : int, default 0
568
+ Number of seconds to wait prior to timing out.
569
+ minutes : int, default 0
570
+ Number of minutes to wait prior to timing out.
571
+ hours : int, default 0
572
+ Number of hours to wait prior to timing out.
624
573
  """
625
574
  ...
626
575
 
627
- def kubernetes(*, cpu: int = 1, memory: int = 4096, disk: int = 10240, image: typing.Optional[str] = None, image_pull_policy: str = 'KUBERNETES_IMAGE_PULL_POLICY', service_account: str = 'METAFLOW_KUBERNETES_SERVICE_ACCOUNT', secrets: typing.Optional[typing.List[str]] = None, node_selector: typing.Union[typing.Dict[str, str], str, None] = None, namespace: str = 'METAFLOW_KUBERNETES_NAMESPACE', gpu: typing.Optional[int] = None, gpu_vendor: str = 'KUBERNETES_GPU_VENDOR', tolerations: typing.List[str] = [], labels: typing.Dict[str, str] = 'METAFLOW_KUBERNETES_LABELS', annotations: typing.Dict[str, str] = 'METAFLOW_KUBERNETES_ANNOTATIONS', use_tmpfs: bool = False, tmpfs_tempdir: bool = True, tmpfs_size: typing.Optional[int] = None, tmpfs_path: typing.Optional[str] = '/metaflow_temp', persistent_volume_claims: typing.Optional[typing.Dict[str, str]] = None, shared_memory: typing.Optional[int] = None, port: typing.Optional[int] = None, compute_pool: typing.Optional[str] = None, hostname_resolution_timeout: int = 600, qos: str = 'Burstable', security_context: typing.Optional[typing.Dict[str, typing.Any]] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
576
+ def huggingface_hub(*, temp_dir_root: typing.Optional[str] = None, load: typing.Union[typing.List[str], typing.List[typing.Tuple[typing.Dict, str]], typing.List[typing.Tuple[str, str]], typing.List[typing.Dict], None]) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
628
577
  """
629
- Specifies that this step should execute on Kubernetes.
578
+ Decorator that helps cache, version and store models/datasets from huggingface hub.
630
579
 
631
580
 
632
581
  Parameters
633
582
  ----------
634
- cpu : int, default 1
635
- Number of CPUs required for this step. If `@resources` is
636
- also present, the maximum value from all decorators is used.
637
- memory : int, default 4096
638
- Memory size (in MB) required for this step. If
639
- `@resources` is also present, the maximum value from all decorators is
640
- used.
641
- disk : int, default 10240
642
- Disk size (in MB) required for this step. If
643
- `@resources` is also present, the maximum value from all decorators is
644
- used.
645
- image : str, optional, default None
646
- Docker image to use when launching on Kubernetes. If not specified, and
647
- METAFLOW_KUBERNETES_CONTAINER_IMAGE is specified, that image is used. If
648
- not, a default Docker image mapping to the current version of Python is used.
649
- image_pull_policy: str, default KUBERNETES_IMAGE_PULL_POLICY
650
- If given, the imagePullPolicy to be applied to the Docker image of the step.
651
- service_account : str, default METAFLOW_KUBERNETES_SERVICE_ACCOUNT
652
- Kubernetes service account to use when launching pod in Kubernetes.
653
- secrets : List[str], optional, default None
654
- Kubernetes secrets to use when launching pod in Kubernetes. These
655
- secrets are in addition to the ones defined in `METAFLOW_KUBERNETES_SECRETS`
656
- in Metaflow configuration.
657
- node_selector: Union[Dict[str,str], str], optional, default None
658
- Kubernetes node selector(s) to apply to the pod running the task.
659
- Can be passed in as a comma separated string of values e.g.
660
- 'kubernetes.io/os=linux,kubernetes.io/arch=amd64' or as a dictionary
661
- {'kubernetes.io/os': 'linux', 'kubernetes.io/arch': 'amd64'}
662
- namespace : str, default METAFLOW_KUBERNETES_NAMESPACE
663
- Kubernetes namespace to use when launching pod in Kubernetes.
664
- gpu : int, optional, default None
665
- Number of GPUs required for this step. A value of zero implies that
666
- the scheduled node should not have GPUs.
667
- gpu_vendor : str, default KUBERNETES_GPU_VENDOR
668
- The vendor of the GPUs to be used for this step.
669
- tolerations : List[str], default []
670
- The default is extracted from METAFLOW_KUBERNETES_TOLERATIONS.
671
- Kubernetes tolerations to use when launching pod in Kubernetes.
672
- labels: Dict[str, str], default: METAFLOW_KUBERNETES_LABELS
673
- Kubernetes labels to use when launching pod in Kubernetes.
674
- annotations: Dict[str, str], default: METAFLOW_KUBERNETES_ANNOTATIONS
675
- Kubernetes annotations to use when launching pod in Kubernetes.
676
- use_tmpfs : bool, default False
677
- This enables an explicit tmpfs mount for this step.
678
- tmpfs_tempdir : bool, default True
679
- sets METAFLOW_TEMPDIR to tmpfs_path if set for this step.
680
- tmpfs_size : int, optional, default: None
681
- The value for the size (in MiB) of the tmpfs mount for this step.
682
- This parameter maps to the `--tmpfs` option in Docker. Defaults to 50% of the
683
- memory allocated for this step.
684
- tmpfs_path : str, optional, default /metaflow_temp
685
- Path to tmpfs mount for this step.
686
- persistent_volume_claims : Dict[str, str], optional, default None
687
- A map (dictionary) of persistent volumes to be mounted to the pod for this step. The map is from persistent
688
- volumes to the path to which the volume is to be mounted, e.g., `{'pvc-name': '/path/to/mount/on'}`.
689
- shared_memory: int, optional
690
- Shared memory size (in MiB) required for this step
691
- port: int, optional
692
- Port number to specify in the Kubernetes job object
693
- compute_pool : str, optional, default None
694
- Compute pool to be used for for this step.
695
- If not specified, any accessible compute pool within the perimeter is used.
696
- hostname_resolution_timeout: int, default 10 * 60
697
- Timeout in seconds for the workers tasks in the gang scheduled cluster to resolve the hostname of control task.
698
- Only applicable when @parallel is used.
699
- qos: str, default: Burstable
700
- Quality of Service class to assign to the pod. Supported values are: Guaranteed, Burstable, BestEffort
583
+ temp_dir_root : str, optional
584
+ The root directory that will hold the temporary directory where objects will be downloaded.
585
+
586
+ load: Union[List[str], List[Tuple[Dict, str]], List[Tuple[str, str]], List[Dict], None]
587
+ The list of repos (models/datasets) to load.
588
+
589
+ Loaded repos can be accessed via `current.huggingface_hub.loaded`. If load is set, then the following happens:
590
+
591
+ - If repo (model/dataset) is not found in the datastore:
592
+ - Downloads the repo from Hugging Face Hub to a temporary directory (or uses specified path) for local access
593
+ - Stores it in Metaflow's datastore (s3/gcs/azure etc.) with a unique name based on repo_type/repo_id
594
+ - All HF models loaded for a `@step` will be cached separately under flow/step/namespace.
595
+
596
+ - If repo is found in the datastore:
597
+ - Loads it directly from datastore to local path (can be temporary directory or specified path)
598
+ """
599
+ ...
600
+
601
+ @typing.overload
602
+ def model(*, load: typing.Union[typing.List[str], str, typing.List[typing.Tuple[str, typing.Optional[str]]]] = None, temp_dir_root: str = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
603
+ """
604
+ Enables loading / saving of models within a step.
605
+
606
+
607
+
608
+ Parameters
609
+ ----------
610
+ load : Union[List[str],str,List[Tuple[str,Union[str,None]]]], default: None
611
+ Artifact name/s referencing the models/checkpoints to load. Artifact names refer to the names of the instance variables set to `self`.
612
+ These artifact names given to `load` can be reference objects or reference `key` strings from objects created by:
613
+ - `current.checkpoint`
614
+ - `current.model`
615
+ - `current.huggingface_hub`
616
+
617
+ If a list of tuples is provided, the first element is the artifact name and the second element is the path to which the artifact needs to be unpacked on
618
+ the local filesystem. If the second element is None, the artifact will be unpacked in the current working directory.
619
+ If a string is provided, then the artifact corresponding to that name will be loaded in the current working directory.
620
+
621
+ temp_dir_root : str, default: None
622
+ The root directory under which `current.model.loaded` will store loaded models
623
+ """
624
+ ...
625
+
626
+ @typing.overload
627
+ def model(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
628
+ ...
629
+
630
+ @typing.overload
631
+ def model(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
632
+ ...
633
+
634
+ def model(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, load: typing.Union[typing.List[str], str, typing.List[typing.Tuple[str, typing.Optional[str]]]] = None, temp_dir_root: str = None):
635
+ """
636
+ Enables loading / saving of models within a step.
637
+
638
+
701
639
 
702
- security_context: Dict[str, Any], optional, default None
703
- Container security context. Applies to the task container. Allows the following keys:
704
- - privileged: bool, optional, default None
705
- - allow_privilege_escalation: bool, optional, default None
706
- - run_as_user: int, optional, default None
707
- - run_as_group: int, optional, default None
708
- - run_as_non_root: bool, optional, default None
640
+ Parameters
641
+ ----------
642
+ load : Union[List[str],str,List[Tuple[str,Union[str,None]]]], default: None
643
+ Artifact name/s referencing the models/checkpoints to load. Artifact names refer to the names of the instance variables set to `self`.
644
+ These artifact names given to `load` can be reference objects or reference `key` strings from objects created by:
645
+ - `current.checkpoint`
646
+ - `current.model`
647
+ - `current.huggingface_hub`
648
+
649
+ If a list of tuples is provided, the first element is the artifact name and the second element is the path to which the artifact needs to be unpacked on
650
+ the local filesystem. If the second element is None, the artifact will be unpacked in the current working directory.
651
+ If a string is provided, then the artifact corresponding to that name will be loaded in the current working directory.
652
+
653
+ temp_dir_root : str, default: None
654
+ The root directory under which `current.model.loaded` will store loaded models
709
655
  """
710
656
  ...
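
To make the `@model` parameters above concrete, a hedged sketch: it assumes `model` and `current` are exposed by the top-level `metaflow` package as in these stubs, that `current.model.save` returns a reference suitable for storing on `self`, and that `current.model.loaded` is keyed by artifact name; all flow, step, and file names are placeholders.

```python
from metaflow import FlowSpec, current, model, step


class ModelRoundTripFlow(FlowSpec):

    @model
    @step
    def start(self):
        # Persist a local file and keep the returned reference as an artifact
        # (assumption: `current.model.save` returns such a reference).
        with open("weights.bin", "wb") as f:
            f.write(b"\x00" * 16)
        self.trained_model = current.model.save("weights.bin")
        self.next(self.score)

    # Load the artifact saved above; without an explicit path it is unpacked
    # under a temporary directory (or `temp_dir_root`, if given).
    @model(load=["trained_model"])
    @step
    def score(self):
        path = current.model.loaded["trained_model"]
        print("model files at", path)
        self.next(self.end)

    @step
    def end(self):
        pass


if __name__ == "__main__":
    ModelRoundTripFlow()
```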
711
657
 
712
- def nvidia(*, gpu: int, gpu_type: str, queue_timeout: int) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
658
+ def app_deploy(*, app_port: int, app_name: str) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
713
659
  """
714
- Specifies that this step should execute on DGX cloud.
660
+ Specifies that this step is used to deploy an instance of the app.
661
+ Requires that self.app_name, self.app_port, self.entrypoint and self.deployDir are set.
715
662
 
716
663
 
717
664
  Parameters
718
665
  ----------
719
- gpu : int
666
+ app_port : int
720
667
  Number of GPUs to use.
721
- gpu_type : str
722
- Type of Nvidia GPU to use.
723
- queue_timeout : int
724
- Time to keep the job in NVCF's queue.
668
+ app_name : str
669
+ Name of the app to deploy.
670
+ """
671
+ ...
672
+
673
+ @typing.overload
674
+ def secrets(*, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = []) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
675
+ """
676
+ Specifies secrets to be retrieved and injected as environment variables prior to
677
+ the execution of a step.
678
+
679
+
680
+ Parameters
681
+ ----------
682
+ sources : List[Union[str, Dict[str, Any]]], default: []
683
+ List of secret specs, defining how the secrets are to be retrieved
684
+ """
685
+ ...
686
+
687
+ @typing.overload
688
+ def secrets(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
689
+ ...
690
+
691
+ @typing.overload
692
+ def secrets(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
693
+ ...
694
+
695
+ def secrets(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = []):
696
+ """
697
+ Specifies secrets to be retrieved and injected as environment variables prior to
698
+ the execution of a step.
699
+
700
+
701
+ Parameters
702
+ ----------
703
+ sources : List[Union[str, Dict[str, Any]]], default: []
704
+ List of secret specs, defining how the secrets are to be retrieved
725
705
  """
726
706
  ...
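
A brief sketch of the `@secrets` decorator documented above; the secret source name is a placeholder, and which environment variables it injects depends entirely on how that source is configured in the secrets backend.

```python
import os

from metaflow import FlowSpec, secrets, step


class SecretsFlow(FlowSpec):

    # Each entry in `sources` is a secret spec; "db-credentials" is a placeholder.
    @secrets(sources=["db-credentials"])
    @step
    def start(self):
        # Retrieved secrets are injected as environment variables before the step runs.
        print("DB_USER present:", "DB_USER" in os.environ)
        self.next(self.end)

    @step
    def end(self):
        pass


if __name__ == "__main__":
    SecretsFlow()
```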
727
707
 
@@ -743,59 +723,53 @@ def fast_bakery_internal(f: typing.Union[typing.Callable[[FlowSpecDerived, StepF
743
723
  ...
744
724
 
745
725
  @typing.overload
746
- def checkpoint(*, load_policy: str = 'fresh', temp_dir_root: str = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
726
+ def pypi(*, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
747
727
  """
748
- Enables checkpointing for a step.
728
+ Specifies the PyPI packages for the step.
749
729
 
730
+ Information in this decorator will augment any
731
+ attributes set in the `@pypi_base` flow-level decorator. Hence,
732
+ you can use `@pypi_base` to set packages required by all
733
+ steps and use `@pypi` to specify step-specific overrides.
750
734
 
751
735
 
752
736
  Parameters
753
737
  ----------
754
- load_policy : str, default: "fresh"
755
- The policy for loading the checkpoint. The following policies are supported:
756
- - "eager": Loads the the latest available checkpoint within the namespace.
757
- With this mode, the latest checkpoint written by any previous task (can be even a different run) of the step
758
- will be loaded at the start of the task.
759
- - "none": Do not load any checkpoint
760
- - "fresh": Loads the lastest checkpoint created within the running Task.
761
- This mode helps loading checkpoints across various retry attempts of the same task.
762
- With this mode, no checkpoint will be loaded at the start of a task but any checkpoints
763
- created within the task will be loaded when the task is retries execution on failure.
764
-
765
- temp_dir_root : str, default: None
766
- The root directory under which `current.checkpoint.directory` will be created.
738
+ packages : Dict[str, str], default: {}
739
+ Packages to use for this step. The key is the name of the package
740
+ and the value is the version to use.
741
+ python : str, optional, default: None
742
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
743
+ that the version used will correspond to the version of the Python interpreter used to start the run.
767
744
  """
768
745
  ...
769
746
 
770
747
  @typing.overload
771
- def checkpoint(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
748
+ def pypi(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
772
749
  ...
773
750
 
774
751
  @typing.overload
775
- def checkpoint(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
752
+ def pypi(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
776
753
  ...
777
754
 
778
- def checkpoint(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, load_policy: str = 'fresh', temp_dir_root: str = None):
755
+ def pypi(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None):
779
756
  """
780
- Enables checkpointing for a step.
757
+ Specifies the PyPI packages for the step.
781
758
 
759
+ Information in this decorator will augment any
760
+ attributes set in the `@pypi_base` flow-level decorator. Hence,
761
+ you can use `@pypi_base` to set packages required by all
762
+ steps and use `@pypi` to specify step-specific overrides.
782
763
 
783
764
 
784
765
  Parameters
785
766
  ----------
786
- load_policy : str, default: "fresh"
787
- The policy for loading the checkpoint. The following policies are supported:
788
- - "eager": Loads the the latest available checkpoint within the namespace.
789
- With this mode, the latest checkpoint written by any previous task (can be even a different run) of the step
790
- will be loaded at the start of the task.
791
- - "none": Do not load any checkpoint
792
- - "fresh": Loads the lastest checkpoint created within the running Task.
793
- This mode helps loading checkpoints across various retry attempts of the same task.
794
- With this mode, no checkpoint will be loaded at the start of a task but any checkpoints
795
- created within the task will be loaded when the task is retries execution on failure.
796
-
797
- temp_dir_root : str, default: None
798
- The root directory under which `current.checkpoint.directory` will be created.
767
+ packages : Dict[str, str], default: {}
768
+ Packages to use for this step. The key is the name of the package
769
+ and the value is the version to use.
770
+ python : str, optional, default: None
771
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
772
+ that the version used will correspond to the version of the Python interpreter used to start the run.
799
773
  """
800
774
  ...
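
A short sketch of step-level `@pypi` as documented above; the package pin and Python version are illustrative, not recommendations.

```python
from metaflow import FlowSpec, pypi, step


class PypiStepFlow(FlowSpec):

    # Illustrative pins; the step runs in its own PyPI-resolved environment.
    @pypi(packages={"pandas": "2.2.2"}, python="3.11")
    @step
    def start(self):
        import pandas as pd  # available because it is installed in the step environment
        print(pd.__version__)
        self.next(self.end)

    @step
    def end(self):
        pass


if __name__ == "__main__":
    PypiStepFlow()
```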
801
775
 
@@ -858,161 +832,144 @@ def conda(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], ty
858
832
  """
859
833
  ...
860
834
 
861
- @typing.overload
862
- def environment(*, vars: typing.Dict[str, str] = {}) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
835
+ def nvidia(*, gpu: int, gpu_type: str, queue_timeout: int) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
863
836
  """
864
- Specifies environment variables to be set prior to the execution of a step.
837
+ Specifies that this step should execute on DGX cloud.
865
838
 
866
839
 
867
840
  Parameters
868
841
  ----------
869
- vars : Dict[str, str], default {}
870
- Dictionary of environment variables to set.
842
+ gpu : int
843
+ Number of GPUs to use.
844
+ gpu_type : str
845
+ Type of Nvidia GPU to use.
846
+ queue_timeout : int
847
+ Time to keep the job in NVCF's queue.
871
848
  """
872
849
  ...
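
A hedged sketch of `@nvidia`; the GPU type string and queue timeout are placeholders and depend on what the DGX Cloud / NVCF backend actually accepts.

```python
from metaflow import FlowSpec, nvidia, step


class GpuFlow(FlowSpec):

    @step
    def start(self):
        self.next(self.train)

    # gpu_type and queue_timeout values are placeholders, not validated options.
    @nvidia(gpu=1, gpu_type="H100", queue_timeout=3600)
    @step
    def train(self):
        print("running on DGX cloud")
        self.next(self.end)

    @step
    def end(self):
        pass


if __name__ == "__main__":
    GpuFlow()
```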
873
850
 
874
851
  @typing.overload
875
- def environment(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
852
+ def parallel(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
853
+ """
854
+ Decorator prototype for all step decorators. This function gets specialized
855
+ and imported for all decorators types by _import_plugin_decorators().
856
+ """
876
857
  ...
877
858
 
878
859
  @typing.overload
879
- def environment(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
880
- ...
881
-
882
- def environment(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, vars: typing.Dict[str, str] = {}):
883
- """
884
- Specifies environment variables to be set prior to the execution of a step.
885
-
886
-
887
- Parameters
888
- ----------
889
- vars : Dict[str, str], default {}
890
- Dictionary of environment variables to set.
891
- """
860
+ def parallel(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
892
861
  ...
893
862
 
894
- def huggingface_hub(*, temp_dir_root: typing.Optional[str] = None, load: typing.Union[typing.List[str], typing.List[typing.Tuple[typing.Dict, str]], typing.List[typing.Tuple[str, str]], typing.List[typing.Dict], None]) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
863
+ def parallel(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
895
864
  """
896
- Decorator that helps cache, version and store models/datasets from huggingface hub.
897
-
898
-
899
- Parameters
900
- ----------
901
- temp_dir_root : str, optional
902
- The root directory that will hold the temporary directory where objects will be downloaded.
903
-
904
- load: Union[List[str], List[Tuple[Dict, str]], List[Tuple[str, str]], List[Dict], None]
905
- The list of repos (models/datasets) to load.
906
-
907
- Loaded repos can be accessed via `current.huggingface_hub.loaded`. If load is set, then the following happens:
908
-
909
- - If repo (model/dataset) is not found in the datastore:
910
- - Downloads the repo from Hugging Face Hub to a temporary directory (or uses specified path) for local access
911
- - Stores it in Metaflow's datastore (s3/gcs/azure etc.) with a unique name based on repo_type/repo_id
912
- - All HF models loaded for a `@step` will be cached separately under flow/step/namespace.
913
-
914
- - If repo is found in the datastore:
915
- - Loads it directly from datastore to local path (can be temporary directory or specified path)
865
+ Decorator prototype for all step decorators. This function gets specialized
866
+ and imported for all decorator types by _import_plugin_decorators().
916
867
  """
917
868
  ...
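
A rough sketch of a gang-scheduled `@parallel` step. The `num_parallel` argument to `self.next` and the `current.parallel` accessor are assumptions about the surrounding Metaflow API rather than anything stated in this stub.

```python
from metaflow import FlowSpec, current, parallel, step


class ParallelFlow(FlowSpec):

    @step
    def start(self):
        # Assumption: num_parallel requests a gang of 4 simultaneous tasks.
        self.next(self.train, num_parallel=4)

    @parallel
    @step
    def train(self):
        # Assumption: current.parallel exposes this node's index and the gang size.
        print(f"node {current.parallel.node_index} of {current.parallel.num_nodes}")
        self.next(self.join)

    @step
    def join(self, inputs):
        self.next(self.end)

    @step
    def end(self):
        pass


if __name__ == "__main__":
    ParallelFlow()
```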
918
869
 
919
870
  @typing.overload
920
- def model(*, load: typing.Union[typing.List[str], str, typing.List[typing.Tuple[str, typing.Optional[str]]]] = None, temp_dir_root: str = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
871
+ def retry(*, times: int = 3, minutes_between_retries: int = 2) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
921
872
  """
922
- Enables loading / saving of models within a step.
873
+ Specifies the number of times the task corresponding
874
+ to a step needs to be retried.
875
+
876
+ This decorator is useful for handling transient errors, such as networking issues.
877
+ If your task contains operations that can't be retried safely, e.g. database updates,
878
+ it is advisable to annotate it with `@retry(times=0)`.
923
879
 
880
+ This can be used in conjunction with the `@catch` decorator. The `@catch`
881
+ decorator will execute a no-op task after all retries have been exhausted,
882
+ ensuring that the flow execution can continue.
924
883
 
925
884
 
926
885
  Parameters
927
886
  ----------
928
- load : Union[List[str],str,List[Tuple[str,Union[str,None]]]], default: None
929
- Artifact name/s referencing the models/checkpoints to load. Artifact names refer to the names of the instance variables set to `self`.
930
- These artifact names give to `load` be reference objects or reference `key` string's from objects created by:
931
- - `current.checkpoint`
932
- - `current.model`
933
- - `current.huggingface_hub`
934
-
935
- If a list of tuples is provided, the first element is the artifact name and the second element is the path the artifact needs be unpacked on
936
- the local filesystem. If the second element is None, the artifact will be unpacked in the current working directory.
937
- If a string is provided, then the artifact corresponding to that name will be loaded in the current working directory.
938
-
939
- temp_dir_root : str, default: None
940
- The root directory under which `current.model.loaded` will store loaded models
887
+ times : int, default 3
888
+ Number of times to retry this task.
889
+ minutes_between_retries : int, default 2
890
+ Number of minutes between retries.
941
891
  """
942
892
  ...
943
893
 
944
894
  @typing.overload
945
- def model(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
895
+ def retry(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
946
896
  ...
947
897
 
948
898
  @typing.overload
949
- def model(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
899
+ def retry(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
950
900
  ...
951
901
 
952
- def model(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, load: typing.Union[typing.List[str], str, typing.List[typing.Tuple[str, typing.Optional[str]]]] = None, temp_dir_root: str = None):
902
+ def retry(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, times: int = 3, minutes_between_retries: int = 2):
953
903
  """
954
- Enables loading / saving of models within a step.
904
+ Specifies the number of times the task corresponding
905
+ to a step needs to be retried.
906
+
907
+ This decorator is useful for handling transient errors, such as networking issues.
908
+ If your task contains operations that can't be retried safely, e.g. database updates,
909
+ it is advisable to annotate it with `@retry(times=0)`.
955
910
 
911
+ This can be used in conjunction with the `@catch` decorator. The `@catch`
912
+ decorator will execute a no-op task after all retries have been exhausted,
913
+ ensuring that the flow execution can continue.
956
914
 
957
915
 
958
916
  Parameters
959
917
  ----------
960
- load : Union[List[str],str,List[Tuple[str,Union[str,None]]]], default: None
961
- Artifact name/s referencing the models/checkpoints to load. Artifact names refer to the names of the instance variables set to `self`.
962
- These artifact names give to `load` be reference objects or reference `key` string's from objects created by:
963
- - `current.checkpoint`
964
- - `current.model`
965
- - `current.huggingface_hub`
918
+ times : int, default 3
919
+ Number of times to retry this task.
920
+ minutes_between_retries : int, default 2
921
+ Number of minutes between retries.
922
+ """
923
+ ...
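
A minimal sketch of `@retry` on a step with a simulated transient failure; the retry counts are illustrative.

```python
import random

from metaflow import FlowSpec, retry, step


class RetryFlow(FlowSpec):

    @step
    def start(self):
        self.next(self.flaky)

    # Retried up to 2 more times, waiting 1 minute between attempts.
    @retry(times=2, minutes_between_retries=1)
    @step
    def flaky(self):
        if random.random() < 0.5:  # simulated transient error
            raise RuntimeError("transient failure")
        self.next(self.end)

    @step
    def end(self):
        pass


if __name__ == "__main__":
    RetryFlow()
```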
924
+
925
+ @typing.overload
926
+ def catch(*, var: typing.Optional[str] = None, print_exception: bool = True) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
927
+ """
928
+ Specifies that the step will succeed under all circumstances.
966
929
 
967
- If a list of tuples is provided, the first element is the artifact name and the second element is the path the artifact needs be unpacked on
968
- the local filesystem. If the second element is None, the artifact will be unpacked in the current working directory.
969
- If a string is provided, then the artifact corresponding to that name will be loaded in the current working directory.
930
+ The decorator will create an optional artifact, specified by `var`, which
931
+ contains the exception raised. You can use it to detect the presence
932
+ of errors, indicating that all happy-path artifacts produced by the step
933
+ are missing.
970
934
 
971
- temp_dir_root : str, default: None
972
- The root directory under which `current.model.loaded` will store loaded models
935
+
936
+ Parameters
937
+ ----------
938
+ var : str, optional, default None
939
+ Name of the artifact in which to store the caught exception.
940
+ If not specified, the exception is not stored.
941
+ print_exception : bool, default True
942
+ Determines whether or not the exception is printed to
943
+ stdout when caught.
973
944
  """
974
945
  ...
975
946
 
976
- def airflow_external_task_sensor(*, timeout: int, poke_interval: int, mode: str, exponential_backoff: bool, pool: str, soft_fail: bool, name: str, description: str, external_dag_id: str, external_task_ids: typing.List[str], allowed_states: typing.List[str], failed_states: typing.List[str], execution_delta: "datetime.timedelta", check_existence: bool) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
947
+ @typing.overload
948
+ def catch(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
949
+ ...
950
+
951
+ @typing.overload
952
+ def catch(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
953
+ ...
954
+
955
+ def catch(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, var: typing.Optional[str] = None, print_exception: bool = True):
977
956
  """
978
- The `@airflow_external_task_sensor` decorator attaches a Airflow [ExternalTaskSensor](https://airflow.apache.org/docs/apache-airflow/stable/_api/airflow/sensors/external_task/index.html#airflow.sensors.external_task.ExternalTaskSensor) before the start step of the flow.
979
- This decorator only works when a flow is scheduled on Airflow and is compiled using `airflow create`. More than one `@airflow_external_task_sensor` can be added as a flow decorators. Adding more than one decorator will ensure that `start` step starts only after all sensors finish.
957
+ Specifies that the step will succeed under all circumstances.
958
+
959
+ The decorator will create an optional artifact, specified by `var`, which
960
+ contains the exception raised. You can use it to detect the presence
961
+ of errors, indicating that all happy-path artifacts produced by the step
962
+ are missing.
980
963
 
981
964
 
982
965
  Parameters
983
966
  ----------
984
- timeout : int
985
- Time, in seconds before the task times out and fails. (Default: 3600)
986
- poke_interval : int
987
- Time in seconds that the job should wait in between each try. (Default: 60)
988
- mode : str
989
- How the sensor operates. Options are: { poke | reschedule }. (Default: "poke")
990
- exponential_backoff : bool
991
- allow progressive longer waits between pokes by using exponential backoff algorithm. (Default: True)
992
- pool : str
993
- the slot pool this task should run in,
994
- slot pools are a way to limit concurrency for certain tasks. (Default:None)
995
- soft_fail : bool
996
- Set to true to mark the task as SKIPPED on failure. (Default: False)
997
- name : str
998
- Name of the sensor on Airflow
999
- description : str
1000
- Description of sensor in the Airflow UI
1001
- external_dag_id : str
1002
- The dag_id that contains the task you want to wait for.
1003
- external_task_ids : List[str]
1004
- The list of task_ids that you want to wait for.
1005
- If None (default value) the sensor waits for the DAG. (Default: None)
1006
- allowed_states : List[str]
1007
- Iterable of allowed states, (Default: ['success'])
1008
- failed_states : List[str]
1009
- Iterable of failed or dis-allowed states. (Default: None)
1010
- execution_delta : datetime.timedelta
1011
- time difference with the previous execution to look at,
1012
- the default is the same logical date as the current task or DAG. (Default: None)
1013
- check_existence: bool
1014
- Set to True to check if the external task exists or check if
1015
- the DAG to wait for exists. (Default: True)
967
+ var : str, optional, default None
968
+ Name of the artifact in which to store the caught exception.
969
+ If not specified, the exception is not stored.
970
+ print_exception : bool, default True
971
+ Determines whether or not the exception is printed to
972
+ stdout when caught.
1016
973
  """
1017
974
  ...
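
A hedged sketch of `@catch`: the caught exception lands in the artifact named by `var`, which a later step can inspect; the artifact and flow names are arbitrary.

```python
from metaflow import FlowSpec, catch, step


class CatchFlow(FlowSpec):

    @step
    def start(self):
        self.next(self.risky)

    # If the step raises, the exception is stored in self.compute_error
    # and the flow continues instead of failing.
    @catch(var="compute_error", print_exception=True)
    @step
    def risky(self):
        self.result = 1 / 0  # deliberately raises ZeroDivisionError
        self.next(self.end)

    @step
    def end(self):
        if getattr(self, "compute_error", None):
            print("risky step failed:", self.compute_error)
        else:
            print("result:", self.result)


if __name__ == "__main__":
    CatchFlow()
```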
1018
975
 
@@ -1117,155 +1074,6 @@ def trigger_on_finish(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *
1117
1074
  """
1118
1075
  ...
1119
1076
 
1120
- def project(*, name: str, branch: typing.Optional[str] = None, production: bool = False) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1121
- """
1122
- Specifies what flows belong to the same project.
1123
-
1124
- A project-specific namespace is created for all flows that
1125
- use the same `@project(name)`.
1126
-
1127
-
1128
- Parameters
1129
- ----------
1130
- name : str
1131
- Project name. Make sure that the name is unique amongst all
1132
- projects that use the same production scheduler. The name may
1133
- contain only lowercase alphanumeric characters and underscores.
1134
-
1135
- branch : Optional[str], default None
1136
- The branch to use. If not specified, the branch is set to
1137
- `user.<username>` unless `production` is set to `True`. This can
1138
- also be set on the command line using `--branch` as a top-level option.
1139
- It is an error to specify `branch` in the decorator and on the command line.
1140
-
1141
- production : bool, default False
1142
- Whether or not the branch is the production branch. This can also be set on the
1143
- command line using `--production` as a top-level option. It is an error to specify
1144
- `production` in the decorator and on the command line.
1145
- The project branch name will be:
1146
- - if `branch` is specified:
1147
- - if `production` is True: `prod.<branch>`
1148
- - if `production` is False: `test.<branch>`
1149
- - if `branch` is not specified:
1150
- - if `production` is True: `prod`
1151
- - if `production` is False: `user.<username>`
1152
- """
1153
- ...
1154
-
1155
- def with_artifact_store(f: typing.Optional[typing.Type[FlowSpecDerived]] = None):
1156
- """
1157
- Allows setting external datastores to save data for the
1158
- `@checkpoint`/`@model`/`@huggingface_hub` decorators.
1159
-
1160
- This decorator is useful when users wish to save data to a different datastore
1161
- than what is configured in Metaflow. This can be for variety of reasons:
1162
-
1163
- 1. Data security: The objects needs to be stored in a bucket (object storage) that is not accessible by other flows.
1164
- 2. Data Locality: The location where the task is executing is not located in the same region as the datastore.
1165
- - Example: Metaflow datastore lives in US East, but the task is executing in Finland datacenters.
1166
- 3. Data Lifecycle Policies: The objects need to be archived / managed separately from the Metaflow managed objects.
1167
- - Example: Flow is training very large models that need to be stored separately and will be deleted more aggressively than the Metaflow managed objects.
1168
-
1169
- Usage:
1170
- ----------
1171
-
1172
- - Using a custom IAM role to access the datastore.
1173
-
1174
- ```python
1175
- @with_artifact_store(
1176
- type="s3",
1177
- config=lambda: {
1178
- "root": "s3://my-bucket-foo/path/to/root",
1179
- "role_arn": ROLE,
1180
- },
1181
- )
1182
- class MyFlow(FlowSpec):
1183
-
1184
- @checkpoint
1185
- @step
1186
- def start(self):
1187
- with open("my_file.txt", "w") as f:
1188
- f.write("Hello, World!")
1189
- self.external_bucket_checkpoint = current.checkpoint.save("my_file.txt")
1190
- self.next(self.end)
1191
-
1192
- ```
1193
-
1194
- - Using credentials to access the s3-compatible datastore.
1195
-
1196
- ```python
1197
- @with_artifact_store(
1198
- type="s3",
1199
- config=lambda: {
1200
- "root": "s3://my-bucket-foo/path/to/root",
1201
- "client_params": {
1202
- "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1203
- "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1204
- },
1205
- },
1206
- )
1207
- class MyFlow(FlowSpec):
1208
-
1209
- @checkpoint
1210
- @step
1211
- def start(self):
1212
- with open("my_file.txt", "w") as f:
1213
- f.write("Hello, World!")
1214
- self.external_bucket_checkpoint = current.checkpoint.save("my_file.txt")
1215
- self.next(self.end)
1216
-
1217
- ```
1218
-
1219
- - Accessing objects stored in external datastores after task execution.
1220
-
1221
- ```python
1222
- run = Run("CheckpointsTestsFlow/8992")
1223
- with artifact_store_from(run=run, config={
1224
- "client_params": {
1225
- "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1226
- "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1227
- },
1228
- }):
1229
- with Checkpoint() as cp:
1230
- latest = cp.list(
1231
- task=run["start"].task
1232
- )[0]
1233
- print(latest)
1234
- cp.load(
1235
- latest,
1236
- "test-checkpoints"
1237
- )
1238
-
1239
- task = Task("TorchTuneFlow/8484/train/53673")
1240
- with artifact_store_from(run=run, config={
1241
- "client_params": {
1242
- "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1243
- "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1244
- },
1245
- }):
1246
- load_model(
1247
- task.data.model_ref,
1248
- "test-models"
1249
- )
1250
- ```
1251
- Parameters:
1252
- ----------
1253
-
1254
- type: str
1255
- The type of the datastore. Can be one of 's3', 'gcs', 'azure' or any other supported metaflow Datastore.
1256
-
1257
- config: dict or Callable
1258
- Dictionary of configuration options for the datastore. The following keys are required:
1259
- - root: The root path in the datastore where the data will be saved. (needs to be in the format expected by the datastore)
1260
- - example: 's3://bucket-name/path/to/root'
1261
- - example: 'gs://bucket-name/path/to/root'
1262
- - example: 'https://myblockacc.blob.core.windows.net/metaflow/'
1263
- - role_arn (optional): AWS IAM role to access s3 bucket (only when `type` is 's3')
1264
- - session_vars (optional): AWS session variables to access s3 bucket (only when `type` is 's3')
1265
- - client_params (optional): AWS client parameters to access s3 bucket (only when `type` is 's3')
1266
- """
1267
- ...
1268
-
1269
1077
  @typing.overload
1270
1078
  def trigger(*, event: typing.Union[str, typing.Dict[str, typing.Any], None] = None, events: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = [], options: typing.Dict[str, typing.Any] = {}) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1271
1079
  """
@@ -1350,12 +1158,63 @@ def trigger(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, event: t
1350
1158
 
1351
1159
  Parameters
1352
1160
  ----------
1353
- event : Union[str, Dict[str, Any]], optional, default None
1354
- Event dependency for this flow.
1355
- events : List[Union[str, Dict[str, Any]]], default []
1356
- Events dependency for this flow.
1357
- options : Dict[str, Any], default {}
1358
- Backend-specific configuration for tuning eventing behavior.
1161
+ event : Union[str, Dict[str, Any]], optional, default None
1162
+ Event dependency for this flow.
1163
+ events : List[Union[str, Dict[str, Any]]], default []
1164
+ Events dependency for this flow.
1165
+ options : Dict[str, Any], default {}
1166
+ Backend-specific configuration for tuning eventing behavior.
1167
+ """
1168
+ ...
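
A flow-level sketch of `@trigger`; the event name is a placeholder, and publishing events (and deploying the flow so the trigger takes effect) is outside the scope of this stub.

```python
from metaflow import FlowSpec, step, trigger


# Assumption: the trigger takes effect once the flow is deployed to a
# production orchestrator; "data_refreshed" is a placeholder event name.
@trigger(event="data_refreshed")
class TriggeredFlow(FlowSpec):

    @step
    def start(self):
        self.next(self.end)

    @step
    def end(self):
        pass


if __name__ == "__main__":
    TriggeredFlow()
```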
1169
+
1170
+ @typing.overload
1171
+ def conda_base(*, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1172
+ """
1173
+ Specifies the Conda environment for all steps of the flow.
1174
+
1175
+ Use `@conda_base` to set common libraries required by all
1176
+ steps and use `@conda` to specify step-specific additions.
1177
+
1178
+
1179
+ Parameters
1180
+ ----------
1181
+ packages : Dict[str, str], default {}
1182
+ Packages to use for this flow. The key is the name of the package
1183
+ and the value is the version to use.
1184
+ libraries : Dict[str, str], default {}
1185
+ Supported for backward compatibility. When used with packages, packages will take precedence.
1186
+ python : str, optional, default None
1187
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
1188
+ that the version used will correspond to the version of the Python interpreter used to start the run.
1189
+ disabled : bool, default False
1190
+ If set to True, disables Conda.
1191
+ """
1192
+ ...
1193
+
1194
+ @typing.overload
1195
+ def conda_base(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1196
+ ...
1197
+
1198
+ def conda_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False):
1199
+ """
1200
+ Specifies the Conda environment for all steps of the flow.
1201
+
1202
+ Use `@conda_base` to set common libraries required by all
1203
+ steps and use `@conda` to specify step-specific additions.
1204
+
1205
+
1206
+ Parameters
1207
+ ----------
1208
+ packages : Dict[str, str], default {}
1209
+ Packages to use for this flow. The key is the name of the package
1210
+ and the value is the version to use.
1211
+ libraries : Dict[str, str], default {}
1212
+ Supported for backward compatibility. When used with packages, packages will take precedence.
1213
+ python : str, optional, default None
1214
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
1215
+ that the version used will correspond to the version of the Python interpreter used to start the run.
1216
+ disabled : bool, default False
1217
+ If set to True, disables Conda.
1359
1218
  """
1360
1219
  ...
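
A hedged sketch of `@conda_base` at the flow level with a step-level `@conda` addition, as the docstring above suggests; package versions are illustrative.

```python
from metaflow import FlowSpec, conda, conda_base, step


@conda_base(python="3.11", packages={"numpy": "1.26.4"})
class CondaFlow(FlowSpec):

    # Step-specific addition on top of the flow-level environment.
    @conda(packages={"scikit-learn": "1.5.1"})
    @step
    def start(self):
        import numpy as np
        import sklearn
        print(np.__version__, sklearn.__version__)
        self.next(self.end)

    @step
    def end(self):
        pass


if __name__ == "__main__":
    CondaFlow()
```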
1361
1220
 
@@ -1411,53 +1270,43 @@ def schedule(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, hourly:
1411
1270
  ...
1412
1271
 
1413
1272
  @typing.overload
1414
- def conda_base(*, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1273
+ def pypi_base(*, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1415
1274
  """
1416
- Specifies the Conda environment for all steps of the flow.
1417
-
1418
- Use `@conda_base` to set common libraries required by all
1419
- steps and use `@conda` to specify step-specific additions.
1275
+ Specifies the PyPI packages for all steps of the flow.
1420
1276
 
1277
+ Use `@pypi_base` to set common packages required by all
1278
+ steps and use `@pypi` to specify step-specific overrides.
1421
1279
 
1422
1280
  Parameters
1423
1281
  ----------
1424
- packages : Dict[str, str], default {}
1282
+ packages : Dict[str, str], default: {}
1425
1283
  Packages to use for this flow. The key is the name of the package
1426
1284
  and the value is the version to use.
1427
- libraries : Dict[str, str], default {}
1428
- Supported for backward compatibility. When used with packages, packages will take precedence.
1429
- python : str, optional, default None
1285
+ python : str, optional, default: None
1430
1286
  Version of Python to use, e.g. '3.7.4'. A default value of None implies
1431
1287
  that the version used will correspond to the version of the Python interpreter used to start the run.
1432
- disabled : bool, default False
1433
- If set to True, disables Conda.
1434
1288
  """
1435
1289
  ...
1436
1290
 
1437
1291
  @typing.overload
1438
- def conda_base(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1292
+ def pypi_base(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1439
1293
  ...
1440
1294
 
1441
- def conda_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False):
1295
+ def pypi_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None):
1442
1296
  """
1443
- Specifies the Conda environment for all steps of the flow.
1444
-
1445
- Use `@conda_base` to set common libraries required by all
1446
- steps and use `@conda` to specify step-specific additions.
1297
+ Specifies the PyPI packages for all steps of the flow.
1447
1298
 
1299
+ Use `@pypi_base` to set common packages required by all
1300
+ steps and use `@pypi` to specify step-specific overrides.
1448
1301
 
1449
1302
  Parameters
1450
1303
  ----------
1451
- packages : Dict[str, str], default {}
1304
+ packages : Dict[str, str], default: {}
1452
1305
  Packages to use for this flow. The key is the name of the package
1453
1306
  and the value is the version to use.
1454
- libraries : Dict[str, str], default {}
1455
- Supported for backward compatibility. When used with packages, packages will take precedence.
1456
- python : str, optional, default None
1307
+ python : str, optional, default: None
1457
1308
  Version of Python to use, e.g. '3.7.4'. A default value of None implies
1458
1309
  that the version used will correspond to the version of the Python interpreter used to start the run.
1459
- disabled : bool, default False
1460
- If set to True, disables Conda.
1461
1310
  """
1462
1311
  ...
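
The flow-level counterpart to the step-level `@pypi` shown earlier: a hedged sketch of `@pypi_base` with a per-step override; pins are illustrative.

```python
from metaflow import FlowSpec, pypi, pypi_base, step


@pypi_base(python="3.11", packages={"requests": "2.32.3"})
class PypiBaseFlow(FlowSpec):

    @step
    def start(self):
        import requests  # provided by the flow-level @pypi_base environment
        print(requests.__version__)
        self.next(self.fetch)

    # Step-specific override augments the base packages with pandas.
    @pypi(packages={"pandas": "2.2.2"})
    @step
    def fetch(self):
        import pandas as pd
        print(pd.__version__)
        self.next(self.end)

    @step
    def end(self):
        pass


if __name__ == "__main__":
    PypiBaseFlow()
```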
1463
1312
 
@@ -1504,44 +1353,195 @@ def airflow_s3_key_sensor(*, timeout: int, poke_interval: int, mode: str, expone
1504
1353
  """
1505
1354
  ...
1506
1355
 
1507
- @typing.overload
1508
- def pypi_base(*, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1356
+ def project(*, name: str, branch: typing.Optional[str] = None, production: bool = False) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1509
1357
  """
1510
- Specifies the PyPI packages for all steps of the flow.
1358
+ Specifies what flows belong to the same project.
1359
+
1360
+ A project-specific namespace is created for all flows that
1361
+ use the same `@project(name)`.
1511
1362
 
1512
- Use `@pypi_base` to set common packages required by all
1513
- steps and use `@pypi` to specify step-specific overrides.
1514
1363
 
1515
1364
  Parameters
1516
1365
  ----------
1517
- packages : Dict[str, str], default: {}
1518
- Packages to use for this flow. The key is the name of the package
1519
- and the value is the version to use.
1520
- python : str, optional, default: None
1521
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
1522
- that the version used will correspond to the version of the Python interpreter used to start the run.
1366
+ name : str
1367
+ Project name. Make sure that the name is unique amongst all
1368
+ projects that use the same production scheduler. The name may
1369
+ contain only lowercase alphanumeric characters and underscores.
1370
+
1371
+ branch : Optional[str], default None
1372
+ The branch to use. If not specified, the branch is set to
1373
+ `user.<username>` unless `production` is set to `True`. This can
1374
+ also be set on the command line using `--branch` as a top-level option.
1375
+ It is an error to specify `branch` in the decorator and on the command line.
1376
+
1377
+ production : bool, default False
1378
+ Whether or not the branch is the production branch. This can also be set on the
1379
+ command line using `--production` as a top-level option. It is an error to specify
1380
+ `production` in the decorator and on the command line.
1381
+ The project branch name will be:
1382
+ - if `branch` is specified:
1383
+ - if `production` is True: `prod.<branch>`
1384
+ - if `production` is False: `test.<branch>`
1385
+ - if `branch` is not specified:
1386
+ - if `production` is True: `prod`
1387
+ - if `production` is False: `user.<username>`
1523
1388
  """
1524
1389
  ...
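
A hedged sketch of `@project`; the project name is a placeholder, and the effective branch (`user.<username>`, `test.<branch>`, or `prod`) follows the rules listed above.

```python
from metaflow import FlowSpec, project, step


# All flows decorated with the same project name share a namespace;
# "demo_project" is a placeholder (lowercase alphanumerics and underscores only).
@project(name="demo_project")
class ProjectedFlow(FlowSpec):

    @step
    def start(self):
        self.next(self.end)

    @step
    def end(self):
        pass


if __name__ == "__main__":
    ProjectedFlow()
```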
1525
1390
 
1526
- @typing.overload
1527
- def pypi_base(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1391
+ def airflow_external_task_sensor(*, timeout: int, poke_interval: int, mode: str, exponential_backoff: bool, pool: str, soft_fail: bool, name: str, description: str, external_dag_id: str, external_task_ids: typing.List[str], allowed_states: typing.List[str], failed_states: typing.List[str], execution_delta: "datetime.timedelta", check_existence: bool) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1392
+ """
1393
+ The `@airflow_external_task_sensor` decorator attaches an Airflow [ExternalTaskSensor](https://airflow.apache.org/docs/apache-airflow/stable/_api/airflow/sensors/external_task/index.html#airflow.sensors.external_task.ExternalTaskSensor) before the start step of the flow.
1394
+ This decorator only works when a flow is scheduled on Airflow and is compiled using `airflow create`. More than one `@airflow_external_task_sensor` can be added as flow decorators. Adding more than one decorator will ensure that the `start` step starts only after all sensors finish.
1395
+
1396
+
1397
+ Parameters
1398
+ ----------
1399
+ timeout : int
1400
+ Time, in seconds, before the task times out and fails. (Default: 3600)
1401
+ poke_interval : int
1402
+ Time in seconds that the job should wait in between each try. (Default: 60)
1403
+ mode : str
1404
+ How the sensor operates. Options are: { poke | reschedule }. (Default: "poke")
1405
+ exponential_backoff : bool
1406
+ Allow progressively longer waits between pokes by using an exponential backoff algorithm. (Default: True)
1407
+ pool : str
1408
+ the slot pool this task should run in,
1409
+ slot pools are a way to limit concurrency for certain tasks. (Default: None)
1410
+ soft_fail : bool
1411
+ Set to true to mark the task as SKIPPED on failure. (Default: False)
1412
+ name : str
1413
+ Name of the sensor on Airflow
1414
+ description : str
1415
+ Description of sensor in the Airflow UI
1416
+ external_dag_id : str
1417
+ The dag_id that contains the task you want to wait for.
1418
+ external_task_ids : List[str]
1419
+ The list of task_ids that you want to wait for.
1420
+ If None (default value) the sensor waits for the DAG. (Default: None)
1421
+ allowed_states : List[str]
1422
+ Iterable of allowed states. (Default: ['success'])
1423
+ failed_states : List[str]
1424
+ Iterable of failed or dis-allowed states. (Default: None)
1425
+ execution_delta : datetime.timedelta
1426
+ time difference with the previous execution to look at,
1427
+ the default is the same logical date as the current task or DAG. (Default: None)
1428
+ check_existence: bool
1429
+ Set to True to check if the external task exists or check if
1430
+ the DAG to wait for exists. (Default: True)
1431
+ """
1528
1432
  ...
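
A hedged sketch of `@airflow_external_task_sensor`, assuming it is importable from the top-level `metaflow` package as this stub suggests. The stub signature lists every parameter, so they are all spelled out with the defaults quoted in the docstring; the DAG id, sensor name, and description are placeholders, and `external_task_ids`/`execution_delta` are passed as None to mirror the documented defaults even though the stub annotates them more strictly.

```python
from metaflow import FlowSpec, airflow_external_task_sensor, step


@airflow_external_task_sensor(
    timeout=3600,                 # documented default
    poke_interval=60,             # documented default
    mode="poke",                  # documented default
    exponential_backoff=True,     # documented default
    pool=None,                    # documented default
    soft_fail=False,              # documented default
    name="wait_for_upstream",     # placeholder sensor name
    description="Wait for the upstream DAG before starting",
    external_dag_id="upstream_dag",   # placeholder DAG id
    external_task_ids=None,       # wait for the whole DAG (documented default)
    allowed_states=["success"],   # documented default
    failed_states=None,           # documented default
    execution_delta=None,         # documented default (same logical date)
    check_existence=True,         # documented default
)
class SensorGatedFlow(FlowSpec):

    @step
    def start(self):
        self.next(self.end)

    @step
    def end(self):
        pass


if __name__ == "__main__":
    SensorGatedFlow()
```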
1529
1433
 
1530
- def pypi_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None):
1434
+ def with_artifact_store(f: typing.Optional[typing.Type[FlowSpecDerived]] = None):
1531
1435
  """
1532
- Specifies the PyPI packages for all steps of the flow.
1436
+ Allows setting external datastores to save data for the
1437
+ `@checkpoint`/`@model`/`@huggingface_hub` decorators.
1533
1438
 
1534
- Use `@pypi_base` to set common packages required by all
1535
- steps and use `@pypi` to specify step-specific overrides.
1439
+ This decorator is useful when users wish to save data to a different datastore
1440
+ than what is configured in Metaflow. This can be for a variety of reasons:
1536
1441
 
1537
- Parameters
1442
+ 1. Data security: The objects needs to be stored in a bucket (object storage) that is not accessible by other flows.
1443
+ 2. Data Locality: The location where the task is executing is not located in the same region as the datastore.
1444
+ - Example: Metaflow datastore lives in US East, but the task is executing in Finland datacenters.
1445
+ 3. Data Lifecycle Policies: The objects need to be archived / managed separately from the Metaflow managed objects.
1446
+ - Example: Flow is training very large models that need to be stored separately and will be deleted more aggressively than the Metaflow managed objects.
1447
+
1448
+ Usage:
1538
1449
  ----------
1539
- packages : Dict[str, str], default: {}
1540
- Packages to use for this flow. The key is the name of the package
1541
- and the value is the version to use.
1542
- python : str, optional, default: None
1543
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
1544
- that the version used will correspond to the version of the Python interpreter used to start the run.
1450
+
1451
+ - Using a custom IAM role to access the datastore.
1452
+
1453
+ ```python
1454
+ @with_artifact_store(
1455
+ type="s3",
1456
+ config=lambda: {
1457
+ "root": "s3://my-bucket-foo/path/to/root",
1458
+ "role_arn": ROLE,
1459
+ },
1460
+ )
1461
+ class MyFlow(FlowSpec):
1462
+
1463
+ @checkpoint
1464
+ @step
1465
+ def start(self):
1466
+ with open("my_file.txt", "w") as f:
1467
+ f.write("Hello, World!")
1468
+ self.external_bucket_checkpoint = current.checkpoint.save("my_file.txt")
1469
+ self.next(self.end)
1470
+
1471
+ ```
1472
+
1473
+ - Using credentials to access the s3-compatible datastore.
1474
+
1475
+ ```python
1476
+ @with_artifact_store(
1477
+ type="s3",
1478
+ config=lambda: {
1479
+ "root": "s3://my-bucket-foo/path/to/root",
1480
+ "client_params": {
1481
+ "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1482
+ "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1483
+ },
1484
+ },
1485
+ )
1486
+ class MyFlow(FlowSpec):
1487
+
1488
+ @checkpoint
1489
+ @step
1490
+ def start(self):
1491
+ with open("my_file.txt", "w") as f:
1492
+ f.write("Hello, World!")
1493
+ self.external_bucket_checkpoint = current.checkpoint.save("my_file.txt")
1494
+ self.next(self.end)
1495
+
1496
+ ```
1497
+
1498
+ - Accessing objects stored in external datastores after task execution.
1499
+
1500
+ ```python
1501
+ run = Run("CheckpointsTestsFlow/8992")
1502
+ with artifact_store_from(run=run, config={
1503
+ "client_params": {
1504
+ "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1505
+ "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1506
+ },
1507
+ }):
1508
+ with Checkpoint() as cp:
1509
+ latest = cp.list(
1510
+ task=run["start"].task
1511
+ )[0]
1512
+ print(latest)
1513
+ cp.load(
1514
+ latest,
1515
+ "test-checkpoints"
1516
+ )
1517
+
1518
+ task = Task("TorchTuneFlow/8484/train/53673")
1519
+ with artifact_store_from(run=run, config={
1520
+ "client_params": {
1521
+ "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1522
+ "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1523
+ },
1524
+ }):
1525
+ load_model(
1526
+ task.data.model_ref,
1527
+ "test-models"
1528
+ )
1529
+ ```
1530
+ Parameters
1531
+ ----------
1532
+
1533
+ type: str
1534
+ The type of the datastore. Can be one of 's3', 'gcs', 'azure' or any other supported metaflow Datastore.
1535
+
1536
+ config: dict or Callable
1537
+ Dictionary of configuration options for the datastore. The following keys are required:
1538
+ - root: The root path in the datastore where the data will be saved. (needs to be in the format expected by the datastore)
1539
+ - example: 's3://bucket-name/path/to/root'
1540
+ - example: 'gs://bucket-name/path/to/root'
1541
+ - example: 'https://myblockacc.blob.core.windows.net/metaflow/'
1542
+ - role_arn (optional): AWS IAM role to access s3 bucket (only when `type` is 's3')
1543
+ - session_vars (optional): AWS session variables to access s3 bucket (only when `type` is 's3')
1544
+ - client_params (optional): AWS client parameters to access s3 bucket (only when `type` is 's3')
1545
1545
  """
1546
1546
  ...
1547
1547