ob-metaflow-stubs 6.0.3.162__py2.py3-none-any.whl → 6.0.3.164__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (206) hide show
  1. metaflow-stubs/__init__.pyi +882 -868
  2. metaflow-stubs/cards.pyi +2 -2
  3. metaflow-stubs/cli.pyi +2 -2
  4. metaflow-stubs/cli_components/__init__.pyi +2 -2
  5. metaflow-stubs/cli_components/utils.pyi +2 -2
  6. metaflow-stubs/client/__init__.pyi +2 -2
  7. metaflow-stubs/client/core.pyi +5 -5
  8. metaflow-stubs/client/filecache.pyi +3 -3
  9. metaflow-stubs/events.pyi +2 -2
  10. metaflow-stubs/exception.pyi +2 -2
  11. metaflow-stubs/flowspec.pyi +6 -6
  12. metaflow-stubs/generated_for.txt +1 -1
  13. metaflow-stubs/includefile.pyi +2 -2
  14. metaflow-stubs/info_file.pyi +2 -2
  15. metaflow-stubs/metadata_provider/__init__.pyi +2 -2
  16. metaflow-stubs/metadata_provider/heartbeat.pyi +2 -2
  17. metaflow-stubs/metadata_provider/metadata.pyi +3 -3
  18. metaflow-stubs/metadata_provider/util.pyi +2 -2
  19. metaflow-stubs/metaflow_config.pyi +2 -2
  20. metaflow-stubs/metaflow_current.pyi +123 -123
  21. metaflow-stubs/metaflow_git.pyi +2 -2
  22. metaflow-stubs/mf_extensions/__init__.pyi +2 -2
  23. metaflow-stubs/mf_extensions/obcheckpoint/__init__.pyi +2 -2
  24. metaflow-stubs/mf_extensions/obcheckpoint/plugins/__init__.pyi +2 -2
  25. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/__init__.pyi +2 -2
  26. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/__init__.pyi +2 -2
  27. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/async_cards.pyi +3 -3
  28. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/deco_injection_mixin.pyi +2 -2
  29. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/extra_components.pyi +4 -4
  30. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/__init__.pyi +2 -2
  31. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/cards/__init__.pyi +2 -2
  32. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/cards/checkpoint_lister.pyi +5 -5
  33. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/cards/lineage_card.pyi +2 -2
  34. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/checkpoint_storage.pyi +4 -4
  35. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/constructors.pyi +2 -2
  36. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/core.pyi +4 -4
  37. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/decorator.pyi +3 -3
  38. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/exceptions.pyi +2 -2
  39. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/final_api.pyi +2 -2
  40. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/lineage.pyi +2 -2
  41. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/__init__.pyi +2 -2
  42. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/context.pyi +2 -2
  43. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/core.pyi +2 -2
  44. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/decorator.pyi +2 -2
  45. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/exceptions.pyi +2 -2
  46. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/task_utils.pyi +3 -3
  47. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/utils.pyi +2 -2
  48. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastructures.pyi +2 -2
  49. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/exceptions.pyi +2 -2
  50. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/hf_hub/__init__.pyi +2 -2
  51. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/hf_hub/decorator.pyi +2 -2
  52. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/__init__.pyi +2 -2
  53. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/core.pyi +3 -3
  54. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/exceptions.pyi +2 -2
  55. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/model_storage.pyi +4 -4
  56. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/__init__.pyi +2 -2
  57. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/flowspec_utils.pyi +2 -2
  58. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/general.pyi +2 -2
  59. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/identity_utils.pyi +2 -2
  60. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/serialization_handler/__init__.pyi +2 -2
  61. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/serialization_handler/base.pyi +2 -2
  62. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/serialization_handler/tar.pyi +3 -3
  63. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/tar_utils.pyi +3 -3
  64. metaflow-stubs/mf_extensions/outerbounds/__init__.pyi +2 -2
  65. metaflow-stubs/mf_extensions/outerbounds/plugins/__init__.pyi +2 -2
  66. metaflow-stubs/mf_extensions/outerbounds/plugins/card_utilities/__init__.pyi +2 -2
  67. metaflow-stubs/mf_extensions/outerbounds/plugins/card_utilities/injector.pyi +2 -2
  68. metaflow-stubs/mf_extensions/outerbounds/plugins/checkpoint_datastores/__init__.pyi +2 -2
  69. metaflow-stubs/mf_extensions/outerbounds/plugins/checkpoint_datastores/coreweave.pyi +2 -2
  70. metaflow-stubs/mf_extensions/outerbounds/plugins/checkpoint_datastores/nebius.pyi +2 -2
  71. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/__init__.pyi +2 -2
  72. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/ollama.pyi +2 -2
  73. metaflow-stubs/mf_extensions/outerbounds/plugins/snowflake/__init__.pyi +2 -2
  74. metaflow-stubs/mf_extensions/outerbounds/plugins/snowflake/snowflake.pyi +2 -2
  75. metaflow-stubs/mf_extensions/outerbounds/profilers/__init__.pyi +2 -2
  76. metaflow-stubs/mf_extensions/outerbounds/profilers/gpu.pyi +2 -2
  77. metaflow-stubs/mf_extensions/outerbounds/remote_config.pyi +2 -2
  78. metaflow-stubs/mf_extensions/outerbounds/toplevel/__init__.pyi +2 -2
  79. metaflow-stubs/mf_extensions/outerbounds/toplevel/global_aliases_for_metaflow_package.pyi +2 -2
  80. metaflow-stubs/multicore_utils.pyi +2 -2
  81. metaflow-stubs/parameters.pyi +3 -3
  82. metaflow-stubs/plugins/__init__.pyi +12 -12
  83. metaflow-stubs/plugins/airflow/__init__.pyi +2 -2
  84. metaflow-stubs/plugins/airflow/airflow_utils.pyi +2 -2
  85. metaflow-stubs/plugins/airflow/exception.pyi +2 -2
  86. metaflow-stubs/plugins/airflow/sensors/__init__.pyi +2 -2
  87. metaflow-stubs/plugins/airflow/sensors/base_sensor.pyi +2 -2
  88. metaflow-stubs/plugins/airflow/sensors/external_task_sensor.pyi +2 -2
  89. metaflow-stubs/plugins/airflow/sensors/s3_sensor.pyi +2 -2
  90. metaflow-stubs/plugins/argo/__init__.pyi +2 -2
  91. metaflow-stubs/plugins/argo/argo_client.pyi +2 -2
  92. metaflow-stubs/plugins/argo/argo_events.pyi +2 -2
  93. metaflow-stubs/plugins/argo/argo_workflows.pyi +4 -4
  94. metaflow-stubs/plugins/argo/argo_workflows_decorator.pyi +3 -3
  95. metaflow-stubs/plugins/argo/argo_workflows_deployer.pyi +4 -4
  96. metaflow-stubs/plugins/argo/argo_workflows_deployer_objects.pyi +3 -3
  97. metaflow-stubs/plugins/aws/__init__.pyi +2 -2
  98. metaflow-stubs/plugins/aws/aws_client.pyi +2 -2
  99. metaflow-stubs/plugins/aws/aws_utils.pyi +2 -2
  100. metaflow-stubs/plugins/aws/batch/__init__.pyi +2 -2
  101. metaflow-stubs/plugins/aws/batch/batch.pyi +2 -2
  102. metaflow-stubs/plugins/aws/batch/batch_client.pyi +2 -2
  103. metaflow-stubs/plugins/aws/batch/batch_decorator.pyi +2 -2
  104. metaflow-stubs/plugins/aws/secrets_manager/__init__.pyi +2 -2
  105. metaflow-stubs/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.pyi +3 -3
  106. metaflow-stubs/plugins/aws/step_functions/__init__.pyi +2 -2
  107. metaflow-stubs/plugins/aws/step_functions/event_bridge_client.pyi +2 -2
  108. metaflow-stubs/plugins/aws/step_functions/schedule_decorator.pyi +2 -2
  109. metaflow-stubs/plugins/aws/step_functions/step_functions.pyi +2 -2
  110. metaflow-stubs/plugins/aws/step_functions/step_functions_client.pyi +2 -2
  111. metaflow-stubs/plugins/aws/step_functions/step_functions_deployer.pyi +3 -3
  112. metaflow-stubs/plugins/aws/step_functions/step_functions_deployer_objects.pyi +3 -3
  113. metaflow-stubs/plugins/azure/__init__.pyi +2 -2
  114. metaflow-stubs/plugins/azure/azure_credential.pyi +2 -2
  115. metaflow-stubs/plugins/azure/azure_exceptions.pyi +2 -2
  116. metaflow-stubs/plugins/azure/azure_secret_manager_secrets_provider.pyi +3 -3
  117. metaflow-stubs/plugins/azure/azure_utils.pyi +2 -2
  118. metaflow-stubs/plugins/azure/blob_service_client_factory.pyi +2 -2
  119. metaflow-stubs/plugins/azure/includefile_support.pyi +2 -2
  120. metaflow-stubs/plugins/cards/__init__.pyi +2 -2
  121. metaflow-stubs/plugins/cards/card_client.pyi +2 -2
  122. metaflow-stubs/plugins/cards/card_creator.pyi +2 -2
  123. metaflow-stubs/plugins/cards/card_datastore.pyi +2 -2
  124. metaflow-stubs/plugins/cards/card_decorator.pyi +2 -2
  125. metaflow-stubs/plugins/cards/card_modules/__init__.pyi +2 -2
  126. metaflow-stubs/plugins/cards/card_modules/basic.pyi +3 -3
  127. metaflow-stubs/plugins/cards/card_modules/card.pyi +2 -2
  128. metaflow-stubs/plugins/cards/card_modules/components.pyi +4 -4
  129. metaflow-stubs/plugins/cards/card_modules/convert_to_native_type.pyi +2 -2
  130. metaflow-stubs/plugins/cards/card_modules/renderer_tools.pyi +2 -2
  131. metaflow-stubs/plugins/cards/card_modules/test_cards.pyi +2 -2
  132. metaflow-stubs/plugins/cards/card_resolver.pyi +2 -2
  133. metaflow-stubs/plugins/cards/component_serializer.pyi +2 -2
  134. metaflow-stubs/plugins/cards/exception.pyi +2 -2
  135. metaflow-stubs/plugins/catch_decorator.pyi +2 -2
  136. metaflow-stubs/plugins/datatools/__init__.pyi +2 -2
  137. metaflow-stubs/plugins/datatools/local.pyi +2 -2
  138. metaflow-stubs/plugins/datatools/s3/__init__.pyi +2 -2
  139. metaflow-stubs/plugins/datatools/s3/s3.pyi +4 -4
  140. metaflow-stubs/plugins/datatools/s3/s3tail.pyi +2 -2
  141. metaflow-stubs/plugins/datatools/s3/s3util.pyi +2 -2
  142. metaflow-stubs/plugins/debug_logger.pyi +2 -2
  143. metaflow-stubs/plugins/debug_monitor.pyi +2 -2
  144. metaflow-stubs/plugins/environment_decorator.pyi +2 -2
  145. metaflow-stubs/plugins/events_decorator.pyi +2 -2
  146. metaflow-stubs/plugins/frameworks/__init__.pyi +2 -2
  147. metaflow-stubs/plugins/frameworks/pytorch.pyi +2 -2
  148. metaflow-stubs/plugins/gcp/__init__.pyi +2 -2
  149. metaflow-stubs/plugins/gcp/gcp_secret_manager_secrets_provider.pyi +3 -3
  150. metaflow-stubs/plugins/gcp/gs_exceptions.pyi +2 -2
  151. metaflow-stubs/plugins/gcp/gs_storage_client_factory.pyi +2 -2
  152. metaflow-stubs/plugins/gcp/gs_utils.pyi +2 -2
  153. metaflow-stubs/plugins/gcp/includefile_support.pyi +2 -2
  154. metaflow-stubs/plugins/kubernetes/__init__.pyi +2 -2
  155. metaflow-stubs/plugins/kubernetes/kube_utils.pyi +3 -3
  156. metaflow-stubs/plugins/kubernetes/kubernetes.pyi +2 -2
  157. metaflow-stubs/plugins/kubernetes/kubernetes_client.pyi +2 -2
  158. metaflow-stubs/plugins/kubernetes/kubernetes_decorator.pyi +2 -2
  159. metaflow-stubs/plugins/kubernetes/kubernetes_jobsets.pyi +2 -2
  160. metaflow-stubs/plugins/kubernetes/spot_monitor_sidecar.pyi +2 -2
  161. metaflow-stubs/plugins/ollama/__init__.pyi +3 -3
  162. metaflow-stubs/plugins/parallel_decorator.pyi +2 -2
  163. metaflow-stubs/plugins/perimeters.pyi +2 -2
  164. metaflow-stubs/plugins/project_decorator.pyi +2 -2
  165. metaflow-stubs/plugins/pypi/__init__.pyi +3 -3
  166. metaflow-stubs/plugins/pypi/conda_decorator.pyi +2 -2
  167. metaflow-stubs/plugins/pypi/conda_environment.pyi +4 -4
  168. metaflow-stubs/plugins/pypi/parsers.pyi +2 -2
  169. metaflow-stubs/plugins/pypi/pypi_decorator.pyi +2 -2
  170. metaflow-stubs/plugins/pypi/pypi_environment.pyi +2 -2
  171. metaflow-stubs/plugins/pypi/utils.pyi +2 -2
  172. metaflow-stubs/plugins/resources_decorator.pyi +2 -2
  173. metaflow-stubs/plugins/retry_decorator.pyi +2 -2
  174. metaflow-stubs/plugins/secrets/__init__.pyi +2 -2
  175. metaflow-stubs/plugins/secrets/inline_secrets_provider.pyi +3 -3
  176. metaflow-stubs/plugins/secrets/secrets_decorator.pyi +2 -2
  177. metaflow-stubs/plugins/snowflake/__init__.pyi +2 -2
  178. metaflow-stubs/plugins/storage_executor.pyi +2 -2
  179. metaflow-stubs/plugins/test_unbounded_foreach_decorator.pyi +3 -3
  180. metaflow-stubs/plugins/timeout_decorator.pyi +2 -2
  181. metaflow-stubs/plugins/uv/__init__.pyi +2 -2
  182. metaflow-stubs/plugins/uv/uv_environment.pyi +3 -3
  183. metaflow-stubs/profilers/__init__.pyi +2 -2
  184. metaflow-stubs/pylint_wrapper.pyi +2 -2
  185. metaflow-stubs/runner/__init__.pyi +2 -2
  186. metaflow-stubs/runner/deployer.pyi +6 -6
  187. metaflow-stubs/runner/deployer_impl.pyi +2 -2
  188. metaflow-stubs/runner/metaflow_runner.pyi +2 -2
  189. metaflow-stubs/runner/nbdeploy.pyi +2 -2
  190. metaflow-stubs/runner/nbrun.pyi +2 -2
  191. metaflow-stubs/runner/subprocess_manager.pyi +2 -2
  192. metaflow-stubs/runner/utils.pyi +3 -3
  193. metaflow-stubs/system/__init__.pyi +2 -2
  194. metaflow-stubs/system/system_logger.pyi +2 -2
  195. metaflow-stubs/system/system_monitor.pyi +2 -2
  196. metaflow-stubs/tagging_util.pyi +2 -2
  197. metaflow-stubs/tuple_util.pyi +2 -2
  198. metaflow-stubs/user_configs/__init__.pyi +2 -2
  199. metaflow-stubs/user_configs/config_decorators.pyi +3 -3
  200. metaflow-stubs/user_configs/config_options.pyi +3 -3
  201. metaflow-stubs/user_configs/config_parameters.pyi +6 -6
  202. {ob_metaflow_stubs-6.0.3.162.dist-info → ob_metaflow_stubs-6.0.3.164.dist-info}/METADATA +1 -1
  203. ob_metaflow_stubs-6.0.3.164.dist-info/RECORD +206 -0
  204. ob_metaflow_stubs-6.0.3.162.dist-info/RECORD +0 -206
  205. {ob_metaflow_stubs-6.0.3.162.dist-info → ob_metaflow_stubs-6.0.3.164.dist-info}/WHEEL +0 -0
  206. {ob_metaflow_stubs-6.0.3.162.dist-info → ob_metaflow_stubs-6.0.3.164.dist-info}/top_level.txt +0 -0
@@ -1,7 +1,7 @@
1
1
  ######################################################################################################
2
2
  # Auto-generated Metaflow stub file #
3
- # MF version: 2.15.10.1+obcheckpoint(0.2.1);ob(v1) #
4
- # Generated on 2025-05-05T21:26:32.787199 #
3
+ # MF version: 2.15.11.1+obcheckpoint(0.2.1);ob(v1) #
4
+ # Generated on 2025-05-07T07:24:19.774250 #
5
5
  ######################################################################################################
6
6
 
7
7
  from __future__ import annotations
@@ -35,8 +35,8 @@ from .user_configs.config_parameters import ConfigValue as ConfigValue
35
35
  from .user_configs.config_parameters import config_expr as config_expr
36
36
  from .user_configs.config_decorators import CustomFlowDecorator as CustomFlowDecorator
37
37
  from .user_configs.config_decorators import CustomStepDecorator as CustomStepDecorator
38
- from . import cards as cards
39
38
  from . import tuple_util as tuple_util
39
+ from . import cards as cards
40
40
  from . import metaflow_git as metaflow_git
41
41
  from . import events as events
42
42
  from . import runner as runner
@@ -44,9 +44,9 @@ from . import plugins as plugins
44
44
  from .mf_extensions.outerbounds.toplevel.global_aliases_for_metaflow_package import S3 as S3
45
45
  from . import includefile as includefile
46
46
  from .includefile import IncludeFile as IncludeFile
47
+ from .plugins.pypi.parsers import pyproject_toml_parser as pyproject_toml_parser
47
48
  from .plugins.pypi.parsers import conda_environment_yml_parser as conda_environment_yml_parser
48
49
  from .plugins.pypi.parsers import requirements_txt_parser as requirements_txt_parser
49
- from .plugins.pypi.parsers import pyproject_toml_parser as pyproject_toml_parser
50
50
  from . import client as client
51
51
  from .client.core import namespace as namespace
52
52
  from .client.core import get_namespace as get_namespace
@@ -153,444 +153,572 @@ def step(f: typing.Union[typing.Callable[[FlowSpecDerived], None], typing.Callab
153
153
  """
154
154
  ...
155
155
 
156
- def app_deploy(*, app_port: int, app_name: str) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
156
+ @typing.overload
157
+ def checkpoint(*, load_policy: str = 'fresh', temp_dir_root: str = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
157
158
  """
158
- Specifies that this step is used to deploy an instance of the app.
159
- Requires that self.app_name, self.app_port, self.entrypoint and self.deployDir is set.
159
+ Enables checkpointing for a step.
160
+
160
161
 
161
162
 
162
163
  Parameters
163
164
  ----------
164
- app_port : int
165
- Number of GPUs to use.
166
- app_name : str
167
- Name of the app to deploy.
165
+ load_policy : str, default: "fresh"
166
+ The policy for loading the checkpoint. The following policies are supported:
167
+ - "eager": Loads the latest available checkpoint within the namespace.
168
+ With this mode, the latest checkpoint written by any previous task (can be even a different run) of the step
169
+ will be loaded at the start of the task.
170
+ - "none": Do not load any checkpoint
171
+ - "fresh": Loads the latest checkpoint created within the running Task.
172
+ This mode helps loading checkpoints across various retry attempts of the same task.
173
+ With this mode, no checkpoint will be loaded at the start of a task but any checkpoints
174
+ created within the task will be loaded when the task retries execution on failure.
175
+
176
+ temp_dir_root : str, default: None
177
+ The root directory under which `current.checkpoint.directory` will be created.
168
178
  """
169
179
  ...
170
180
 
171
- def huggingface_hub(*, temp_dir_root: typing.Optional[str] = None, load: typing.Union[typing.List[str], typing.List[typing.Tuple[typing.Dict, str]], typing.List[typing.Tuple[str, str]], typing.List[typing.Dict], None]) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
181
+ @typing.overload
182
+ def checkpoint(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
183
+ ...
184
+
185
+ @typing.overload
186
+ def checkpoint(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
187
+ ...
188
+
189
+ def checkpoint(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, load_policy: str = 'fresh', temp_dir_root: str = None):
172
190
  """
173
- Decorator that helps cache, version and store models/datasets from huggingface hub.
191
+ Enables checkpointing for a step.
192
+
174
193
 
175
194
 
176
195
  Parameters
177
196
  ----------
178
- temp_dir_root : str, optional
179
- The root directory that will hold the temporary directory where objects will be downloaded.
197
+ load_policy : str, default: "fresh"
198
+ The policy for loading the checkpoint. The following policies are supported:
199
+ - "eager": Loads the latest available checkpoint within the namespace.
200
+ With this mode, the latest checkpoint written by any previous task (can be even a different run) of the step
201
+ will be loaded at the start of the task.
202
+ - "none": Do not load any checkpoint
203
+ - "fresh": Loads the latest checkpoint created within the running Task.
204
+ This mode helps loading checkpoints across various retry attempts of the same task.
205
+ With this mode, no checkpoint will be loaded at the start of a task but any checkpoints
206
+ created within the task will be loaded when the task retries execution on failure.
180
207
 
181
- load: Union[List[str], List[Tuple[Dict, str]], List[Tuple[str, str]], List[Dict], None]
182
- The list of repos (models/datasets) to load.
208
+ temp_dir_root : str, default: None
209
+ The root directory under which `current.checkpoint.directory` will be created.
210
+ """
211
+ ...
212
+
213
+ @typing.overload
214
+ def pypi(*, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
215
+ """
216
+ Specifies the PyPI packages for the step.
183
217
 
184
- Loaded repos can be accessed via `current.huggingface_hub.loaded`. If load is set, then the following happens:
218
+ Information in this decorator will augment any
219
+ attributes set in the `@pypi_base` flow-level decorator. Hence,
220
+ you can use `@pypi_base` to set packages required by all
221
+ steps and use `@pypi` to specify step-specific overrides.
185
222
 
186
- - If repo (model/dataset) is not found in the datastore:
187
- - Downloads the repo from Hugging Face Hub to a temporary directory (or uses specified path) for local access
188
- - Stores it in Metaflow's datastore (s3/gcs/azure etc.) with a unique name based on repo_type/repo_id
189
- - All HF models loaded for a `@step` will be cached separately under flow/step/namespace.
190
223
 
191
- - If repo is found in the datastore:
192
- - Loads it directly from datastore to local path (can be temporary directory or specified path)
224
+ Parameters
225
+ ----------
226
+ packages : Dict[str, str], default: {}
227
+ Packages to use for this step. The key is the name of the package
228
+ and the value is the version to use.
229
+ python : str, optional, default: None
230
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
231
+ that the version used will correspond to the version of the Python interpreter used to start the run.
193
232
  """
194
233
  ...
195
234
 
196
235
  @typing.overload
197
- def fast_bakery_internal(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
198
- """
199
- Internal decorator to support Fast bakery
200
- """
236
+ def pypi(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
201
237
  ...
202
238
 
203
239
  @typing.overload
204
- def fast_bakery_internal(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
240
+ def pypi(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
205
241
  ...
206
242
 
207
- def fast_bakery_internal(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
243
+ def pypi(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None):
208
244
  """
209
- Internal decorator to support Fast bakery
245
+ Specifies the PyPI packages for the step.
246
+
247
+ Information in this decorator will augment any
248
+ attributes set in the `@pypi_base` flow-level decorator. Hence,
249
+ you can use `@pypi_base` to set packages required by all
250
+ steps and use `@pypi` to specify step-specific overrides.
251
+
252
+
253
+ Parameters
254
+ ----------
255
+ packages : Dict[str, str], default: {}
256
+ Packages to use for this step. The key is the name of the package
257
+ and the value is the version to use.
258
+ python : str, optional, default: None
259
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
260
+ that the version used will correspond to the version of the Python interpreter used to start the run.
210
261
  """
211
262
  ...
212
263
 
213
264
  @typing.overload
214
- def secrets(*, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = []) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
265
+ def resources(*, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
215
266
  """
216
- Specifies secrets to be retrieved and injected as environment variables prior to
217
- the execution of a step.
267
+ Specifies the resources needed when executing this step.
268
+
269
+ Use `@resources` to specify the resource requirements
270
+ independently of the specific compute layer (`@batch`, `@kubernetes`).
271
+
272
+ You can choose the compute layer on the command line by executing e.g.
273
+ ```
274
+ python myflow.py run --with batch
275
+ ```
276
+ or
277
+ ```
278
+ python myflow.py run --with kubernetes
279
+ ```
280
+ which executes the flow on the desired system using the
281
+ requirements specified in `@resources`.
218
282
 
219
283
 
220
284
  Parameters
221
285
  ----------
222
- sources : List[Union[str, Dict[str, Any]]], default: []
223
- List of secret specs, defining how the secrets are to be retrieved
286
+ cpu : int, default 1
287
+ Number of CPUs required for this step.
288
+ gpu : int, optional, default None
289
+ Number of GPUs required for this step.
290
+ disk : int, optional, default None
291
+ Disk size (in MB) required for this step. Only applies on Kubernetes.
292
+ memory : int, default 4096
293
+ Memory size (in MB) required for this step.
294
+ shared_memory : int, optional, default None
295
+ The value for the size (in MiB) of the /dev/shm volume for this step.
296
+ This parameter maps to the `--shm-size` option in Docker.
224
297
  """
225
298
  ...
226
299
 
227
300
  @typing.overload
228
- def secrets(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
301
+ def resources(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
229
302
  ...
230
303
 
231
304
  @typing.overload
232
- def secrets(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
305
+ def resources(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
233
306
  ...
234
307
 
235
- def secrets(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = []):
308
+ def resources(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None):
236
309
  """
237
- Specifies secrets to be retrieved and injected as environment variables prior to
238
- the execution of a step.
310
+ Specifies the resources needed when executing this step.
311
+
312
+ Use `@resources` to specify the resource requirements
313
+ independently of the specific compute layer (`@batch`, `@kubernetes`).
314
+
315
+ You can choose the compute layer on the command line by executing e.g.
316
+ ```
317
+ python myflow.py run --with batch
318
+ ```
319
+ or
320
+ ```
321
+ python myflow.py run --with kubernetes
322
+ ```
323
+ which executes the flow on the desired system using the
324
+ requirements specified in `@resources`.
239
325
 
240
326
 
241
327
  Parameters
242
328
  ----------
243
- sources : List[Union[str, Dict[str, Any]]], default: []
244
- List of secret specs, defining how the secrets are to be retrieved
329
+ cpu : int, default 1
330
+ Number of CPUs required for this step.
331
+ gpu : int, optional, default None
332
+ Number of GPUs required for this step.
333
+ disk : int, optional, default None
334
+ Disk size (in MB) required for this step. Only applies on Kubernetes.
335
+ memory : int, default 4096
336
+ Memory size (in MB) required for this step.
337
+ shared_memory : int, optional, default None
338
+ The value for the size (in MiB) of the /dev/shm volume for this step.
339
+ This parameter maps to the `--shm-size` option in Docker.
245
340
  """
246
341
  ...
247
342
 
248
- def nvidia(*, gpu: int, gpu_type: str, queue_timeout: int) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
343
+ @typing.overload
344
+ def timeout(*, seconds: int = 0, minutes: int = 0, hours: int = 0) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
249
345
  """
250
- Specifies that this step should execute on DGX cloud.
346
+ Specifies a timeout for your step.
347
+
348
+ This decorator is useful if this step may hang indefinitely.
349
+
350
+ This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
351
+ A timeout is considered to be an exception thrown by the step. It will cause the step to be
352
+ retried if needed and the exception will be caught by the `@catch` decorator, if present.
353
+
354
+ Note that all the values specified in parameters are added together so if you specify
355
+ 60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.
251
356
 
252
357
 
253
358
  Parameters
254
359
  ----------
255
- gpu : int
256
- Number of GPUs to use.
257
- gpu_type : str
258
- Type of Nvidia GPU to use.
259
- queue_timeout : int
260
- Time to keep the job in NVCF's queue.
360
+ seconds : int, default 0
361
+ Number of seconds to wait prior to timing out.
362
+ minutes : int, default 0
363
+ Number of minutes to wait prior to timing out.
364
+ hours : int, default 0
365
+ Number of hours to wait prior to timing out.
261
366
  """
262
367
  ...
263
368
 
264
- def kubernetes(*, cpu: int = 1, memory: int = 4096, disk: int = 10240, image: typing.Optional[str] = None, image_pull_policy: str = 'KUBERNETES_IMAGE_PULL_POLICY', service_account: str = 'METAFLOW_KUBERNETES_SERVICE_ACCOUNT', secrets: typing.Optional[typing.List[str]] = None, node_selector: typing.Union[typing.Dict[str, str], str, None] = None, namespace: str = 'METAFLOW_KUBERNETES_NAMESPACE', gpu: typing.Optional[int] = None, gpu_vendor: str = 'KUBERNETES_GPU_VENDOR', tolerations: typing.List[str] = [], labels: typing.Dict[str, str] = 'METAFLOW_KUBERNETES_LABELS', annotations: typing.Dict[str, str] = 'METAFLOW_KUBERNETES_ANNOTATIONS', use_tmpfs: bool = False, tmpfs_tempdir: bool = True, tmpfs_size: typing.Optional[int] = None, tmpfs_path: typing.Optional[str] = '/metaflow_temp', persistent_volume_claims: typing.Optional[typing.Dict[str, str]] = None, shared_memory: typing.Optional[int] = None, port: typing.Optional[int] = None, compute_pool: typing.Optional[str] = None, hostname_resolution_timeout: int = 600, qos: str = 'Burstable', security_context: typing.Optional[typing.Dict[str, typing.Any]] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
369
+ @typing.overload
370
+ def timeout(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
371
+ ...
372
+
373
+ @typing.overload
374
+ def timeout(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
375
+ ...
376
+
377
+ def timeout(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, seconds: int = 0, minutes: int = 0, hours: int = 0):
265
378
  """
266
- Specifies that this step should execute on Kubernetes.
379
+ Specifies a timeout for your step.
380
+
381
+ This decorator is useful if this step may hang indefinitely.
382
+
383
+ This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
384
+ A timeout is considered to be an exception thrown by the step. It will cause the step to be
385
+ retried if needed and the exception will be caught by the `@catch` decorator, if present.
386
+
387
+ Note that all the values specified in parameters are added together so if you specify
388
+ 60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.
267
389
 
268
390
 
269
391
  Parameters
270
392
  ----------
271
- cpu : int, default 1
272
- Number of CPUs required for this step. If `@resources` is
273
- also present, the maximum value from all decorators is used.
274
- memory : int, default 4096
275
- Memory size (in MB) required for this step. If
276
- `@resources` is also present, the maximum value from all decorators is
277
- used.
278
- disk : int, default 10240
279
- Disk size (in MB) required for this step. If
280
- `@resources` is also present, the maximum value from all decorators is
281
- used.
282
- image : str, optional, default None
283
- Docker image to use when launching on Kubernetes. If not specified, and
284
- METAFLOW_KUBERNETES_CONTAINER_IMAGE is specified, that image is used. If
285
- not, a default Docker image mapping to the current version of Python is used.
286
- image_pull_policy: str, default KUBERNETES_IMAGE_PULL_POLICY
287
- If given, the imagePullPolicy to be applied to the Docker image of the step.
288
- service_account : str, default METAFLOW_KUBERNETES_SERVICE_ACCOUNT
289
- Kubernetes service account to use when launching pod in Kubernetes.
290
- secrets : List[str], optional, default None
291
- Kubernetes secrets to use when launching pod in Kubernetes. These
292
- secrets are in addition to the ones defined in `METAFLOW_KUBERNETES_SECRETS`
293
- in Metaflow configuration.
294
- node_selector: Union[Dict[str,str], str], optional, default None
295
- Kubernetes node selector(s) to apply to the pod running the task.
296
- Can be passed in as a comma separated string of values e.g.
297
- 'kubernetes.io/os=linux,kubernetes.io/arch=amd64' or as a dictionary
298
- {'kubernetes.io/os': 'linux', 'kubernetes.io/arch': 'amd64'}
299
- namespace : str, default METAFLOW_KUBERNETES_NAMESPACE
300
- Kubernetes namespace to use when launching pod in Kubernetes.
301
- gpu : int, optional, default None
302
- Number of GPUs required for this step. A value of zero implies that
303
- the scheduled node should not have GPUs.
304
- gpu_vendor : str, default KUBERNETES_GPU_VENDOR
305
- The vendor of the GPUs to be used for this step.
306
- tolerations : List[str], default []
307
- The default is extracted from METAFLOW_KUBERNETES_TOLERATIONS.
308
- Kubernetes tolerations to use when launching pod in Kubernetes.
309
- labels: Dict[str, str], default: METAFLOW_KUBERNETES_LABELS
310
- Kubernetes labels to use when launching pod in Kubernetes.
311
- annotations: Dict[str, str], default: METAFLOW_KUBERNETES_ANNOTATIONS
312
- Kubernetes annotations to use when launching pod in Kubernetes.
313
- use_tmpfs : bool, default False
314
- This enables an explicit tmpfs mount for this step.
315
- tmpfs_tempdir : bool, default True
316
- sets METAFLOW_TEMPDIR to tmpfs_path if set for this step.
317
- tmpfs_size : int, optional, default: None
318
- The value for the size (in MiB) of the tmpfs mount for this step.
319
- This parameter maps to the `--tmpfs` option in Docker. Defaults to 50% of the
320
- memory allocated for this step.
321
- tmpfs_path : str, optional, default /metaflow_temp
322
- Path to tmpfs mount for this step.
323
- persistent_volume_claims : Dict[str, str], optional, default None
324
- A map (dictionary) of persistent volumes to be mounted to the pod for this step. The map is from persistent
325
- volumes to the path to which the volume is to be mounted, e.g., `{'pvc-name': '/path/to/mount/on'}`.
326
- shared_memory: int, optional
327
- Shared memory size (in MiB) required for this step
328
- port: int, optional
329
- Port number to specify in the Kubernetes job object
330
- compute_pool : str, optional, default None
331
- Compute pool to be used for for this step.
332
- If not specified, any accessible compute pool within the perimeter is used.
333
- hostname_resolution_timeout: int, default 10 * 60
334
- Timeout in seconds for the workers tasks in the gang scheduled cluster to resolve the hostname of control task.
335
- Only applicable when @parallel is used.
336
- qos: str, default: Burstable
337
- Quality of Service class to assign to the pod. Supported values are: Guaranteed, Burstable, BestEffort
338
-
339
- security_context: Dict[str, Any], optional, default None
340
- Container security context. Applies to the task container. Allows the following keys:
341
- - privileged: bool, optional, default None
342
- - allow_privilege_escalation: bool, optional, default None
343
- - run_as_user: int, optional, default None
344
- - run_as_group: int, optional, default None
345
- - run_as_non_root: bool, optional, default None
393
+ seconds : int, default 0
394
+ Number of seconds to wait prior to timing out.
395
+ minutes : int, default 0
396
+ Number of minutes to wait prior to timing out.
397
+ hours : int, default 0
398
+ Number of hours to wait prior to timing out.
346
399
  """
347
400
  ...
348
401
 
349
- def ollama(*, models: "list[Ollama]", backend: str) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
402
+ @typing.overload
403
+ def model(*, load: typing.Union[typing.List[str], str, typing.List[typing.Tuple[str, typing.Optional[str]]]] = None, temp_dir_root: str = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
350
404
  """
351
- This decorator is used to run Ollama APIs as Metaflow task sidecars.
352
-
353
- User code call
354
- -----------
355
- @ollama(
356
- models=['meta/llama3-8b-instruct', 'meta/llama3-70b-instruct'],
357
- backend='local'
358
- )
359
-
360
- Valid backend options
361
- ---------------------
362
- - 'local': Run as a separate process on the local task machine.
363
- - (TODO) 'managed': Outerbounds hosts and selects compute provider.
364
- - (TODO) 'remote': Spin up separate instance to serve Ollama models.
405
+ Enables loading / saving of models within a step.
365
406
 
366
- Valid model options
367
- ----------------
368
- - 'llama3.2'
369
- - 'llama3.3'
370
- - any model here https://ollama.com/search
371
407
 
372
408
 
373
409
  Parameters
374
410
  ----------
375
- models: list[Ollama]
376
- List of Ollama containers running models in sidecars.
377
- backend: str
378
- Determines where and how to run the Ollama process.
411
+ load : Union[List[str],str,List[Tuple[str,Union[str,None]]]], default: None
412
+ Artifact name/s referencing the models/checkpoints to load. Artifact names refer to the names of the instance variables set to `self`.
413
+ The artifact names given to `load` can be reference objects or reference `key` strings from objects created by:
414
+ - `current.checkpoint`
415
+ - `current.model`
416
+ - `current.huggingface_hub`
417
+
418
+ If a list of tuples is provided, the first element is the artifact name and the second element is the path the artifact needs to be unpacked to on
419
+ the local filesystem. If the second element is None, the artifact will be unpacked in the current working directory.
420
+ If a string is provided, then the artifact corresponding to that name will be loaded in the current working directory.
421
+
422
+ temp_dir_root : str, default: None
423
+ The root directory under which `current.model.loaded` will store loaded models
379
424
  """
380
425
  ...
381
426
 
382
427
  @typing.overload
383
- def parallel(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
384
- """
385
- Decorator prototype for all step decorators. This function gets specialized
386
- and imported for all decorators types by _import_plugin_decorators().
387
- """
428
+ def model(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
388
429
  ...
389
430
 
390
431
  @typing.overload
391
- def parallel(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
432
+ def model(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
392
433
  ...
393
434
 
394
- def parallel(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
435
+ def model(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, load: typing.Union[typing.List[str], str, typing.List[typing.Tuple[str, typing.Optional[str]]]] = None, temp_dir_root: str = None):
395
436
  """
396
- Decorator prototype for all step decorators. This function gets specialized
397
- and imported for all decorators types by _import_plugin_decorators().
437
+ Enables loading / saving of models within a step.
438
+
439
+
440
+
441
+ Parameters
442
+ ----------
443
+ load : Union[List[str],str,List[Tuple[str,Union[str,None]]]], default: None
444
+ Artifact name/s referencing the models/checkpoints to load. Artifact names refer to the names of the instance variables set to `self`.
445
+ The artifact names given to `load` can be reference objects or reference `key` strings from objects created by:
446
+ - `current.checkpoint`
447
+ - `current.model`
448
+ - `current.huggingface_hub`
449
+
450
+ If a list of tuples is provided, the first element is the artifact name and the second element is the path the artifact needs to be unpacked to on
451
+ the local filesystem. If the second element is None, the artifact will be unpacked in the current working directory.
452
+ If a string is provided, then the artifact corresponding to that name will be loaded in the current working directory.
453
+
454
+ temp_dir_root : str, default: None
455
+ The root directory under which `current.model.loaded` will store loaded models
398
456
  """
399
457
  ...
400
458
 
401
- def nim(*, models: "list[NIM]", backend: str, queue_timeout: int) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
459
+ def app_deploy(*, app_port: int, app_name: str) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
402
460
  """
403
- This decorator is used to run NIM containers in Metaflow tasks as sidecars.
404
-
405
- User code call
406
- -----------
407
- @nim(
408
- models=['meta/llama3-8b-instruct', 'meta/llama3-70b-instruct'],
409
- backend='managed'
410
- )
411
-
412
- Valid backend options
413
- ---------------------
414
- - 'managed': Outerbounds selects a compute provider based on the model.
415
-
416
- Valid model options
417
- ----------------
418
- - 'meta/llama3-8b-instruct': 8B parameter model
419
- - 'meta/llama3-70b-instruct': 70B parameter model
420
- - any model here: https://nvcf.ngc.nvidia.com/functions?filter=nvidia-functions
461
+ Specifies that this step is used to deploy an instance of the app.
462
+ Requires that self.app_name, self.app_port, self.entrypoint and self.deployDir are set.
421
463
 
422
464
 
423
465
  Parameters
424
466
  ----------
425
- models: list[NIM]
426
- List of NIM containers running models in sidecars.
427
- backend: str
428
- Compute provider to run the NIM container.
429
- queue_timeout : int
430
- Time to keep the job in NVCF's queue.
467
+ app_port : int
468
+ Port number of the app to deploy.
469
+ app_name : str
470
+ Name of the app to deploy.
431
471
  """
432
472
  ...
433
473
 
434
474
  @typing.overload
435
- def checkpoint(*, load_policy: str = 'fresh', temp_dir_root: str = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
475
+ def environment(*, vars: typing.Dict[str, str] = {}) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
436
476
  """
437
- Enables checkpointing for a step.
438
-
477
+ Specifies environment variables to be set prior to the execution of a step.
439
478
 
440
479
 
441
480
  Parameters
442
481
  ----------
443
- load_policy : str, default: "fresh"
444
- The policy for loading the checkpoint. The following policies are supported:
445
- - "eager": Loads the the latest available checkpoint within the namespace.
446
- With this mode, the latest checkpoint written by any previous task (can be even a different run) of the step
447
- will be loaded at the start of the task.
448
- - "none": Do not load any checkpoint
449
- - "fresh": Loads the lastest checkpoint created within the running Task.
450
- This mode helps loading checkpoints across various retry attempts of the same task.
451
- With this mode, no checkpoint will be loaded at the start of a task but any checkpoints
452
- created within the task will be loaded when the task is retries execution on failure.
453
-
454
- temp_dir_root : str, default: None
455
- The root directory under which `current.checkpoint.directory` will be created.
482
+ vars : Dict[str, str], default {}
483
+ Dictionary of environment variables to set.
456
484
  """
457
485
  ...
458
486
 
459
487
  @typing.overload
460
- def checkpoint(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
488
+ def environment(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
461
489
  ...
462
490
 
463
491
  @typing.overload
464
- def checkpoint(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
492
+ def environment(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
465
493
  ...
466
494
 
467
- def checkpoint(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, load_policy: str = 'fresh', temp_dir_root: str = None):
495
+ def environment(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, vars: typing.Dict[str, str] = {}):
468
496
  """
469
- Enables checkpointing for a step.
470
-
497
+ Specifies environment variables to be set prior to the execution of a step.
471
498
 
472
499
 
473
500
  Parameters
474
501
  ----------
475
- load_policy : str, default: "fresh"
476
- The policy for loading the checkpoint. The following policies are supported:
477
- - "eager": Loads the the latest available checkpoint within the namespace.
478
- With this mode, the latest checkpoint written by any previous task (can be even a different run) of the step
479
- will be loaded at the start of the task.
480
- - "none": Do not load any checkpoint
481
- - "fresh": Loads the lastest checkpoint created within the running Task.
482
- This mode helps loading checkpoints across various retry attempts of the same task.
483
- With this mode, no checkpoint will be loaded at the start of a task but any checkpoints
484
- created within the task will be loaded when the task is retries execution on failure.
485
-
486
- temp_dir_root : str, default: None
487
- The root directory under which `current.checkpoint.directory` will be created.
502
+ vars : Dict[str, str], default {}
503
+ Dictionary of environment variables to set.
488
504
  """
489
505
  ...
490
506
 
491
507
  @typing.overload
492
- def catch(*, var: typing.Optional[str] = None, print_exception: bool = True) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
508
+ def parallel(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
493
509
  """
494
- Specifies that the step will success under all circumstances.
495
-
496
- The decorator will create an optional artifact, specified by `var`, which
497
- contains the exception raised. You can use it to detect the presence
498
- of errors, indicating that all happy-path artifacts produced by the step
499
- are missing.
500
-
501
-
502
- Parameters
503
- ----------
504
- var : str, optional, default None
505
- Name of the artifact in which to store the caught exception.
506
- If not specified, the exception is not stored.
507
- print_exception : bool, default True
508
- Determines whether or not the exception is printed to
509
- stdout when caught.
510
+ Decorator prototype for all step decorators. This function gets specialized
511
+ and imported for all decorator types by _import_plugin_decorators().
510
512
  """
511
513
  ...
512
514
 
513
515
  @typing.overload
514
- def catch(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
516
+ def parallel(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
515
517
  ...
516
518
 
517
- @typing.overload
518
- def catch(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
519
+ def parallel(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
520
+ """
521
+ Decorator prototype for all step decorators. This function gets specialized
522
+ and imported for all decorator types by _import_plugin_decorators().
523
+ """
519
524
  ...
520
525
 
521
- def catch(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, var: typing.Optional[str] = None, print_exception: bool = True):
526
+ @typing.overload
527
+ def conda(*, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
522
528
  """
523
- Specifies that the step will success under all circumstances.
529
+ Specifies the Conda environment for the step.
524
530
 
525
- The decorator will create an optional artifact, specified by `var`, which
526
- contains the exception raised. You can use it to detect the presence
527
- of errors, indicating that all happy-path artifacts produced by the step
528
- are missing.
531
+ Information in this decorator will augment any
532
+ attributes set in the `@conda_base` flow-level decorator. Hence,
533
+ you can use `@conda_base` to set packages required by all
534
+ steps and use `@conda` to specify step-specific overrides.
529
535
 
530
536
 
531
537
  Parameters
532
538
  ----------
533
- var : str, optional, default None
534
- Name of the artifact in which to store the caught exception.
535
- If not specified, the exception is not stored.
536
- print_exception : bool, default True
537
- Determines whether or not the exception is printed to
538
- stdout when caught.
539
+ packages : Dict[str, str], default {}
540
+ Packages to use for this step. The key is the name of the package
541
+ and the value is the version to use.
542
+ libraries : Dict[str, str], default {}
543
+ Supported for backward compatibility. When used with packages, packages will take precedence.
544
+ python : str, optional, default None
545
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
546
+ that the version used will correspond to the version of the Python interpreter used to start the run.
547
+ disabled : bool, default False
548
+ If set to True, disables @conda.
539
549
  """
540
550
  ...
541
551
 
542
552
  @typing.overload
543
- def retry(*, times: int = 3, minutes_between_retries: int = 2) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
544
- """
545
- Specifies the number of times the task corresponding
546
- to a step needs to be retried.
553
+ def conda(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
554
+ ...
555
+
556
+ @typing.overload
557
+ def conda(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
558
+ ...
559
+
560
+ def conda(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False):
561
+ """
562
+ Specifies the Conda environment for the step.
547
563
 
548
- This decorator is useful for handling transient errors, such as networking issues.
549
- If your task contains operations that can't be retried safely, e.g. database updates,
550
- it is advisable to annotate it with `@retry(times=0)`.
564
+ Information in this decorator will augment any
565
+ attributes set in the `@conda_base` flow-level decorator. Hence,
566
+ you can use `@conda_base` to set packages required by all
567
+ steps and use `@conda` to specify step-specific overrides.
551
568
 
552
- This can be used in conjunction with the `@catch` decorator. The `@catch`
553
- decorator will execute a no-op task after all retries have been exhausted,
554
- ensuring that the flow execution can continue.
569
+
570
+ Parameters
571
+ ----------
572
+ packages : Dict[str, str], default {}
573
+ Packages to use for this step. The key is the name of the package
574
+ and the value is the version to use.
575
+ libraries : Dict[str, str], default {}
576
+ Supported for backward compatibility. When used with packages, packages will take precedence.
577
+ python : str, optional, default None
578
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
579
+ that the version used will correspond to the version of the Python interpreter used to start the run.
580
+ disabled : bool, default False
581
+ If set to True, disables @conda.
582
+ """
583
+ ...
584
+
585
+ @typing.overload
586
+ def secrets(*, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = []) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
587
+ """
588
+ Specifies secrets to be retrieved and injected as environment variables prior to
589
+ the execution of a step.
555
590
 
556
591
 
557
592
  Parameters
558
593
  ----------
559
- times : int, default 3
560
- Number of times to retry this task.
561
- minutes_between_retries : int, default 2
562
- Number of minutes between retries.
594
+ sources : List[Union[str, Dict[str, Any]]], default: []
595
+ List of secret specs, defining how the secrets are to be retrieved
563
596
  """
564
597
  ...
565
598
 
566
599
  @typing.overload
567
- def retry(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
600
+ def secrets(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
568
601
  ...
569
602
 
570
603
  @typing.overload
571
- def retry(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
604
+ def secrets(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
572
605
  ...
573
606
 
574
- def retry(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, times: int = 3, minutes_between_retries: int = 2):
607
+ def secrets(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = []):
575
608
  """
576
- Specifies the number of times the task corresponding
577
- to a step needs to be retried.
609
+ Specifies secrets to be retrieved and injected as environment variables prior to
610
+ the execution of a step.
578
611
 
579
- This decorator is useful for handling transient errors, such as networking issues.
580
- If your task contains operations that can't be retried safely, e.g. database updates,
581
- it is advisable to annotate it with `@retry(times=0)`.
582
612
 
583
- This can be used in conjunction with the `@catch` decorator. The `@catch`
584
- decorator will execute a no-op task after all retries have been exhausted,
585
- ensuring that the flow execution can continue.
613
+ Parameters
614
+ ----------
615
+ sources : List[Union[str, Dict[str, Any]]], default: []
616
+ List of secret specs, defining how the secrets are to be retrieved
617
+ """
618
+ ...
619
+
620
+ def ollama(*, models: "list[Ollama]", backend: str) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
621
+ """
622
+ This decorator is used to run Ollama APIs as Metaflow task sidecars.
623
+
624
+ User code call
625
+ -----------
626
+ @ollama(
627
+ models=['meta/llama3-8b-instruct', 'meta/llama3-70b-instruct'],
628
+ backend='local'
629
+ )
630
+
631
+ Valid backend options
632
+ ---------------------
633
+ - 'local': Run as a separate process on the local task machine.
634
+ - (TODO) 'managed': Outerbounds hosts and selects compute provider.
635
+ - (TODO) 'remote': Spin up separate instance to serve Ollama models.
636
+
637
+ Valid model options
638
+ ----------------
639
+ - 'llama3.2'
640
+ - 'llama3.3'
641
+ - any model here https://ollama.com/search
586
642
 
587
643
 
588
644
  Parameters
589
645
  ----------
590
- times : int, default 3
591
- Number of times to retry this task.
592
- minutes_between_retries : int, default 2
593
- Number of minutes between retries.
646
+ models: list[Ollama]
647
+ List of Ollama containers running models in sidecars.
648
+ backend: str
649
+ Determines where and how to run the Ollama process.
650
+ """
651
+ ...
652
+
653
+ def nvct(*, gpu: int, gpu_type: str) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
654
+ """
655
+ Specifies that this step should execute on DGX cloud.
656
+
657
+
658
+ Parameters
659
+ ----------
660
+ gpu : int
661
+ Number of GPUs to use.
662
+ gpu_type : str
663
+ Type of Nvidia GPU to use.
664
+ """
665
+ ...
666
+
667
+ @typing.overload
668
+ def fast_bakery_internal(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
669
+ """
670
+ Internal decorator to support Fast bakery
671
+ """
672
+ ...
673
+
674
+ @typing.overload
675
+ def fast_bakery_internal(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
676
+ ...
677
+
678
+ def fast_bakery_internal(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
679
+ """
680
+ Internal decorator to support Fast bakery
681
+ """
682
+ ...
683
+
684
+ def nvidia(*, gpu: int, gpu_type: str, queue_timeout: int) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
685
+ """
686
+ Specifies that this step should execute on DGX cloud.
687
+
688
+
689
+ Parameters
690
+ ----------
691
+ gpu : int
692
+ Number of GPUs to use.
693
+ gpu_type : str
694
+ Type of Nvidia GPU to use.
695
+ queue_timeout : int
696
+ Time to keep the job in NVCF's queue.
697
+ """
698
+ ...
699
+
700
+ def huggingface_hub(*, temp_dir_root: typing.Optional[str] = None, load: typing.Union[typing.List[str], typing.List[typing.Tuple[typing.Dict, str]], typing.List[typing.Tuple[str, str]], typing.List[typing.Dict], None]) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
701
+ """
702
+ Decorator that helps cache, version and store models/datasets from huggingface hub.
703
+
704
+
705
+ Parameters
706
+ ----------
707
+ temp_dir_root : str, optional
708
+ The root directory that will hold the temporary directory where objects will be downloaded.
709
+
710
+ load: Union[List[str], List[Tuple[Dict, str]], List[Tuple[str, str]], List[Dict], None]
711
+ The list of repos (models/datasets) to load.
712
+
713
+ Loaded repos can be accessed via `current.huggingface_hub.loaded`. If load is set, then the following happens:
714
+
715
+ - If repo (model/dataset) is not found in the datastore:
716
+ - Downloads the repo from Hugging Face Hub to a temporary directory (or uses specified path) for local access
717
+ - Stores it in Metaflow's datastore (s3/gcs/azure etc.) with a unique name based on repo_type/repo_id
718
+ - All HF models loaded for a `@step` will be cached separately under flow/step/namespace.
719
+
720
+ - If repo is found in the datastore:
721
+ - Loads it directly from datastore to local path (can be temporary directory or specified path)
594
722
  """
595
723
  ...
596
724
 
@@ -643,341 +771,486 @@ def card(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typ
643
771
  """
644
772
  ...
645
773
 
774
+ def nim(*, models: "list[NIM]", backend: str, queue_timeout: int) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
775
+ """
776
+ This decorator is used to run NIM containers in Metaflow tasks as sidecars.
777
+
778
+ User code call
779
+ -----------
780
+ @nim(
781
+ models=['meta/llama3-8b-instruct', 'meta/llama3-70b-instruct'],
782
+ backend='managed'
783
+ )
784
+
785
+ Valid backend options
786
+ ---------------------
787
+ - 'managed': Outerbounds selects a compute provider based on the model.
788
+
789
+ Valid model options
790
+ ----------------
791
+ - 'meta/llama3-8b-instruct': 8B parameter model
792
+ - 'meta/llama3-70b-instruct': 70B parameter model
793
+ - any model here: https://nvcf.ngc.nvidia.com/functions?filter=nvidia-functions
794
+
795
+
796
+ Parameters
797
+ ----------
798
+ models: list[NIM]
799
+ List of NIM containers running models in sidecars.
800
+ backend: str
801
+ Compute provider to run the NIM container.
802
+ queue_timeout : int
803
+ Time to keep the job in NVCF's queue.
804
+ """
805
+ ...
806
+
646
807
  @typing.overload
647
- def conda(*, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
808
+ def catch(*, var: typing.Optional[str] = None, print_exception: bool = True) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
648
809
  """
649
- Specifies the Conda environment for the step.
810
+ Specifies that the step will succeed under all circumstances.
650
811
 
651
- Information in this decorator will augment any
652
- attributes set in the `@conda_base` flow-level decorator. Hence,
653
- you can use `@conda_base` to set packages required by all
654
- steps and use `@conda` to specify step-specific overrides.
812
+ The decorator will create an optional artifact, specified by `var`, which
813
+ contains the exception raised. You can use it to detect the presence
814
+ of errors, indicating that all happy-path artifacts produced by the step
815
+ are missing.
655
816
 
656
817
 
657
818
  Parameters
658
819
  ----------
659
- packages : Dict[str, str], default {}
660
- Packages to use for this step. The key is the name of the package
661
- and the value is the version to use.
662
- libraries : Dict[str, str], default {}
663
- Supported for backward compatibility. When used with packages, packages will take precedence.
664
- python : str, optional, default None
665
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
666
- that the version used will correspond to the version of the Python interpreter used to start the run.
667
- disabled : bool, default False
668
- If set to True, disables @conda.
820
+ var : str, optional, default None
821
+ Name of the artifact in which to store the caught exception.
822
+ If not specified, the exception is not stored.
823
+ print_exception : bool, default True
824
+ Determines whether or not the exception is printed to
825
+ stdout when caught.
669
826
  """
670
827
  ...
671
828
 
672
829
  @typing.overload
673
- def conda(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
830
+ def catch(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
674
831
  ...
675
832
 
676
833
  @typing.overload
677
- def conda(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
834
+ def catch(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
678
835
  ...
679
836
 
680
- def conda(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False):
837
+ def catch(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, var: typing.Optional[str] = None, print_exception: bool = True):
681
838
  """
682
- Specifies the Conda environment for the step.
839
+ Specifies that the step will succeed under all circumstances.
683
840
 
684
- Information in this decorator will augment any
685
- attributes set in the `@conda_base` flow-level decorator. Hence,
686
- you can use `@conda_base` to set packages required by all
687
- steps and use `@conda` to specify step-specific overrides.
841
+ The decorator will create an optional artifact, specified by `var`, which
842
+ contains the exception raised. You can use it to detect the presence
843
+ of errors, indicating that all happy-path artifacts produced by the step
844
+ are missing.
688
845
 
689
846
 
690
847
  Parameters
691
848
  ----------
692
- packages : Dict[str, str], default {}
693
- Packages to use for this step. The key is the name of the package
694
- and the value is the version to use.
695
- libraries : Dict[str, str], default {}
696
- Supported for backward compatibility. When used with packages, packages will take precedence.
697
- python : str, optional, default None
698
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
699
- that the version used will correspond to the version of the Python interpreter used to start the run.
700
- disabled : bool, default False
701
- If set to True, disables @conda.
849
+ var : str, optional, default None
850
+ Name of the artifact in which to store the caught exception.
851
+ If not specified, the exception is not stored.
852
+ print_exception : bool, default True
853
+ Determines whether or not the exception is printed to
854
+ stdout when caught.
702
855
  """
703
856
  ...
704
857
 
705
858
  @typing.overload
706
- def timeout(*, seconds: int = 0, minutes: int = 0, hours: int = 0) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
859
+ def retry(*, times: int = 3, minutes_between_retries: int = 2) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
707
860
  """
708
- Specifies a timeout for your step.
709
-
710
- This decorator is useful if this step may hang indefinitely.
861
+ Specifies the number of times the task corresponding
862
+ to a step needs to be retried.
711
863
 
712
- This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
713
- A timeout is considered to be an exception thrown by the step. It will cause the step to be
714
- retried if needed and the exception will be caught by the `@catch` decorator, if present.
864
+ This decorator is useful for handling transient errors, such as networking issues.
865
+ If your task contains operations that can't be retried safely, e.g. database updates,
866
+ it is advisable to annotate it with `@retry(times=0)`.
715
867
 
716
- Note that all the values specified in parameters are added together so if you specify
717
- 60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.
868
+ This can be used in conjunction with the `@catch` decorator. The `@catch`
869
+ decorator will execute a no-op task after all retries have been exhausted,
870
+ ensuring that the flow execution can continue.
718
871
 
719
872
 
720
873
  Parameters
721
874
  ----------
722
- seconds : int, default 0
723
- Number of seconds to wait prior to timing out.
724
- minutes : int, default 0
725
- Number of minutes to wait prior to timing out.
726
- hours : int, default 0
727
- Number of hours to wait prior to timing out.
875
+ times : int, default 3
876
+ Number of times to retry this task.
877
+ minutes_between_retries : int, default 2
878
+ Number of minutes between retries.
728
879
  """
729
880
  ...
730
881
 
731
882
  @typing.overload
732
- def timeout(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
883
+ def retry(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
733
884
  ...
734
885
 
735
886
  @typing.overload
736
- def timeout(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
887
+ def retry(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
737
888
  ...
738
889
 
739
- def timeout(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, seconds: int = 0, minutes: int = 0, hours: int = 0):
890
+ def retry(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, times: int = 3, minutes_between_retries: int = 2):
740
891
  """
741
- Specifies a timeout for your step.
742
-
743
- This decorator is useful if this step may hang indefinitely.
892
+ Specifies the number of times the task corresponding
893
+ to a step needs to be retried.
744
894
 
745
- This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
746
- A timeout is considered to be an exception thrown by the step. It will cause the step to be
747
- retried if needed and the exception will be caught by the `@catch` decorator, if present.
895
+ This decorator is useful for handling transient errors, such as networking issues.
896
+ If your task contains operations that can't be retried safely, e.g. database updates,
897
+ it is advisable to annotate it with `@retry(times=0)`.
748
898
 
749
- Note that all the values specified in parameters are added together so if you specify
750
- 60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.
899
+ This can be used in conjunction with the `@catch` decorator. The `@catch`
900
+ decorator will execute a no-op task after all retries have been exhausted,
901
+ ensuring that the flow execution can continue.
751
902
 
752
903
 
753
904
  Parameters
754
905
  ----------
755
- seconds : int, default 0
756
- Number of seconds to wait prior to timing out.
757
- minutes : int, default 0
758
- Number of minutes to wait prior to timing out.
759
- hours : int, default 0
760
- Number of hours to wait prior to timing out.
906
+ times : int, default 3
907
+ Number of times to retry this task.
908
+ minutes_between_retries : int, default 2
909
+ Number of minutes between retries.
761
910
  """
762
911
  ...
763
912
 
764
- @typing.overload
765
- def resources(*, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
913
+ def kubernetes(*, cpu: int = 1, memory: int = 4096, disk: int = 10240, image: typing.Optional[str] = None, image_pull_policy: str = 'KUBERNETES_IMAGE_PULL_POLICY', service_account: str = 'METAFLOW_KUBERNETES_SERVICE_ACCOUNT', secrets: typing.Optional[typing.List[str]] = None, node_selector: typing.Union[typing.Dict[str, str], str, None] = None, namespace: str = 'METAFLOW_KUBERNETES_NAMESPACE', gpu: typing.Optional[int] = None, gpu_vendor: str = 'KUBERNETES_GPU_VENDOR', tolerations: typing.List[str] = [], labels: typing.Dict[str, str] = 'METAFLOW_KUBERNETES_LABELS', annotations: typing.Dict[str, str] = 'METAFLOW_KUBERNETES_ANNOTATIONS', use_tmpfs: bool = False, tmpfs_tempdir: bool = True, tmpfs_size: typing.Optional[int] = None, tmpfs_path: typing.Optional[str] = '/metaflow_temp', persistent_volume_claims: typing.Optional[typing.Dict[str, str]] = None, shared_memory: typing.Optional[int] = None, port: typing.Optional[int] = None, compute_pool: typing.Optional[str] = None, hostname_resolution_timeout: int = 600, qos: str = 'Burstable', security_context: typing.Optional[typing.Dict[str, typing.Any]] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
766
914
  """
767
- Specifies the resources needed when executing this step.
768
-
769
- Use `@resources` to specify the resource requirements
770
- independently of the specific compute layer (`@batch`, `@kubernetes`).
771
-
772
- You can choose the compute layer on the command line by executing e.g.
773
- ```
774
- python myflow.py run --with batch
775
- ```
776
- or
777
- ```
778
- python myflow.py run --with kubernetes
779
- ```
780
- which executes the flow on the desired system using the
781
- requirements specified in `@resources`.
915
+ Specifies that this step should execute on Kubernetes.
782
916
 
783
917
 
784
918
  Parameters
785
919
  ----------
786
920
  cpu : int, default 1
787
- Number of CPUs required for this step.
788
- gpu : int, optional, default None
789
- Number of GPUs required for this step.
790
- disk : int, optional, default None
791
- Disk size (in MB) required for this step. Only applies on Kubernetes.
921
+ Number of CPUs required for this step. If `@resources` is
922
+ also present, the maximum value from all decorators is used.
792
923
  memory : int, default 4096
793
- Memory size (in MB) required for this step.
794
- shared_memory : int, optional, default None
795
- The value for the size (in MiB) of the /dev/shm volume for this step.
796
- This parameter maps to the `--shm-size` option in Docker.
797
- """
798
- ...
799
-
800
- @typing.overload
801
- def resources(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
802
- ...
803
-
804
- @typing.overload
805
- def resources(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
806
- ...
807
-
808
- def resources(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None):
809
- """
810
- Specifies the resources needed when executing this step.
811
-
812
- Use `@resources` to specify the resource requirements
813
- independently of the specific compute layer (`@batch`, `@kubernetes`).
814
-
815
- You can choose the compute layer on the command line by executing e.g.
816
- ```
817
- python myflow.py run --with batch
818
- ```
819
- or
820
- ```
821
- python myflow.py run --with kubernetes
822
- ```
823
- which executes the flow on the desired system using the
824
- requirements specified in `@resources`.
825
-
826
-
827
- Parameters
828
- ----------
829
- cpu : int, default 1
830
- Number of CPUs required for this step.
924
+ Memory size (in MB) required for this step. If
925
+ `@resources` is also present, the maximum value from all decorators is
926
+ used.
927
+ disk : int, default 10240
928
+ Disk size (in MB) required for this step. If
929
+ `@resources` is also present, the maximum value from all decorators is
930
+ used.
931
+ image : str, optional, default None
932
+ Docker image to use when launching on Kubernetes. If not specified, and
933
+ METAFLOW_KUBERNETES_CONTAINER_IMAGE is specified, that image is used. If
934
+ not, a default Docker image mapping to the current version of Python is used.
935
+ image_pull_policy: str, default KUBERNETES_IMAGE_PULL_POLICY
936
+ If given, the imagePullPolicy to be applied to the Docker image of the step.
937
+ service_account : str, default METAFLOW_KUBERNETES_SERVICE_ACCOUNT
938
+ Kubernetes service account to use when launching pod in Kubernetes.
939
+ secrets : List[str], optional, default None
940
+ Kubernetes secrets to use when launching pod in Kubernetes. These
941
+ secrets are in addition to the ones defined in `METAFLOW_KUBERNETES_SECRETS`
942
+ in Metaflow configuration.
943
+ node_selector: Union[Dict[str,str], str], optional, default None
944
+ Kubernetes node selector(s) to apply to the pod running the task.
945
+ Can be passed in as a comma separated string of values e.g.
946
+ 'kubernetes.io/os=linux,kubernetes.io/arch=amd64' or as a dictionary
947
+ {'kubernetes.io/os': 'linux', 'kubernetes.io/arch': 'amd64'}
948
+ namespace : str, default METAFLOW_KUBERNETES_NAMESPACE
949
+ Kubernetes namespace to use when launching pod in Kubernetes.
831
950
  gpu : int, optional, default None
832
- Number of GPUs required for this step.
833
- disk : int, optional, default None
834
- Disk size (in MB) required for this step. Only applies on Kubernetes.
835
- memory : int, default 4096
836
- Memory size (in MB) required for this step.
837
- shared_memory : int, optional, default None
838
- The value for the size (in MiB) of the /dev/shm volume for this step.
839
- This parameter maps to the `--shm-size` option in Docker.
951
+ Number of GPUs required for this step. A value of zero implies that
952
+ the scheduled node should not have GPUs.
953
+ gpu_vendor : str, default KUBERNETES_GPU_VENDOR
954
+ The vendor of the GPUs to be used for this step.
955
+ tolerations : List[str], default []
956
+ The default is extracted from METAFLOW_KUBERNETES_TOLERATIONS.
957
+ Kubernetes tolerations to use when launching pod in Kubernetes.
958
+ labels: Dict[str, str], default: METAFLOW_KUBERNETES_LABELS
959
+ Kubernetes labels to use when launching pod in Kubernetes.
960
+ annotations: Dict[str, str], default: METAFLOW_KUBERNETES_ANNOTATIONS
961
+ Kubernetes annotations to use when launching pod in Kubernetes.
962
+ use_tmpfs : bool, default False
963
+ This enables an explicit tmpfs mount for this step.
964
+ tmpfs_tempdir : bool, default True
965
+ Sets METAFLOW_TEMPDIR to tmpfs_path if set for this step.
966
+ tmpfs_size : int, optional, default: None
967
+ The value for the size (in MiB) of the tmpfs mount for this step.
968
+ This parameter maps to the `--tmpfs` option in Docker. Defaults to 50% of the
969
+ memory allocated for this step.
970
+ tmpfs_path : str, optional, default /metaflow_temp
971
+ Path to tmpfs mount for this step.
972
+ persistent_volume_claims : Dict[str, str], optional, default None
973
+ A map (dictionary) of persistent volumes to be mounted to the pod for this step. The map is from persistent
974
+ volumes to the path to which the volume is to be mounted, e.g., `{'pvc-name': '/path/to/mount/on'}`.
975
+ shared_memory: int, optional
976
+ Shared memory size (in MiB) required for this step
977
+ port: int, optional
978
+ Port number to specify in the Kubernetes job object
979
+ compute_pool : str, optional, default None
980
+ Compute pool to be used for this step.
981
+ If not specified, any accessible compute pool within the perimeter is used.
982
+ hostname_resolution_timeout: int, default 10 * 60
983
+ Timeout in seconds for the worker tasks in the gang-scheduled cluster to resolve the hostname of the control task.
984
+ Only applicable when @parallel is used.
985
+ qos: str, default: Burstable
986
+ Quality of Service class to assign to the pod. Supported values are: Guaranteed, Burstable, BestEffort
987
+
988
+ security_context: Dict[str, Any], optional, default None
989
+ Container security context. Applies to the task container. Allows the following keys:
990
+ - privileged: bool, optional, default None
991
+ - allow_privilege_escalation: bool, optional, default None
992
+ - run_as_user: int, optional, default None
993
+ - run_as_group: int, optional, default None
994
+ - run_as_non_root: bool, optional, default None
840
995
  """
841
996
  ...
842
997
 
843
998
  @typing.overload
844
- def environment(*, vars: typing.Dict[str, str] = {}) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
999
+ def schedule(*, hourly: bool = False, daily: bool = True, weekly: bool = False, cron: typing.Optional[str] = None, timezone: typing.Optional[str] = None) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
845
1000
  """
846
- Specifies environment variables to be set prior to the execution of a step.
1001
+ Specifies the times when the flow should be run when running on a
1002
+ production scheduler.
847
1003
 
848
1004
 
849
1005
  Parameters
850
1006
  ----------
851
- vars : Dict[str, str], default {}
852
- Dictionary of environment variables to set.
1007
+ hourly : bool, default False
1008
+ Run the workflow hourly.
1009
+ daily : bool, default True
1010
+ Run the workflow daily.
1011
+ weekly : bool, default False
1012
+ Run the workflow weekly.
1013
+ cron : str, optional, default None
1014
+ Run the workflow at [a custom Cron schedule](https://docs.aws.amazon.com/eventbridge/latest/userguide/scheduled-events.html#cron-expressions)
1015
+ specified by this expression.
1016
+ timezone : str, optional, default None
1017
+ Timezone on which the schedule runs (default: None). Currently supported only for Argo workflows,
1018
+ which accepts timezones in [IANA format](https://nodatime.org/TimeZones).
853
1019
  """
854
1020
  ...
855
1021
 
856
1022
  @typing.overload
857
- def environment(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
858
- ...
859
-
860
- @typing.overload
861
- def environment(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1023
+ def schedule(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
862
1024
  ...
863
1025
 
864
- def environment(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, vars: typing.Dict[str, str] = {}):
1026
+ def schedule(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, hourly: bool = False, daily: bool = True, weekly: bool = False, cron: typing.Optional[str] = None, timezone: typing.Optional[str] = None):
865
1027
  """
866
- Specifies environment variables to be set prior to the execution of a step.
1028
+ Specifies the times when the flow should be run when running on a
1029
+ production scheduler.
867
1030
 
868
1031
 
869
1032
  Parameters
870
1033
  ----------
871
- vars : Dict[str, str], default {}
872
- Dictionary of environment variables to set.
1034
+ hourly : bool, default False
1035
+ Run the workflow hourly.
1036
+ daily : bool, default True
1037
+ Run the workflow daily.
1038
+ weekly : bool, default False
1039
+ Run the workflow weekly.
1040
+ cron : str, optional, default None
1041
+ Run the workflow at [a custom Cron schedule](https://docs.aws.amazon.com/eventbridge/latest/userguide/scheduled-events.html#cron-expressions)
1042
+ specified by this expression.
1043
+ timezone : str, optional, default None
1044
+ Timezone on which the schedule runs (default: None). Currently supported only for Argo workflows,
1045
+ which accepts timezones in [IANA format](https://nodatime.org/TimeZones).
873
1046
  """
874
1047
  ...
875
1048
 
876
1049
  @typing.overload
877
- def pypi(*, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1050
+ def conda_base(*, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
878
1051
  """
879
- Specifies the PyPI packages for the step.
1052
+ Specifies the Conda environment for all steps of the flow.
880
1053
 
881
- Information in this decorator will augment any
882
- attributes set in the `@pyi_base` flow-level decorator. Hence,
883
- you can use `@pypi_base` to set packages required by all
884
- steps and use `@pypi` to specify step-specific overrides.
1054
+ Use `@conda_base` to set common libraries required by all
1055
+ steps and use `@conda` to specify step-specific additions.
885
1056
 
886
1057
 
887
1058
  Parameters
888
1059
  ----------
889
- packages : Dict[str, str], default: {}
890
- Packages to use for this step. The key is the name of the package
1060
+ packages : Dict[str, str], default {}
1061
+ Packages to use for this flow. The key is the name of the package
891
1062
  and the value is the version to use.
892
- python : str, optional, default: None
1063
+ libraries : Dict[str, str], default {}
1064
+ Supported for backward compatibility. When used with packages, packages will take precedence.
1065
+ python : str, optional, default None
893
1066
  Version of Python to use, e.g. '3.7.4'. A default value of None implies
894
1067
  that the version used will correspond to the version of the Python interpreter used to start the run.
1068
+ disabled : bool, default False
1069
+ If set to True, disables Conda.
895
1070
  """
896
1071
  ...
897
1072
 
898
1073
  @typing.overload
899
- def pypi(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
900
- ...
901
-
902
- @typing.overload
903
- def pypi(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1074
+ def conda_base(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
904
1075
  ...
905
1076
 
906
- def pypi(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None):
1077
+ def conda_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False):
907
1078
  """
908
- Specifies the PyPI packages for the step.
1079
+ Specifies the Conda environment for all steps of the flow.
909
1080
 
910
- Information in this decorator will augment any
911
- attributes set in the `@pyi_base` flow-level decorator. Hence,
912
- you can use `@pypi_base` to set packages required by all
913
- steps and use `@pypi` to specify step-specific overrides.
1081
+ Use `@conda_base` to set common libraries required by all
1082
+ steps and use `@conda` to specify step-specific additions.
914
1083
 
915
1084
 
916
1085
  Parameters
917
1086
  ----------
918
- packages : Dict[str, str], default: {}
919
- Packages to use for this step. The key is the name of the package
1087
+ packages : Dict[str, str], default {}
1088
+ Packages to use for this flow. The key is the name of the package
920
1089
  and the value is the version to use.
921
- python : str, optional, default: None
1090
+ libraries : Dict[str, str], default {}
1091
+ Supported for backward compatibility. When used with packages, packages will take precedence.
1092
+ python : str, optional, default None
922
1093
  Version of Python to use, e.g. '3.7.4'. A default value of None implies
923
1094
  that the version used will correspond to the version of the Python interpreter used to start the run.
1095
+ disabled : bool, default False
1096
+ If set to True, disables Conda.
924
1097
  """
925
1098
  ...
926
1099
 
927
- @typing.overload
928
- def model(*, load: typing.Union[typing.List[str], str, typing.List[typing.Tuple[str, typing.Optional[str]]]] = None, temp_dir_root: str = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1100
+ def airflow_external_task_sensor(*, timeout: int, poke_interval: int, mode: str, exponential_backoff: bool, pool: str, soft_fail: bool, name: str, description: str, external_dag_id: str, external_task_ids: typing.List[str], allowed_states: typing.List[str], failed_states: typing.List[str], execution_delta: "datetime.timedelta", check_existence: bool) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
929
1101
  """
930
- Enables loading / saving of models within a step.
931
-
1102
+ The `@airflow_external_task_sensor` decorator attaches an Airflow [ExternalTaskSensor](https://airflow.apache.org/docs/apache-airflow/stable/_api/airflow/sensors/external_task/index.html#airflow.sensors.external_task.ExternalTaskSensor) before the start step of the flow.
1103
+ This decorator only works when a flow is scheduled on Airflow and is compiled using `airflow create`. More than one `@airflow_external_task_sensor` can be added as flow decorators. Adding more than one decorator will ensure that the `start` step starts only after all sensors finish.
932
1104
 
933
1105
 
934
1106
  Parameters
935
1107
  ----------
936
- load : Union[List[str],str,List[Tuple[str,Union[str,None]]]], default: None
937
- Artifact name/s referencing the models/checkpoints to load. Artifact names refer to the names of the instance variables set to `self`.
938
- These artifact names give to `load` be reference objects or reference `key` string's from objects created by:
939
- - `current.checkpoint`
940
- - `current.model`
941
- - `current.huggingface_hub`
942
-
943
- If a list of tuples is provided, the first element is the artifact name and the second element is the path the artifact needs be unpacked on
944
- the local filesystem. If the second element is None, the artifact will be unpacked in the current working directory.
945
- If a string is provided, then the artifact corresponding to that name will be loaded in the current working directory.
946
-
947
- temp_dir_root : str, default: None
948
- The root directory under which `current.model.loaded` will store loaded models
1108
+ timeout : int
1109
+ Time, in seconds, before the task times out and fails. (Default: 3600)
1110
+ poke_interval : int
1111
+ Time in seconds that the job should wait in between each try. (Default: 60)
1112
+ mode : str
1113
+ How the sensor operates. Options are: { poke | reschedule }. (Default: "poke")
1114
+ exponential_backoff : bool
1115
+ Allow progressively longer waits between pokes by using an exponential backoff algorithm. (Default: True)
1116
+ pool : str
1117
+ the slot pool this task should run in,
1118
+ slot pools are a way to limit concurrency for certain tasks. (Default:None)
1119
+ soft_fail : bool
1120
+ Set to true to mark the task as SKIPPED on failure. (Default: False)
1121
+ name : str
1122
+ Name of the sensor on Airflow
1123
+ description : str
1124
+ Description of sensor in the Airflow UI
1125
+ external_dag_id : str
1126
+ The dag_id that contains the task you want to wait for.
1127
+ external_task_ids : List[str]
1128
+ The list of task_ids that you want to wait for.
1129
+ If None (default value) the sensor waits for the DAG. (Default: None)
1130
+ allowed_states : List[str]
1131
+ Iterable of allowed states, (Default: ['success'])
1132
+ failed_states : List[str]
1133
+ Iterable of failed or dis-allowed states. (Default: None)
1134
+ execution_delta : datetime.timedelta
1135
+ time difference with the previous execution to look at,
1136
+ the default is the same logical date as the current task or DAG. (Default: None)
1137
+ check_existence: bool
1138
+ Set to True to check if the external task exists or check if
1139
+ the DAG to wait for exists. (Default: True)
949
1140
  """
950
1141
  ...
951
1142
 
952
- @typing.overload
953
- def model(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
954
- ...
955
-
956
- @typing.overload
957
- def model(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
958
- ...
959
-
960
- def model(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, load: typing.Union[typing.List[str], str, typing.List[typing.Tuple[str, typing.Optional[str]]]] = None, temp_dir_root: str = None):
1143
+ def with_artifact_store(f: typing.Optional[typing.Type[FlowSpecDerived]] = None):
961
1144
  """
962
- Enables loading / saving of models within a step.
1145
+ Allows setting external datastores to save data for the
1146
+ `@checkpoint`/`@model`/`@huggingface_hub` decorators.
963
1147
 
1148
+ This decorator is useful when users wish to save data to a different datastore
1149
+ than what is configured in Metaflow. This can be for a variety of reasons:
964
1150
 
1151
+ 1. Data security: The objects need to be stored in a bucket (object storage) that is not accessible by other flows.
1152
+ 2. Data Locality: The location where the task is executing is not located in the same region as the datastore.
1153
+ - Example: Metaflow datastore lives in US East, but the task is executing in Finland datacenters.
1154
+ 3. Data Lifecycle Policies: The objects need to be archived / managed separately from the Metaflow managed objects.
1155
+ - Example: Flow is training very large models that need to be stored separately and will be deleted more aggressively than the Metaflow managed objects.
965
1156
 
966
- Parameters
1157
+ Usage:
967
1158
  ----------
968
- load : Union[List[str],str,List[Tuple[str,Union[str,None]]]], default: None
969
- Artifact name/s referencing the models/checkpoints to load. Artifact names refer to the names of the instance variables set to `self`.
970
- These artifact names give to `load` be reference objects or reference `key` string's from objects created by:
971
- - `current.checkpoint`
972
- - `current.model`
973
- - `current.huggingface_hub`
974
1159
 
975
- If a list of tuples is provided, the first element is the artifact name and the second element is the path the artifact needs be unpacked on
976
- the local filesystem. If the second element is None, the artifact will be unpacked in the current working directory.
977
- If a string is provided, then the artifact corresponding to that name will be loaded in the current working directory.
1160
+ - Using a custom IAM role to access the datastore.
1161
+
1162
+ ```python
1163
+ @with_artifact_store(
1164
+ type="s3",
1165
+ config=lambda: {
1166
+ "root": "s3://my-bucket-foo/path/to/root",
1167
+ "role_arn": ROLE,
1168
+ },
1169
+ )
1170
+ class MyFlow(FlowSpec):
1171
+
1172
+ @checkpoint
1173
+ @step
1174
+ def start(self):
1175
+ with open("my_file.txt", "w") as f:
1176
+ f.write("Hello, World!")
1177
+ self.external_bucket_checkpoint = current.checkpoint.save("my_file.txt")
1178
+ self.next(self.end)
1179
+
1180
+ ```
1181
+
1182
+ - Using credentials to access the s3-compatible datastore.
1183
+
1184
+ ```python
1185
+ @with_artifact_store(
1186
+ type="s3",
1187
+ config=lambda: {
1188
+ "root": "s3://my-bucket-foo/path/to/root",
1189
+ "client_params": {
1190
+ "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1191
+ "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1192
+ },
1193
+ },
1194
+ )
1195
+ class MyFlow(FlowSpec):
1196
+
1197
+ @checkpoint
1198
+ @step
1199
+ def start(self):
1200
+ with open("my_file.txt", "w") as f:
1201
+ f.write("Hello, World!")
1202
+ self.external_bucket_checkpoint = current.checkpoint.save("my_file.txt")
1203
+ self.next(self.end)
1204
+
1205
+ ```
1206
+
1207
+ - Accessing objects stored in external datastores after task execution.
1208
+
1209
+ ```python
1210
+ run = Run("CheckpointsTestsFlow/8992")
1211
+ with artifact_store_from(run=run, config={
1212
+ "client_params": {
1213
+ "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1214
+ "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1215
+ },
1216
+ }):
1217
+ with Checkpoint() as cp:
1218
+ latest = cp.list(
1219
+ task=run["start"].task
1220
+ )[0]
1221
+ print(latest)
1222
+ cp.load(
1223
+ latest,
1224
+ "test-checkpoints"
1225
+ )
1226
+
1227
+ task = Task("TorchTuneFlow/8484/train/53673")
1228
+ with artifact_store_from(run=run, config={
1229
+ "client_params": {
1230
+ "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1231
+ "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1232
+ },
1233
+ }):
1234
+ load_model(
1235
+ task.data.model_ref,
1236
+ "test-models"
1237
+ )
1238
+ ```
1239
+ Parameters:
1240
+ ----------
978
1241
 
979
- temp_dir_root : str, default: None
980
- The root directory under which `current.model.loaded` will store loaded models
1242
+ type: str
1243
+ The type of the datastore. Can be one of 's3', 'gcs', 'azure' or any other supported metaflow Datastore.
1244
+
1245
+ config: dict or Callable
1246
+ Dictionary of configuration options for the datastore. The following keys are required:
1247
+ - root: The root path in the datastore where the data will be saved. (needs to be in the format expected by the datastore)
1248
+ - example: 's3://bucket-name/path/to/root'
1249
+ - example: 'gs://bucket-name/path/to/root'
1250
+ - example: 'https://myblockacc.blob.core.windows.net/metaflow/'
1251
+ - role_arn (optional): AWS IAM role to access s3 bucket (only when `type` is 's3')
1252
+ - session_vars (optional): AWS session variables to access s3 bucket (only when `type` is 's3')
1253
+ - client_params (optional): AWS client parameters to access s3 bucket (only when `type` is 's3')
981
1254
  """
982
1255
  ...
983
1256
 
@@ -1016,49 +1289,6 @@ def project(*, name: str, branch: typing.Optional[str] = None, production: bool
1016
1289
  """
1017
1290
  ...
1018
1291
 
1019
- def airflow_s3_key_sensor(*, timeout: int, poke_interval: int, mode: str, exponential_backoff: bool, pool: str, soft_fail: bool, name: str, description: str, bucket_key: typing.Union[str, typing.List[str]], bucket_name: str, wildcard_match: bool, aws_conn_id: str, verify: bool) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1020
- """
1021
- The `@airflow_s3_key_sensor` decorator attaches a Airflow [S3KeySensor](https://airflow.apache.org/docs/apache-airflow-providers-amazon/stable/_api/airflow/providers/amazon/aws/sensors/s3/index.html#airflow.providers.amazon.aws.sensors.s3.S3KeySensor)
1022
- before the start step of the flow. This decorator only works when a flow is scheduled on Airflow
1023
- and is compiled using `airflow create`. More than one `@airflow_s3_key_sensor` can be
1024
- added as a flow decorators. Adding more than one decorator will ensure that `start` step
1025
- starts only after all sensors finish.
1026
-
1027
-
1028
- Parameters
1029
- ----------
1030
- timeout : int
1031
- Time, in seconds before the task times out and fails. (Default: 3600)
1032
- poke_interval : int
1033
- Time in seconds that the job should wait in between each try. (Default: 60)
1034
- mode : str
1035
- How the sensor operates. Options are: { poke | reschedule }. (Default: "poke")
1036
- exponential_backoff : bool
1037
- allow progressive longer waits between pokes by using exponential backoff algorithm. (Default: True)
1038
- pool : str
1039
- the slot pool this task should run in,
1040
- slot pools are a way to limit concurrency for certain tasks. (Default:None)
1041
- soft_fail : bool
1042
- Set to true to mark the task as SKIPPED on failure. (Default: False)
1043
- name : str
1044
- Name of the sensor on Airflow
1045
- description : str
1046
- Description of sensor in the Airflow UI
1047
- bucket_key : Union[str, List[str]]
1048
- The key(s) being waited on. Supports full s3:// style url or relative path from root level.
1049
- When it's specified as a full s3:// url, please leave `bucket_name` as None
1050
- bucket_name : str
1051
- Name of the S3 bucket. Only needed when bucket_key is not provided as a full s3:// url.
1052
- When specified, all the keys passed to bucket_key refers to this bucket. (Default:None)
1053
- wildcard_match : bool
1054
- whether the bucket_key should be interpreted as a Unix wildcard pattern. (Default: False)
1055
- aws_conn_id : str
1056
- a reference to the s3 connection on Airflow. (Default: None)
1057
- verify : bool
1058
- Whether or not to verify SSL certificates for S3 connection. (Default: None)
1059
- """
1060
- ...
1061
-
1062
1292
  @typing.overload
1063
1293
  def trigger_on_finish(*, flow: typing.Union[typing.Dict[str, str], str, None] = None, flows: typing.List[typing.Union[str, typing.Dict[str, str]]] = [], options: typing.Dict[str, typing.Any] = {}) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1064
1294
  """
@@ -1102,247 +1332,61 @@ def trigger_on_finish(*, flow: typing.Union[typing.Dict[str, str], str, None] =
1102
1332
  flow : Union[str, Dict[str, str]], optional, default None
1103
1333
  Upstream flow dependency for this flow.
1104
1334
  flows : List[Union[str, Dict[str, str]]], default []
1105
- Upstream flow dependencies for this flow.
1106
- options : Dict[str, Any], default {}
1107
- Backend-specific configuration for tuning eventing behavior.
1108
- """
1109
- ...
1110
-
1111
- @typing.overload
1112
- def trigger_on_finish(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1113
- ...
1114
-
1115
- def trigger_on_finish(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, flow: typing.Union[typing.Dict[str, str], str, None] = None, flows: typing.List[typing.Union[str, typing.Dict[str, str]]] = [], options: typing.Dict[str, typing.Any] = {}):
1116
- """
1117
- Specifies the flow(s) that this flow depends on.
1118
-
1119
- ```
1120
- @trigger_on_finish(flow='FooFlow')
1121
- ```
1122
- or
1123
- ```
1124
- @trigger_on_finish(flows=['FooFlow', 'BarFlow'])
1125
- ```
1126
- This decorator respects the @project decorator and triggers the flow
1127
- when upstream runs within the same namespace complete successfully
1128
-
1129
- Additionally, you can specify project aware upstream flow dependencies
1130
- by specifying the fully qualified project_flow_name.
1131
- ```
1132
- @trigger_on_finish(flow='my_project.branch.my_branch.FooFlow')
1133
- ```
1134
- or
1135
- ```
1136
- @trigger_on_finish(flows=['my_project.branch.my_branch.FooFlow', 'BarFlow'])
1137
- ```
1138
-
1139
- You can also specify just the project or project branch (other values will be
1140
- inferred from the current project or project branch):
1141
- ```
1142
- @trigger_on_finish(flow={"name": "FooFlow", "project": "my_project", "project_branch": "branch"})
1143
- ```
1144
-
1145
- Note that `branch` is typically one of:
1146
- - `prod`
1147
- - `user.bob`
1148
- - `test.my_experiment`
1149
- - `prod.staging`
1150
-
1151
-
1152
- Parameters
1153
- ----------
1154
- flow : Union[str, Dict[str, str]], optional, default None
1155
- Upstream flow dependency for this flow.
1156
- flows : List[Union[str, Dict[str, str]]], default []
1157
- Upstream flow dependencies for this flow.
1158
- options : Dict[str, Any], default {}
1159
- Backend-specific configuration for tuning eventing behavior.
1160
- """
1161
- ...
1162
-
1163
- @typing.overload
1164
- def conda_base(*, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1165
- """
1166
- Specifies the Conda environment for all steps of the flow.
1167
-
1168
- Use `@conda_base` to set common libraries required by all
1169
- steps and use `@conda` to specify step-specific additions.
1170
-
1171
-
1172
- Parameters
1173
- ----------
1174
- packages : Dict[str, str], default {}
1175
- Packages to use for this flow. The key is the name of the package
1176
- and the value is the version to use.
1177
- libraries : Dict[str, str], default {}
1178
- Supported for backward compatibility. When used with packages, packages will take precedence.
1179
- python : str, optional, default None
1180
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
1181
- that the version used will correspond to the version of the Python interpreter used to start the run.
1182
- disabled : bool, default False
1183
- If set to True, disables Conda.
1184
- """
1185
- ...
1186
-
1187
- @typing.overload
1188
- def conda_base(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1189
- ...
1190
-
1191
- def conda_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False):
1192
- """
1193
- Specifies the Conda environment for all steps of the flow.
1194
-
1195
- Use `@conda_base` to set common libraries required by all
1196
- steps and use `@conda` to specify step-specific additions.
1197
-
1198
-
1199
- Parameters
1200
- ----------
1201
- packages : Dict[str, str], default {}
1202
- Packages to use for this flow. The key is the name of the package
1203
- and the value is the version to use.
1204
- libraries : Dict[str, str], default {}
1205
- Supported for backward compatibility. When used with packages, packages will take precedence.
1206
- python : str, optional, default None
1207
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
1208
- that the version used will correspond to the version of the Python interpreter used to start the run.
1209
- disabled : bool, default False
1210
- If set to True, disables Conda.
1211
- """
1212
- ...
1213
-
1214
- @typing.overload
1215
- def schedule(*, hourly: bool = False, daily: bool = True, weekly: bool = False, cron: typing.Optional[str] = None, timezone: typing.Optional[str] = None) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1216
- """
1217
- Specifies the times when the flow should be run when running on a
1218
- production scheduler.
1219
-
1220
-
1221
- Parameters
1222
- ----------
1223
- hourly : bool, default False
1224
- Run the workflow hourly.
1225
- daily : bool, default True
1226
- Run the workflow daily.
1227
- weekly : bool, default False
1228
- Run the workflow weekly.
1229
- cron : str, optional, default None
1230
- Run the workflow at [a custom Cron schedule](https://docs.aws.amazon.com/eventbridge/latest/userguide/scheduled-events.html#cron-expressions)
1231
- specified by this expression.
1232
- timezone : str, optional, default None
1233
- Timezone on which the schedule runs (default: None). Currently supported only for Argo workflows,
1234
- which accepts timezones in [IANA format](https://nodatime.org/TimeZones).
1235
- """
1236
- ...
1237
-
1238
- @typing.overload
1239
- def schedule(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1240
- ...
1241
-
1242
- def schedule(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, hourly: bool = False, daily: bool = True, weekly: bool = False, cron: typing.Optional[str] = None, timezone: typing.Optional[str] = None):
1243
- """
1244
- Specifies the times when the flow should be run when running on a
1245
- production scheduler.
1246
-
1247
-
1248
- Parameters
1249
- ----------
1250
- hourly : bool, default False
1251
- Run the workflow hourly.
1252
- daily : bool, default True
1253
- Run the workflow daily.
1254
- weekly : bool, default False
1255
- Run the workflow weekly.
1256
- cron : str, optional, default None
1257
- Run the workflow at [a custom Cron schedule](https://docs.aws.amazon.com/eventbridge/latest/userguide/scheduled-events.html#cron-expressions)
1258
- specified by this expression.
1259
- timezone : str, optional, default None
1260
- Timezone on which the schedule runs (default: None). Currently supported only for Argo workflows,
1261
- which accepts timezones in [IANA format](https://nodatime.org/TimeZones).
1262
- """
1263
- ...
1264
-
1265
- @typing.overload
1266
- def pypi_base(*, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1267
- """
1268
- Specifies the PyPI packages for all steps of the flow.
1269
-
1270
- Use `@pypi_base` to set common packages required by all
1271
- steps and use `@pypi` to specify step-specific overrides.
1272
-
1273
- Parameters
1274
- ----------
1275
- packages : Dict[str, str], default: {}
1276
- Packages to use for this flow. The key is the name of the package
1277
- and the value is the version to use.
1278
- python : str, optional, default: None
1279
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
1280
- that the version used will correspond to the version of the Python interpreter used to start the run.
1281
- """
1282
- ...
1283
-
1284
- @typing.overload
1285
- def pypi_base(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1286
- ...
1287
-
1288
- def pypi_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None):
1289
- """
1290
- Specifies the PyPI packages for all steps of the flow.
1291
-
1292
- Use `@pypi_base` to set common packages required by all
1293
- steps and use `@pypi` to specify step-specific overrides.
1294
-
1295
- Parameters
1296
- ----------
1297
- packages : Dict[str, str], default: {}
1298
- Packages to use for this flow. The key is the name of the package
1299
- and the value is the version to use.
1300
- python : str, optional, default: None
1301
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
1302
- that the version used will correspond to the version of the Python interpreter used to start the run.
1335
+ Upstream flow dependencies for this flow.
1336
+ options : Dict[str, Any], default {}
1337
+ Backend-specific configuration for tuning eventing behavior.
1303
1338
  """
1304
1339
  ...
1305
1340
 
1306
- def airflow_external_task_sensor(*, timeout: int, poke_interval: int, mode: str, exponential_backoff: bool, pool: str, soft_fail: bool, name: str, description: str, external_dag_id: str, external_task_ids: typing.List[str], allowed_states: typing.List[str], failed_states: typing.List[str], execution_delta: "datetime.timedelta", check_existence: bool) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1341
+ @typing.overload
1342
+ def trigger_on_finish(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1343
+ ...
1344
+
1345
+ def trigger_on_finish(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, flow: typing.Union[typing.Dict[str, str], str, None] = None, flows: typing.List[typing.Union[str, typing.Dict[str, str]]] = [], options: typing.Dict[str, typing.Any] = {}):
1307
1346
  """
1308
- The `@airflow_external_task_sensor` decorator attaches a Airflow [ExternalTaskSensor](https://airflow.apache.org/docs/apache-airflow/stable/_api/airflow/sensors/external_task/index.html#airflow.sensors.external_task.ExternalTaskSensor) before the start step of the flow.
1309
- This decorator only works when a flow is scheduled on Airflow and is compiled using `airflow create`. More than one `@airflow_external_task_sensor` can be added as a flow decorators. Adding more than one decorator will ensure that `start` step starts only after all sensors finish.
1347
+ Specifies the flow(s) that this flow depends on.
1348
+
1349
+ ```
1350
+ @trigger_on_finish(flow='FooFlow')
1351
+ ```
1352
+ or
1353
+ ```
1354
+ @trigger_on_finish(flows=['FooFlow', 'BarFlow'])
1355
+ ```
1356
+ This decorator respects the @project decorator and triggers the flow
1357
+ when upstream runs within the same namespace complete successfully
1358
+
1359
+ Additionally, you can specify project aware upstream flow dependencies
1360
+ by specifying the fully qualified project_flow_name.
1361
+ ```
1362
+ @trigger_on_finish(flow='my_project.branch.my_branch.FooFlow')
1363
+ ```
1364
+ or
1365
+ ```
1366
+ @trigger_on_finish(flows=['my_project.branch.my_branch.FooFlow', 'BarFlow'])
1367
+ ```
1368
+
1369
+ You can also specify just the project or project branch (other values will be
1370
+ inferred from the current project or project branch):
1371
+ ```
1372
+ @trigger_on_finish(flow={"name": "FooFlow", "project": "my_project", "project_branch": "branch"})
1373
+ ```
1374
+
1375
+ Note that `branch` is typically one of:
1376
+ - `prod`
1377
+ - `user.bob`
1378
+ - `test.my_experiment`
1379
+ - `prod.staging`
1310
1380
 
1311
1381
 
1312
1382
  Parameters
1313
1383
  ----------
1314
- timeout : int
1315
- Time, in seconds before the task times out and fails. (Default: 3600)
1316
- poke_interval : int
1317
- Time in seconds that the job should wait in between each try. (Default: 60)
1318
- mode : str
1319
- How the sensor operates. Options are: { poke | reschedule }. (Default: "poke")
1320
- exponential_backoff : bool
1321
- allow progressive longer waits between pokes by using exponential backoff algorithm. (Default: True)
1322
- pool : str
1323
- the slot pool this task should run in,
1324
- slot pools are a way to limit concurrency for certain tasks. (Default:None)
1325
- soft_fail : bool
1326
- Set to true to mark the task as SKIPPED on failure. (Default: False)
1327
- name : str
1328
- Name of the sensor on Airflow
1329
- description : str
1330
- Description of sensor in the Airflow UI
1331
- external_dag_id : str
1332
- The dag_id that contains the task you want to wait for.
1333
- external_task_ids : List[str]
1334
- The list of task_ids that you want to wait for.
1335
- If None (default value) the sensor waits for the DAG. (Default: None)
1336
- allowed_states : List[str]
1337
- Iterable of allowed states, (Default: ['success'])
1338
- failed_states : List[str]
1339
- Iterable of failed or dis-allowed states. (Default: None)
1340
- execution_delta : datetime.timedelta
1341
- time difference with the previous execution to look at,
1342
- the default is the same logical date as the current task or DAG. (Default: None)
1343
- check_existence: bool
1344
- Set to True to check if the external task exists or check if
1345
- the DAG to wait for exists. (Default: True)
1384
+ flow : Union[str, Dict[str, str]], optional, default None
1385
+ Upstream flow dependency for this flow.
1386
+ flows : List[Union[str, Dict[str, str]]], default []
1387
+ Upstream flow dependencies for this flow.
1388
+ options : Dict[str, Any], default {}
1389
+ Backend-specific configuration for tuning eventing behavior.
1346
1390
  """
1347
1391
  ...
1348
1392
 
@@ -1439,117 +1483,87 @@ def trigger(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, event: t
1439
1483
  """
1440
1484
  ...
1441
1485
 
1442
- def with_artifact_store(f: typing.Optional[typing.Type[FlowSpecDerived]] = None):
1486
+ def airflow_s3_key_sensor(*, timeout: int, poke_interval: int, mode: str, exponential_backoff: bool, pool: str, soft_fail: bool, name: str, description: str, bucket_key: typing.Union[str, typing.List[str]], bucket_name: str, wildcard_match: bool, aws_conn_id: str, verify: bool) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1443
1487
  """
1444
- Allows setting external datastores to save data for the
1445
- `@checkpoint`/`@model`/`@huggingface_hub` decorators.
1446
-
1447
- This decorator is useful when users wish to save data to a different datastore
1448
- than what is configured in Metaflow. This can be for variety of reasons:
1488
+ The `@airflow_s3_key_sensor` decorator attaches an Airflow [S3KeySensor](https://airflow.apache.org/docs/apache-airflow-providers-amazon/stable/_api/airflow/providers/amazon/aws/sensors/s3/index.html#airflow.providers.amazon.aws.sensors.s3.S3KeySensor)
1489
+ before the start step of the flow. This decorator only works when a flow is scheduled on Airflow
1490
+ and is compiled using `airflow create`. More than one `@airflow_s3_key_sensor` can be
1491
+ added as flow decorators. Adding more than one decorator will ensure that the `start` step
1492
+ starts only after all sensors finish.
1449
1493
 
1450
- 1. Data security: The objects needs to be stored in a bucket (object storage) that is not accessible by other flows.
1451
- 2. Data Locality: The location where the task is executing is not located in the same region as the datastore.
1452
- - Example: Metaflow datastore lives in US East, but the task is executing in Finland datacenters.
1453
- 3. Data Lifecycle Policies: The objects need to be archived / managed separately from the Metaflow managed objects.
1454
- - Example: Flow is training very large models that need to be stored separately and will be deleted more aggressively than the Metaflow managed objects.
1455
1494
 
1456
- Usage:
1495
+ Parameters
1457
1496
  ----------
1497
+ timeout : int
1498
+ Time, in seconds before the task times out and fails. (Default: 3600)
1499
+ poke_interval : int
1500
+ Time in seconds that the job should wait in between each try. (Default: 60)
1501
+ mode : str
1502
+ How the sensor operates. Options are: { poke | reschedule }. (Default: "poke")
1503
+ exponential_backoff : bool
1504
+ Allow progressively longer waits between pokes by using an exponential backoff algorithm. (Default: True)
1505
+ pool : str
1506
+ the slot pool this task should run in,
1507
+ slot pools are a way to limit concurrency for certain tasks. (Default:None)
1508
+ soft_fail : bool
1509
+ Set to true to mark the task as SKIPPED on failure. (Default: False)
1510
+ name : str
1511
+ Name of the sensor on Airflow
1512
+ description : str
1513
+ Description of sensor in the Airflow UI
1514
+ bucket_key : Union[str, List[str]]
1515
+ The key(s) being waited on. Supports full s3:// style url or relative path from root level.
1516
+ When it's specified as a full s3:// url, please leave `bucket_name` as None
1517
+ bucket_name : str
1518
+ Name of the S3 bucket. Only needed when bucket_key is not provided as a full s3:// url.
1519
+ When specified, all the keys passed to bucket_key refer to this bucket. (Default: None)
1520
+ wildcard_match : bool
1521
+ whether the bucket_key should be interpreted as a Unix wildcard pattern. (Default: False)
1522
+ aws_conn_id : str
1523
+ a reference to the s3 connection on Airflow. (Default: None)
1524
+ verify : bool
1525
+ Whether or not to verify SSL certificates for S3 connection. (Default: None)
1526
+ """
1527
+ ...
1528
+
1529
+ @typing.overload
1530
+ def pypi_base(*, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1531
+ """
1532
+ Specifies the PyPI packages for all steps of the flow.
1458
1533
 
1459
- - Using a custom IAM role to access the datastore.
1460
-
1461
- ```python
1462
- @with_artifact_store(
1463
- type="s3",
1464
- config=lambda: {
1465
- "root": "s3://my-bucket-foo/path/to/root",
1466
- "role_arn": ROLE,
1467
- },
1468
- )
1469
- class MyFlow(FlowSpec):
1470
-
1471
- @checkpoint
1472
- @step
1473
- def start(self):
1474
- with open("my_file.txt", "w") as f:
1475
- f.write("Hello, World!")
1476
- self.external_bucket_checkpoint = current.checkpoint.save("my_file.txt")
1477
- self.next(self.end)
1478
-
1479
- ```
1480
-
1481
- - Using credentials to access the s3-compatible datastore.
1482
-
1483
- ```python
1484
- @with_artifact_store(
1485
- type="s3",
1486
- config=lambda: {
1487
- "root": "s3://my-bucket-foo/path/to/root",
1488
- "client_params": {
1489
- "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1490
- "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1491
- },
1492
- },
1493
- )
1494
- class MyFlow(FlowSpec):
1495
-
1496
- @checkpoint
1497
- @step
1498
- def start(self):
1499
- with open("my_file.txt", "w") as f:
1500
- f.write("Hello, World!")
1501
- self.external_bucket_checkpoint = current.checkpoint.save("my_file.txt")
1502
- self.next(self.end)
1503
-
1504
- ```
1505
-
1506
- - Accessing objects stored in external datastores after task execution.
1507
-
1508
- ```python
1509
- run = Run("CheckpointsTestsFlow/8992")
1510
- with artifact_store_from(run=run, config={
1511
- "client_params": {
1512
- "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1513
- "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1514
- },
1515
- }):
1516
- with Checkpoint() as cp:
1517
- latest = cp.list(
1518
- task=run["start"].task
1519
- )[0]
1520
- print(latest)
1521
- cp.load(
1522
- latest,
1523
- "test-checkpoints"
1524
- )
1534
+ Use `@pypi_base` to set common packages required by all
1535
+ steps and use `@pypi` to specify step-specific overrides.
1525
1536
 
1526
- task = Task("TorchTuneFlow/8484/train/53673")
1527
- with artifact_store_from(run=run, config={
1528
- "client_params": {
1529
- "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1530
- "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1531
- },
1532
- }):
1533
- load_model(
1534
- task.data.model_ref,
1535
- "test-models"
1536
- )
1537
- ```
1538
- Parameters:
1537
+ Parameters
1539
1538
  ----------
1539
+ packages : Dict[str, str], default: {}
1540
+ Packages to use for this flow. The key is the name of the package
1541
+ and the value is the version to use.
1542
+ python : str, optional, default: None
1543
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
1544
+ that the version used will correspond to the version of the Python interpreter used to start the run.
1545
+ """
1546
+ ...
1547
+
1548
+ @typing.overload
1549
+ def pypi_base(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1550
+ ...
1551
+
1552
+ def pypi_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None):
1553
+ """
1554
+ Specifies the PyPI packages for all steps of the flow.
1540
1555
 
1541
- type: str
1542
- The type of the datastore. Can be one of 's3', 'gcs', 'azure' or any other supported metaflow Datastore.
1556
+ Use `@pypi_base` to set common packages required by all
1557
+ steps and use `@pypi` to specify step-specific overrides.
1543
1558
 
1544
- config: dict or Callable
1545
- Dictionary of configuration options for the datastore. The following keys are required:
1546
- - root: The root path in the datastore where the data will be saved. (needs to be in the format expected by the datastore)
1547
- - example: 's3://bucket-name/path/to/root'
1548
- - example: 'gs://bucket-name/path/to/root'
1549
- - example: 'https://myblockacc.blob.core.windows.net/metaflow/'
1550
- - role_arn (optional): AWS IAM role to access s3 bucket (only when `type` is 's3')
1551
- - session_vars (optional): AWS session variables to access s3 bucket (only when `type` is 's3')
1552
- - client_params (optional): AWS client parameters to access s3 bucket (only when `type` is 's3')
1559
+ Parameters
1560
+ ----------
1561
+ packages : Dict[str, str], default: {}
1562
+ Packages to use for this flow. The key is the name of the package
1563
+ and the value is the version to use.
1564
+ python : str, optional, default: None
1565
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
1566
+ that the version used will correspond to the version of the Python interpreter used to start the run.
1553
1567
  """
1554
1568
  ...
1555
1569