ob-metaflow-stubs 6.0.3.159__py2.py3-none-any.whl → 6.0.3.160__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (203)
  1. metaflow-stubs/__init__.pyi +790 -788
  2. metaflow-stubs/cards.pyi +1 -1
  3. metaflow-stubs/cli.pyi +1 -1
  4. metaflow-stubs/cli_components/__init__.pyi +1 -1
  5. metaflow-stubs/cli_components/utils.pyi +1 -1
  6. metaflow-stubs/client/__init__.pyi +1 -1
  7. metaflow-stubs/client/core.pyi +5 -5
  8. metaflow-stubs/client/filecache.pyi +2 -2
  9. metaflow-stubs/events.pyi +2 -2
  10. metaflow-stubs/exception.pyi +1 -1
  11. metaflow-stubs/flowspec.pyi +4 -4
  12. metaflow-stubs/generated_for.txt +1 -1
  13. metaflow-stubs/includefile.pyi +2 -2
  14. metaflow-stubs/info_file.pyi +1 -1
  15. metaflow-stubs/metadata_provider/__init__.pyi +1 -1
  16. metaflow-stubs/metadata_provider/heartbeat.pyi +1 -1
  17. metaflow-stubs/metadata_provider/metadata.pyi +1 -1
  18. metaflow-stubs/metadata_provider/util.pyi +1 -1
  19. metaflow-stubs/metaflow_config.pyi +1 -1
  20. metaflow-stubs/metaflow_current.pyi +88 -88
  21. metaflow-stubs/mf_extensions/__init__.pyi +1 -1
  22. metaflow-stubs/mf_extensions/obcheckpoint/__init__.pyi +1 -1
  23. metaflow-stubs/mf_extensions/obcheckpoint/plugins/__init__.pyi +1 -1
  24. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/__init__.pyi +1 -1
  25. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/__init__.pyi +1 -1
  26. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/async_cards.pyi +1 -1
  27. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/deco_injection_mixin.pyi +1 -1
  28. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/extra_components.pyi +2 -2
  29. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/__init__.pyi +1 -1
  30. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/cards/__init__.pyi +1 -1
  31. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/cards/checkpoint_lister.pyi +3 -3
  32. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/cards/lineage_card.pyi +1 -1
  33. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/checkpoint_storage.pyi +3 -3
  34. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/constructors.pyi +1 -1
  35. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/core.pyi +3 -3
  36. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/decorator.pyi +3 -3
  37. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/exceptions.pyi +1 -1
  38. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/final_api.pyi +2 -2
  39. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/lineage.pyi +1 -1
  40. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/__init__.pyi +1 -1
  41. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/context.pyi +2 -2
  42. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/core.pyi +1 -1
  43. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/decorator.pyi +1 -1
  44. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/exceptions.pyi +1 -1
  45. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/task_utils.pyi +2 -2
  46. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/utils.pyi +1 -1
  47. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastructures.pyi +1 -1
  48. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/exceptions.pyi +1 -1
  49. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/hf_hub/__init__.pyi +1 -1
  50. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/hf_hub/decorator.pyi +2 -2
  51. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/__init__.pyi +1 -1
  52. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/core.pyi +3 -3
  53. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/exceptions.pyi +1 -1
  54. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/model_storage.pyi +2 -2
  55. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/__init__.pyi +1 -1
  56. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/flowspec_utils.pyi +1 -1
  57. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/general.pyi +1 -1
  58. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/identity_utils.pyi +2 -2
  59. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/serialization_handler/__init__.pyi +1 -1
  60. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/serialization_handler/base.pyi +1 -1
  61. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/serialization_handler/tar.pyi +1 -1
  62. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/tar_utils.pyi +1 -1
  63. metaflow-stubs/mf_extensions/outerbounds/__init__.pyi +1 -1
  64. metaflow-stubs/mf_extensions/outerbounds/plugins/__init__.pyi +1 -1
  65. metaflow-stubs/mf_extensions/outerbounds/plugins/card_utilities/__init__.pyi +1 -1
  66. metaflow-stubs/mf_extensions/outerbounds/plugins/card_utilities/injector.pyi +1 -1
  67. metaflow-stubs/mf_extensions/outerbounds/plugins/checkpoint_datastores/__init__.pyi +6 -0
  68. metaflow-stubs/mf_extensions/outerbounds/plugins/checkpoint_datastores/coreweave.pyi +58 -0
  69. metaflow-stubs/mf_extensions/outerbounds/plugins/checkpoint_datastores/nebius.pyi +64 -0
  70. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/__init__.pyi +1 -1
  71. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/ollama.pyi +1 -1
  72. metaflow-stubs/mf_extensions/outerbounds/plugins/snowflake/__init__.pyi +1 -1
  73. metaflow-stubs/mf_extensions/outerbounds/plugins/snowflake/snowflake.pyi +1 -1
  74. metaflow-stubs/mf_extensions/outerbounds/profilers/__init__.pyi +1 -1
  75. metaflow-stubs/mf_extensions/outerbounds/profilers/gpu.pyi +1 -1
  76. metaflow-stubs/mf_extensions/outerbounds/remote_config.pyi +1 -1
  77. metaflow-stubs/mf_extensions/outerbounds/toplevel/__init__.pyi +1 -1
  78. metaflow-stubs/mf_extensions/outerbounds/toplevel/global_aliases_for_metaflow_package.pyi +3 -1
  79. metaflow-stubs/multicore_utils.pyi +1 -1
  80. metaflow-stubs/parameters.pyi +2 -2
  81. metaflow-stubs/plugins/__init__.pyi +12 -12
  82. metaflow-stubs/plugins/airflow/__init__.pyi +1 -1
  83. metaflow-stubs/plugins/airflow/airflow_utils.pyi +1 -1
  84. metaflow-stubs/plugins/airflow/exception.pyi +1 -1
  85. metaflow-stubs/plugins/airflow/sensors/__init__.pyi +1 -1
  86. metaflow-stubs/plugins/airflow/sensors/base_sensor.pyi +1 -1
  87. metaflow-stubs/plugins/airflow/sensors/external_task_sensor.pyi +1 -1
  88. metaflow-stubs/plugins/airflow/sensors/s3_sensor.pyi +1 -1
  89. metaflow-stubs/plugins/argo/__init__.pyi +1 -1
  90. metaflow-stubs/plugins/argo/argo_client.pyi +1 -1
  91. metaflow-stubs/plugins/argo/argo_events.pyi +1 -1
  92. metaflow-stubs/plugins/argo/argo_workflows.pyi +1 -1
  93. metaflow-stubs/plugins/argo/argo_workflows_decorator.pyi +1 -1
  94. metaflow-stubs/plugins/argo/argo_workflows_deployer.pyi +1 -1
  95. metaflow-stubs/plugins/argo/argo_workflows_deployer_objects.pyi +2 -2
  96. metaflow-stubs/plugins/aws/__init__.pyi +1 -1
  97. metaflow-stubs/plugins/aws/aws_client.pyi +1 -1
  98. metaflow-stubs/plugins/aws/aws_utils.pyi +1 -1
  99. metaflow-stubs/plugins/aws/batch/__init__.pyi +1 -1
  100. metaflow-stubs/plugins/aws/batch/batch.pyi +1 -1
  101. metaflow-stubs/plugins/aws/batch/batch_client.pyi +1 -1
  102. metaflow-stubs/plugins/aws/batch/batch_decorator.pyi +1 -1
  103. metaflow-stubs/plugins/aws/secrets_manager/__init__.pyi +1 -1
  104. metaflow-stubs/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.pyi +3 -3
  105. metaflow-stubs/plugins/aws/step_functions/__init__.pyi +1 -1
  106. metaflow-stubs/plugins/aws/step_functions/event_bridge_client.pyi +1 -1
  107. metaflow-stubs/plugins/aws/step_functions/schedule_decorator.pyi +1 -1
  108. metaflow-stubs/plugins/aws/step_functions/step_functions.pyi +1 -1
  109. metaflow-stubs/plugins/aws/step_functions/step_functions_client.pyi +1 -1
  110. metaflow-stubs/plugins/aws/step_functions/step_functions_deployer.pyi +2 -2
  111. metaflow-stubs/plugins/aws/step_functions/step_functions_deployer_objects.pyi +2 -2
  112. metaflow-stubs/plugins/azure/__init__.pyi +1 -1
  113. metaflow-stubs/plugins/azure/azure_credential.pyi +1 -1
  114. metaflow-stubs/plugins/azure/azure_exceptions.pyi +1 -1
  115. metaflow-stubs/plugins/azure/azure_secret_manager_secrets_provider.pyi +3 -3
  116. metaflow-stubs/plugins/azure/azure_utils.pyi +1 -1
  117. metaflow-stubs/plugins/azure/blob_service_client_factory.pyi +1 -1
  118. metaflow-stubs/plugins/azure/includefile_support.pyi +1 -1
  119. metaflow-stubs/plugins/cards/__init__.pyi +1 -1
  120. metaflow-stubs/plugins/cards/card_client.pyi +1 -1
  121. metaflow-stubs/plugins/cards/card_creator.pyi +1 -1
  122. metaflow-stubs/plugins/cards/card_datastore.pyi +1 -1
  123. metaflow-stubs/plugins/cards/card_decorator.pyi +1 -1
  124. metaflow-stubs/plugins/cards/card_modules/__init__.pyi +1 -1
  125. metaflow-stubs/plugins/cards/card_modules/basic.pyi +2 -2
  126. metaflow-stubs/plugins/cards/card_modules/card.pyi +1 -1
  127. metaflow-stubs/plugins/cards/card_modules/components.pyi +3 -3
  128. metaflow-stubs/plugins/cards/card_modules/convert_to_native_type.pyi +1 -1
  129. metaflow-stubs/plugins/cards/card_modules/renderer_tools.pyi +1 -1
  130. metaflow-stubs/plugins/cards/card_modules/test_cards.pyi +1 -1
  131. metaflow-stubs/plugins/cards/card_resolver.pyi +1 -1
  132. metaflow-stubs/plugins/cards/component_serializer.pyi +1 -1
  133. metaflow-stubs/plugins/cards/exception.pyi +1 -1
  134. metaflow-stubs/plugins/catch_decorator.pyi +2 -2
  135. metaflow-stubs/plugins/datatools/__init__.pyi +1 -1
  136. metaflow-stubs/plugins/datatools/local.pyi +1 -1
  137. metaflow-stubs/plugins/datatools/s3/__init__.pyi +1 -1
  138. metaflow-stubs/plugins/datatools/s3/s3.pyi +1 -1
  139. metaflow-stubs/plugins/datatools/s3/s3tail.pyi +1 -1
  140. metaflow-stubs/plugins/datatools/s3/s3util.pyi +1 -1
  141. metaflow-stubs/plugins/debug_logger.pyi +1 -1
  142. metaflow-stubs/plugins/debug_monitor.pyi +1 -1
  143. metaflow-stubs/plugins/environment_decorator.pyi +1 -1
  144. metaflow-stubs/plugins/events_decorator.pyi +1 -1
  145. metaflow-stubs/plugins/frameworks/__init__.pyi +1 -1
  146. metaflow-stubs/plugins/frameworks/pytorch.pyi +1 -1
  147. metaflow-stubs/plugins/gcp/__init__.pyi +1 -1
  148. metaflow-stubs/plugins/gcp/gcp_secret_manager_secrets_provider.pyi +3 -3
  149. metaflow-stubs/plugins/gcp/gs_exceptions.pyi +1 -1
  150. metaflow-stubs/plugins/gcp/gs_storage_client_factory.pyi +1 -1
  151. metaflow-stubs/plugins/gcp/gs_utils.pyi +1 -1
  152. metaflow-stubs/plugins/gcp/includefile_support.pyi +1 -1
  153. metaflow-stubs/plugins/kubernetes/__init__.pyi +1 -1
  154. metaflow-stubs/plugins/kubernetes/kube_utils.pyi +1 -1
  155. metaflow-stubs/plugins/kubernetes/kubernetes.pyi +1 -1
  156. metaflow-stubs/plugins/kubernetes/kubernetes_client.pyi +1 -1
  157. metaflow-stubs/plugins/kubernetes/kubernetes_decorator.pyi +1 -1
  158. metaflow-stubs/plugins/kubernetes/kubernetes_jobsets.pyi +1 -1
  159. metaflow-stubs/plugins/kubernetes/spot_monitor_sidecar.pyi +1 -1
  160. metaflow-stubs/plugins/ollama/__init__.pyi +1 -1
  161. metaflow-stubs/plugins/parallel_decorator.pyi +1 -1
  162. metaflow-stubs/plugins/perimeters.pyi +1 -1
  163. metaflow-stubs/plugins/project_decorator.pyi +1 -1
  164. metaflow-stubs/plugins/pypi/__init__.pyi +1 -1
  165. metaflow-stubs/plugins/pypi/conda_decorator.pyi +1 -1
  166. metaflow-stubs/plugins/pypi/conda_environment.pyi +4 -4
  167. metaflow-stubs/plugins/pypi/parsers.pyi +1 -1
  168. metaflow-stubs/plugins/pypi/pypi_decorator.pyi +1 -1
  169. metaflow-stubs/plugins/pypi/pypi_environment.pyi +1 -1
  170. metaflow-stubs/plugins/pypi/utils.pyi +1 -1
  171. metaflow-stubs/plugins/resources_decorator.pyi +1 -1
  172. metaflow-stubs/plugins/retry_decorator.pyi +1 -1
  173. metaflow-stubs/plugins/secrets/__init__.pyi +1 -1
  174. metaflow-stubs/plugins/secrets/inline_secrets_provider.pyi +2 -2
  175. metaflow-stubs/plugins/secrets/secrets_decorator.pyi +1 -1
  176. metaflow-stubs/plugins/snowflake/__init__.pyi +1 -1
  177. metaflow-stubs/plugins/storage_executor.pyi +1 -1
  178. metaflow-stubs/plugins/test_unbounded_foreach_decorator.pyi +2 -2
  179. metaflow-stubs/plugins/timeout_decorator.pyi +2 -2
  180. metaflow-stubs/profilers/__init__.pyi +1 -1
  181. metaflow-stubs/pylint_wrapper.pyi +1 -1
  182. metaflow-stubs/runner/__init__.pyi +1 -1
  183. metaflow-stubs/runner/deployer.pyi +29 -29
  184. metaflow-stubs/runner/deployer_impl.pyi +1 -1
  185. metaflow-stubs/runner/metaflow_runner.pyi +2 -2
  186. metaflow-stubs/runner/nbdeploy.pyi +1 -1
  187. metaflow-stubs/runner/nbrun.pyi +1 -1
  188. metaflow-stubs/runner/subprocess_manager.pyi +1 -1
  189. metaflow-stubs/runner/utils.pyi +3 -3
  190. metaflow-stubs/system/__init__.pyi +1 -1
  191. metaflow-stubs/system/system_logger.pyi +1 -1
  192. metaflow-stubs/system/system_monitor.pyi +1 -1
  193. metaflow-stubs/tagging_util.pyi +1 -1
  194. metaflow-stubs/tuple_util.pyi +1 -1
  195. metaflow-stubs/user_configs/__init__.pyi +1 -1
  196. metaflow-stubs/user_configs/config_decorators.pyi +4 -4
  197. metaflow-stubs/user_configs/config_options.pyi +2 -2
  198. metaflow-stubs/user_configs/config_parameters.pyi +4 -4
  199. {ob_metaflow_stubs-6.0.3.159.dist-info → ob_metaflow_stubs-6.0.3.160.dist-info}/METADATA +1 -1
  200. ob_metaflow_stubs-6.0.3.160.dist-info/RECORD +203 -0
  201. ob_metaflow_stubs-6.0.3.159.dist-info/RECORD +0 -200
  202. {ob_metaflow_stubs-6.0.3.159.dist-info → ob_metaflow_stubs-6.0.3.160.dist-info}/WHEEL +0 -0
  203. {ob_metaflow_stubs-6.0.3.159.dist-info → ob_metaflow_stubs-6.0.3.160.dist-info}/top_level.txt +0 -0
@@ -1,7 +1,7 @@
1
1
  ######################################################################################################
2
2
  # Auto-generated Metaflow stub file #
3
3
  # MF version: 2.15.7.2+obcheckpoint(0.2.1);ob(v1) #
4
- # Generated on 2025-04-18T04:20:36.937065 #
4
+ # Generated on 2025-05-01T00:24:18.378249 #
5
5
  ######################################################################################################
6
6
 
7
7
  from __future__ import annotations
@@ -35,16 +35,16 @@ from .user_configs.config_parameters import ConfigValue as ConfigValue
35
35
  from .user_configs.config_parameters import config_expr as config_expr
36
36
  from .user_configs.config_decorators import CustomFlowDecorator as CustomFlowDecorator
37
37
  from .user_configs.config_decorators import CustomStepDecorator as CustomStepDecorator
38
+ from . import tuple_util as tuple_util
38
39
  from . import cards as cards
39
40
  from . import events as events
40
- from . import tuple_util as tuple_util
41
41
  from . import runner as runner
42
42
  from . import plugins as plugins
43
43
  from .mf_extensions.outerbounds.toplevel.global_aliases_for_metaflow_package import S3 as S3
44
44
  from . import includefile as includefile
45
45
  from .includefile import IncludeFile as IncludeFile
46
- from .plugins.pypi.parsers import conda_environment_yml_parser as conda_environment_yml_parser
47
46
  from .plugins.pypi.parsers import pyproject_toml_parser as pyproject_toml_parser
47
+ from .plugins.pypi.parsers import conda_environment_yml_parser as conda_environment_yml_parser
48
48
  from .plugins.pypi.parsers import requirements_txt_parser as requirements_txt_parser
49
49
  from . import client as client
50
50
  from .client.core import namespace as namespace
@@ -69,6 +69,8 @@ from .mf_extensions.obcheckpoint.plugins.machine_learning_utilities.datastructur
69
69
  from .mf_extensions.obcheckpoint.plugins.machine_learning_utilities.datastore.context import artifact_store_from as artifact_store_from
70
70
  from .mf_extensions.outerbounds.toplevel.global_aliases_for_metaflow_package import get_aws_client as get_aws_client
71
71
  from .mf_extensions.outerbounds.plugins.snowflake.snowflake import Snowflake as Snowflake
72
+ from .mf_extensions.outerbounds.plugins.checkpoint_datastores.nebius import nebius_checkpoints as nebius_checkpoints
73
+ from .mf_extensions.outerbounds.plugins.checkpoint_datastores.coreweave import coreweave_checkpoints as coreweave_checkpoints
72
74
  from . import cli_components as cli_components
73
75
  from . import system as system
74
76
  from . import pylint_wrapper as pylint_wrapper
@@ -150,823 +152,823 @@ def step(f: typing.Union[typing.Callable[[FlowSpecDerived], None], typing.Callab
150
152
  """
151
153
  ...
152
154
 
153
- def app_deploy(*, app_port: int, app_name: str) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
155
+ @typing.overload
156
+ def catch(*, var: typing.Optional[str] = None, print_exception: bool = True) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
154
157
  """
155
- Specifies that this step is used to deploy an instance of the app.
156
- Requires that self.app_name, self.app_port, self.entrypoint and self.deployDir are set.
158
+ Specifies that the step will succeed under all circumstances.
159
+
160
+ The decorator will create an optional artifact, specified by `var`, which
161
+ contains the exception raised. You can use it to detect the presence
162
+ of errors, indicating that all happy-path artifacts produced by the step
163
+ are missing.
157
164
 
158
165
 
159
166
  Parameters
160
167
  ----------
161
- app_port : int
162
- Port number of the app to deploy.
163
- app_name : str
164
- Name of the app to deploy.
168
+ var : str, optional, default None
169
+ Name of the artifact in which to store the caught exception.
170
+ If not specified, the exception is not stored.
171
+ print_exception : bool, default True
172
+ Determines whether or not the exception is printed to
173
+ stdout when caught.
165
174
  """
166
175
  ...
167
176
 
168
- def ollama(*, models: "list[Ollama]", backend: str) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
177
+ @typing.overload
178
+ def catch(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
179
+ ...
180
+
181
+ @typing.overload
182
+ def catch(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
183
+ ...
184
+
185
+ def catch(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, var: typing.Optional[str] = None, print_exception: bool = True):
169
186
  """
170
- This decorator is used to run Ollama APIs as Metaflow task sidecars.
171
-
172
- User code call
173
- -----------
174
- @ollama(
175
- models=['meta/llama3-8b-instruct', 'meta/llama3-70b-instruct'],
176
- backend='local'
177
- )
178
-
179
- Valid backend options
180
- ---------------------
181
- - 'local': Run as a separate process on the local task machine.
182
- - (TODO) 'managed': Outerbounds hosts and selects compute provider.
183
- - (TODO) 'remote': Spin up separate instance to serve Ollama models.
187
+ Specifies that the step will succeed under all circumstances.
184
188
 
185
- Valid model options
186
- ----------------
187
- - 'llama3.2'
188
- - 'llama3.3'
189
- - any model here https://ollama.com/search
189
+ The decorator will create an optional artifact, specified by `var`, which
190
+ contains the exception raised. You can use it to detect the presence
191
+ of errors, indicating that all happy-path artifacts produced by the step
192
+ are missing.
190
193
 
191
194
 
192
195
  Parameters
193
196
  ----------
194
- models: list[Ollama]
195
- List of Ollama containers running models in sidecars.
196
- backend: str
197
- Determines where and how to run the Ollama process.
197
+ var : str, optional, default None
198
+ Name of the artifact in which to store the caught exception.
199
+ If not specified, the exception is not stored.
200
+ print_exception : bool, default True
201
+ Determines whether or not the exception is printed to
202
+ stdout when caught.
198
203
  """
199
204
  ...
200
205
 
201
206
  @typing.overload
202
- def secrets(*, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = []) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
207
+ def retry(*, times: int = 3, minutes_between_retries: int = 2) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
203
208
  """
204
- Specifies secrets to be retrieved and injected as environment variables prior to
205
- the execution of a step.
209
+ Specifies the number of times the task corresponding
210
+ to a step needs to be retried.
211
+
212
+ This decorator is useful for handling transient errors, such as networking issues.
213
+ If your task contains operations that can't be retried safely, e.g. database updates,
214
+ it is advisable to annotate it with `@retry(times=0)`.
215
+
216
+ This can be used in conjunction with the `@catch` decorator. The `@catch`
217
+ decorator will execute a no-op task after all retries have been exhausted,
218
+ ensuring that the flow execution can continue.
206
219
 
207
220
 
208
221
  Parameters
209
222
  ----------
210
- sources : List[Union[str, Dict[str, Any]]], default: []
211
- List of secret specs, defining how the secrets are to be retrieved
223
+ times : int, default 3
224
+ Number of times to retry this task.
225
+ minutes_between_retries : int, default 2
226
+ Number of minutes between retries.
212
227
  """
213
228
  ...
214
229
 
215
230
  @typing.overload
216
- def secrets(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
231
+ def retry(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
217
232
  ...
218
233
 
219
234
  @typing.overload
220
- def secrets(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
235
+ def retry(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
221
236
  ...
222
237
 
223
- def secrets(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = []):
238
+ def retry(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, times: int = 3, minutes_between_retries: int = 2):
224
239
  """
225
- Specifies secrets to be retrieved and injected as environment variables prior to
226
- the execution of a step.
240
+ Specifies the number of times the task corresponding
241
+ to a step needs to be retried.
242
+
243
+ This decorator is useful for handling transient errors, such as networking issues.
244
+ If your task contains operations that can't be retried safely, e.g. database updates,
245
+ it is advisable to annotate it with `@retry(times=0)`.
246
+
247
+ This can be used in conjunction with the `@catch` decorator. The `@catch`
248
+ decorator will execute a no-op task after all retries have been exhausted,
249
+ ensuring that the flow execution can continue.
227
250
 
228
251
 
229
252
  Parameters
230
253
  ----------
231
- sources : List[Union[str, Dict[str, Any]]], default: []
232
- List of secret specs, defining how the secrets are to be retrieved
254
+ times : int, default 3
255
+ Number of times to retry this task.
256
+ minutes_between_retries : int, default 2
257
+ Number of minutes between retries.
258
+ """
259
+ ...
260
+
261
+ def huggingface_hub(*, temp_dir_root: typing.Optional[str] = None, load: typing.Union[typing.List[str], typing.List[typing.Tuple[typing.Dict, str]], typing.List[typing.Tuple[str, str]], typing.List[typing.Dict], None]) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
262
+ """
263
+ Decorator that helps cache, version and store models/datasets from huggingface hub.
264
+
265
+
266
+ Parameters
267
+ ----------
268
+ temp_dir_root : str, optional
269
+ The root directory that will hold the temporary directory where objects will be downloaded.
270
+
271
+ load: Union[List[str], List[Tuple[Dict, str]], List[Tuple[str, str]], List[Dict], None]
272
+ The list of repos (models/datasets) to load.
273
+
274
+ Loaded repos can be accessed via `current.huggingface_hub.loaded`. If load is set, then the following happens:
275
+
276
+ - If repo (model/dataset) is not found in the datastore:
277
+ - Downloads the repo from Hugging Face Hub to a temporary directory (or uses specified path) for local access
278
+ - Stores it in Metaflow's datastore (s3/gcs/azure etc.) with a unique name based on repo_type/repo_id
279
+ - All HF models loaded for a `@step` will be cached separately under flow/step/namespace.
280
+
281
+ - If repo is found in the datastore:
282
+ - Loads it directly from datastore to local path (can be temporary directory or specified path)
283
+ """
284
+ ...
285
+
286
+ def app_deploy(*, app_port: int, app_name: str) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
287
+ """
288
+ Specifies that this step is used to deploy an instance of the app.
289
+ Requires that self.app_name, self.app_port, self.entrypoint and self.deployDir are set.
290
+
291
+
292
+ Parameters
293
+ ----------
294
+ app_port : int
295
+ Port number of the app to deploy.
296
+ app_name : str
297
+ Name of the app to deploy.
233
298
  """
234
299
  ...
235
300
 
236
301
  @typing.overload
237
- def conda(*, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
302
+ def timeout(*, seconds: int = 0, minutes: int = 0, hours: int = 0) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
238
303
  """
239
- Specifies the Conda environment for the step.
304
+ Specifies a timeout for your step.
240
305
 
241
- Information in this decorator will augment any
242
- attributes set in the `@conda_base` flow-level decorator. Hence,
243
- you can use `@conda_base` to set packages required by all
244
- steps and use `@conda` to specify step-specific overrides.
306
+ This decorator is useful if this step may hang indefinitely.
307
+
308
+ This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
309
+ A timeout is considered to be an exception thrown by the step. It will cause the step to be
310
+ retried if needed and the exception will be caught by the `@catch` decorator, if present.
311
+
312
+ Note that all the values specified in parameters are added together so if you specify
313
+ 60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.
245
314
 
246
315
 
247
316
  Parameters
248
317
  ----------
249
- packages : Dict[str, str], default {}
250
- Packages to use for this step. The key is the name of the package
251
- and the value is the version to use.
252
- libraries : Dict[str, str], default {}
253
- Supported for backward compatibility. When used with packages, packages will take precedence.
254
- python : str, optional, default None
255
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
256
- that the version used will correspond to the version of the Python interpreter used to start the run.
257
- disabled : bool, default False
258
- If set to True, disables @conda.
318
+ seconds : int, default 0
319
+ Number of seconds to wait prior to timing out.
320
+ minutes : int, default 0
321
+ Number of minutes to wait prior to timing out.
322
+ hours : int, default 0
323
+ Number of hours to wait prior to timing out.
259
324
  """
260
325
  ...
261
326
 
262
327
  @typing.overload
263
- def conda(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
328
+ def timeout(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
264
329
  ...
265
330
 
266
331
  @typing.overload
267
- def conda(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
332
+ def timeout(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
268
333
  ...
269
334
 
270
- def conda(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False):
335
+ def timeout(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, seconds: int = 0, minutes: int = 0, hours: int = 0):
271
336
  """
272
- Specifies the Conda environment for the step.
337
+ Specifies a timeout for your step.
273
338
 
274
- Information in this decorator will augment any
275
- attributes set in the `@conda_base` flow-level decorator. Hence,
276
- you can use `@conda_base` to set packages required by all
277
- steps and use `@conda` to specify step-specific overrides.
339
+ This decorator is useful if this step may hang indefinitely.
340
+
341
+ This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
342
+ A timeout is considered to be an exception thrown by the step. It will cause the step to be
343
+ retried if needed and the exception will be caught by the `@catch` decorator, if present.
344
+
345
+ Note that all the values specified in parameters are added together so if you specify
346
+ 60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.
278
347
 
279
348
 
280
349
  Parameters
281
350
  ----------
282
- packages : Dict[str, str], default {}
283
- Packages to use for this step. The key is the name of the package
284
- and the value is the version to use.
285
- libraries : Dict[str, str], default {}
286
- Supported for backward compatibility. When used with packages, packages will take precedence.
287
- python : str, optional, default None
288
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
289
- that the version used will correspond to the version of the Python interpreter used to start the run.
290
- disabled : bool, default False
291
- If set to True, disables @conda.
351
+ seconds : int, default 0
352
+ Number of seconds to wait prior to timing out.
353
+ minutes : int, default 0
354
+ Number of minutes to wait prior to timing out.
355
+ hours : int, default 0
356
+ Number of hours to wait prior to timing out.
292
357
  """
293
358
  ...
294
359
 
295
- def kubernetes(*, cpu: int = 1, memory: int = 4096, disk: int = 10240, image: typing.Optional[str] = None, image_pull_policy: str = 'KUBERNETES_IMAGE_PULL_POLICY', service_account: str = 'METAFLOW_KUBERNETES_SERVICE_ACCOUNT', secrets: typing.Optional[typing.List[str]] = None, node_selector: typing.Union[typing.Dict[str, str], str, None] = None, namespace: str = 'METAFLOW_KUBERNETES_NAMESPACE', gpu: typing.Optional[int] = None, gpu_vendor: str = 'KUBERNETES_GPU_VENDOR', tolerations: typing.List[str] = [], labels: typing.Dict[str, str] = 'METAFLOW_KUBERNETES_LABELS', annotations: typing.Dict[str, str] = 'METAFLOW_KUBERNETES_ANNOTATIONS', use_tmpfs: bool = False, tmpfs_tempdir: bool = True, tmpfs_size: typing.Optional[int] = None, tmpfs_path: typing.Optional[str] = '/metaflow_temp', persistent_volume_claims: typing.Optional[typing.Dict[str, str]] = None, shared_memory: typing.Optional[int] = None, port: typing.Optional[int] = None, compute_pool: typing.Optional[str] = None, hostname_resolution_timeout: int = 600, qos: str = 'Burstable') -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
360
+ @typing.overload
361
+ def fast_bakery_internal(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
296
362
  """
297
- Specifies that this step should execute on Kubernetes.
363
+ Internal decorator to support Fast bakery
364
+ """
365
+ ...
366
+
367
+ @typing.overload
368
+ def fast_bakery_internal(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
369
+ ...
370
+
371
+ def fast_bakery_internal(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
372
+ """
373
+ Internal decorator to support Fast bakery
374
+ """
375
+ ...
376
+
377
+ @typing.overload
378
+ def card(*, type: str = 'default', id: typing.Optional[str] = None, options: typing.Dict[str, typing.Any] = {}, timeout: int = 45) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
379
+ """
380
+ Creates a human-readable report, a Metaflow Card, after this step completes.
381
+
382
+ Note that you may add multiple `@card` decorators in a step with different parameters.
298
383
 
299
384
 
300
385
  Parameters
301
386
  ----------
302
- cpu : int, default 1
303
- Number of CPUs required for this step. If `@resources` is
304
- also present, the maximum value from all decorators is used.
305
- memory : int, default 4096
306
- Memory size (in MB) required for this step. If
307
- `@resources` is also present, the maximum value from all decorators is
308
- used.
309
- disk : int, default 10240
310
- Disk size (in MB) required for this step. If
311
- `@resources` is also present, the maximum value from all decorators is
312
- used.
313
- image : str, optional, default None
314
- Docker image to use when launching on Kubernetes. If not specified, and
315
- METAFLOW_KUBERNETES_CONTAINER_IMAGE is specified, that image is used. If
316
- not, a default Docker image mapping to the current version of Python is used.
317
- image_pull_policy: str, default KUBERNETES_IMAGE_PULL_POLICY
318
- If given, the imagePullPolicy to be applied to the Docker image of the step.
319
- service_account : str, default METAFLOW_KUBERNETES_SERVICE_ACCOUNT
320
- Kubernetes service account to use when launching pod in Kubernetes.
321
- secrets : List[str], optional, default None
322
- Kubernetes secrets to use when launching pod in Kubernetes. These
323
- secrets are in addition to the ones defined in `METAFLOW_KUBERNETES_SECRETS`
324
- in Metaflow configuration.
325
- node_selector: Union[Dict[str,str], str], optional, default None
326
- Kubernetes node selector(s) to apply to the pod running the task.
327
- Can be passed in as a comma separated string of values e.g.
328
- 'kubernetes.io/os=linux,kubernetes.io/arch=amd64' or as a dictionary
329
- {'kubernetes.io/os': 'linux', 'kubernetes.io/arch': 'amd64'}
330
- namespace : str, default METAFLOW_KUBERNETES_NAMESPACE
331
- Kubernetes namespace to use when launching pod in Kubernetes.
332
- gpu : int, optional, default None
333
- Number of GPUs required for this step. A value of zero implies that
334
- the scheduled node should not have GPUs.
335
- gpu_vendor : str, default KUBERNETES_GPU_VENDOR
336
- The vendor of the GPUs to be used for this step.
337
- tolerations : List[str], default []
338
- The default is extracted from METAFLOW_KUBERNETES_TOLERATIONS.
339
- Kubernetes tolerations to use when launching pod in Kubernetes.
340
- labels: Dict[str, str], default: METAFLOW_KUBERNETES_LABELS
341
- Kubernetes labels to use when launching pod in Kubernetes.
342
- annotations: Dict[str, str], default: METAFLOW_KUBERNETES_ANNOTATIONS
343
- Kubernetes annotations to use when launching pod in Kubernetes.
344
- use_tmpfs : bool, default False
345
- This enables an explicit tmpfs mount for this step.
346
- tmpfs_tempdir : bool, default True
347
- sets METAFLOW_TEMPDIR to tmpfs_path if set for this step.
348
- tmpfs_size : int, optional, default: None
349
- The value for the size (in MiB) of the tmpfs mount for this step.
350
- This parameter maps to the `--tmpfs` option in Docker. Defaults to 50% of the
351
- memory allocated for this step.
352
- tmpfs_path : str, optional, default /metaflow_temp
353
- Path to tmpfs mount for this step.
354
- persistent_volume_claims : Dict[str, str], optional, default None
355
- A map (dictionary) of persistent volumes to be mounted to the pod for this step. The map is from persistent
356
- volumes to the path to which the volume is to be mounted, e.g., `{'pvc-name': '/path/to/mount/on'}`.
357
- shared_memory: int, optional
358
- Shared memory size (in MiB) required for this step
359
- port: int, optional
360
- Port number to specify in the Kubernetes job object
361
- compute_pool : str, optional, default None
362
- Compute pool to be used for for this step.
363
- If not specified, any accessible compute pool within the perimeter is used.
364
- hostname_resolution_timeout: int, default 10 * 60
365
- Timeout in seconds for the workers tasks in the gang scheduled cluster to resolve the hostname of control task.
366
- Only applicable when @parallel is used.
367
- qos: str, default: Burstable
368
- Quality of Service class to assign to the pod. Supported values are: Guaranteed, Burstable, BestEffort
387
+ type : str, default 'default'
388
+ Card type.
389
+ id : str, optional, default None
390
+ If multiple cards are present, use this id to identify this card.
391
+ options : Dict[str, Any], default {}
392
+ Options passed to the card. The contents depend on the card type.
393
+ timeout : int, default 45
394
+ Interrupt reporting if it takes more than this many seconds.
369
395
  """
370
396
  ...
371
397
 
372
398
  @typing.overload
373
- def resources(*, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
399
+ def card(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
400
+ ...
401
+
402
+ @typing.overload
403
+ def card(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
404
+ ...
405
+
406
+ def card(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, type: str = 'default', id: typing.Optional[str] = None, options: typing.Dict[str, typing.Any] = {}, timeout: int = 45):
374
407
  """
375
- Specifies the resources needed when executing this step.
376
-
377
- Use `@resources` to specify the resource requirements
378
- independently of the specific compute layer (`@batch`, `@kubernetes`).
408
+ Creates a human-readable report, a Metaflow Card, after this step completes.
379
409
 
380
- You can choose the compute layer on the command line by executing e.g.
381
- ```
382
- python myflow.py run --with batch
383
- ```
384
- or
385
- ```
386
- python myflow.py run --with kubernetes
387
- ```
388
- which executes the flow on the desired system using the
389
- requirements specified in `@resources`.
410
+ Note that you may add multiple `@card` decorators in a step with different parameters.
390
411
 
391
412
 
392
413
  Parameters
393
414
  ----------
394
- cpu : int, default 1
395
- Number of CPUs required for this step.
396
- gpu : int, optional, default None
397
- Number of GPUs required for this step.
398
- disk : int, optional, default None
399
- Disk size (in MB) required for this step. Only applies on Kubernetes.
400
- memory : int, default 4096
401
- Memory size (in MB) required for this step.
402
- shared_memory : int, optional, default None
403
- The value for the size (in MiB) of the /dev/shm volume for this step.
404
- This parameter maps to the `--shm-size` option in Docker.
415
+ type : str, default 'default'
416
+ Card type.
417
+ id : str, optional, default None
418
+ If multiple cards are present, use this id to identify this card.
419
+ options : Dict[str, Any], default {}
420
+ Options passed to the card. The contents depend on the card type.
421
+ timeout : int, default 45
422
+ Interrupt reporting if it takes more than this many seconds.
405
423
  """
406
424
  ...
407
425
 
408
- @typing.overload
409
- def resources(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
410
- ...
411
-
412
- @typing.overload
413
- def resources(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
414
- ...
415
-
416
- def resources(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None):
426
+ def nim(*, models: "list[NIM]", backend: str, queue_timeout: int) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
417
427
  """
418
- Specifies the resources needed when executing this step.
428
+ This decorator is used to run NIM containers in Metaflow tasks as sidecars.
419
429
 
420
- Use `@resources` to specify the resource requirements
421
- independently of the specific compute layer (`@batch`, `@kubernetes`).
430
+ User code call
431
+ -----------
432
+ @nim(
433
+ models=['meta/llama3-8b-instruct', 'meta/llama3-70b-instruct'],
434
+ backend='managed'
435
+ )
422
436
 
423
- You can choose the compute layer on the command line by executing e.g.
424
- ```
425
- python myflow.py run --with batch
426
- ```
427
- or
428
- ```
429
- python myflow.py run --with kubernetes
430
- ```
431
- which executes the flow on the desired system using the
432
- requirements specified in `@resources`.
437
+ Valid backend options
438
+ ---------------------
439
+ - 'managed': Outerbounds selects a compute provider based on the model.
440
+
441
+ Valid model options
442
+ ----------------
443
+ - 'meta/llama3-8b-instruct': 8B parameter model
444
+ - 'meta/llama3-70b-instruct': 70B parameter model
445
+ - any model here: https://nvcf.ngc.nvidia.com/functions?filter=nvidia-functions
433
446
 
434
447
 
435
448
  Parameters
436
449
  ----------
437
- cpu : int, default 1
438
- Number of CPUs required for this step.
439
- gpu : int, optional, default None
440
- Number of GPUs required for this step.
441
- disk : int, optional, default None
442
- Disk size (in MB) required for this step. Only applies on Kubernetes.
443
- memory : int, default 4096
444
- Memory size (in MB) required for this step.
445
- shared_memory : int, optional, default None
446
- The value for the size (in MiB) of the /dev/shm volume for this step.
447
- This parameter maps to the `--shm-size` option in Docker.
450
+ models: list[NIM]
451
+ List of NIM containers running models in sidecars.
452
+ backend: str
453
+ Compute provider to run the NIM container.
454
+ queue_timeout : int
455
+ Time to keep the job in NVCF's queue.
448
456
  """
449
457
  ...
450
458
 
451
459
  @typing.overload
452
- def environment(*, vars: typing.Dict[str, str] = {}) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
460
+ def conda(*, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
453
461
  """
454
- Specifies environment variables to be set prior to the execution of a step.
462
+ Specifies the Conda environment for the step.
463
+
464
+ Information in this decorator will augment any
465
+ attributes set in the `@conda_base` flow-level decorator. Hence,
466
+ you can use `@conda_base` to set packages required by all
467
+ steps and use `@conda` to specify step-specific overrides.
455
468
 
456
469
 
457
470
  Parameters
458
471
  ----------
459
- vars : Dict[str, str], default {}
460
- Dictionary of environment variables to set.
472
+ packages : Dict[str, str], default {}
473
+ Packages to use for this step. The key is the name of the package
474
+ and the value is the version to use.
475
+ libraries : Dict[str, str], default {}
476
+ Supported for backward compatibility. When used with packages, packages will take precedence.
477
+ python : str, optional, default None
478
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
479
+ that the version used will correspond to the version of the Python interpreter used to start the run.
480
+ disabled : bool, default False
481
+ If set to True, disables @conda.
461
482
  """
462
483
  ...
463
484
 
464
485
  @typing.overload
465
- def environment(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
486
+ def conda(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
466
487
  ...
467
488
 
468
489
  @typing.overload
469
- def environment(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
490
+ def conda(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
470
491
  ...
471
492
 
472
- def environment(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, vars: typing.Dict[str, str] = {}):
493
+ def conda(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False):
473
494
  """
474
- Specifies environment variables to be set prior to the execution of a step.
495
+ Specifies the Conda environment for the step.
496
+
497
+ Information in this decorator will augment any
498
+ attributes set in the `@conda_base` flow-level decorator. Hence,
499
+ you can use `@conda_base` to set packages required by all
500
+ steps and use `@conda` to specify step-specific overrides.
475
501
 
476
502
 
477
503
  Parameters
478
504
  ----------
479
- vars : Dict[str, str], default {}
480
- Dictionary of environment variables to set.
505
+ packages : Dict[str, str], default {}
506
+ Packages to use for this step. The key is the name of the package
507
+ and the value is the version to use.
508
+ libraries : Dict[str, str], default {}
509
+ Supported for backward compatibility. When used with packages, packages will take precedence.
510
+ python : str, optional, default None
511
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
512
+ that the version used will correspond to the version of the Python interpreter used to start the run.
513
+ disabled : bool, default False
514
+ If set to True, disables @conda.
481
515
  """
482
516
  ...
483
517
 
484
- @typing.overload
485
- def retry(*, times: int = 3, minutes_between_retries: int = 2) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
518
+ def ollama(*, models: "list[Ollama]", backend: str) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
486
519
  """
487
- Specifies the number of times the task corresponding
488
- to a step needs to be retried.
520
+ This decorator is used to run Ollama APIs as Metaflow task sidecars.
489
521
 
490
- This decorator is useful for handling transient errors, such as networking issues.
491
- If your task contains operations that can't be retried safely, e.g. database updates,
492
- it is advisable to annotate it with `@retry(times=0)`.
522
+ User code call
523
+ -----------
524
+ @ollama(
525
+ models=['meta/llama3-8b-instruct', 'meta/llama3-70b-instruct'],
526
+ backend='local'
527
+ )
493
528
 
494
- This can be used in conjunction with the `@catch` decorator. The `@catch`
495
- decorator will execute a no-op task after all retries have been exhausted,
496
- ensuring that the flow execution can continue.
529
+ Valid backend options
530
+ ---------------------
531
+ - 'local': Run as a separate process on the local task machine.
532
+ - (TODO) 'managed': Outerbounds hosts and selects compute provider.
533
+ - (TODO) 'remote': Spin up separate instance to serve Ollama models.
534
+
535
+ Valid model options
536
+ ----------------
537
+ - 'llama3.2'
538
+ - 'llama3.3'
539
+ - any model here https://ollama.com/search
497
540
 
498
541
 
499
542
  Parameters
500
543
  ----------
501
- times : int, default 3
502
- Number of times to retry this task.
503
- minutes_between_retries : int, default 2
504
- Number of minutes between retries.
544
+ models: list[Ollama]
545
+ List of Ollama containers running models in sidecars.
546
+ backend: str
547
+ Determines where and how to run the Ollama process.
505
548
  """
506
549
  ...
507
550
 
508
551
  @typing.overload
509
- def retry(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
552
+ def parallel(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
553
+ """
554
+ Decorator prototype for all step decorators. This function gets specialized
555
+ and imported for all decorators types by _import_plugin_decorators().
556
+ """
510
557
  ...
511
558
 
512
559
  @typing.overload
513
- def retry(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
560
+ def parallel(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
514
561
  ...
515
562
 
516
- def retry(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, times: int = 3, minutes_between_retries: int = 2):
563
+ def parallel(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
517
564
  """
518
- Specifies the number of times the task corresponding
519
- to a step needs to be retried.
520
-
521
- This decorator is useful for handling transient errors, such as networking issues.
522
- If your task contains operations that can't be retried safely, e.g. database updates,
523
- it is advisable to annotate it with `@retry(times=0)`.
524
-
525
- This can be used in conjunction with the `@catch` decorator. The `@catch`
526
- decorator will execute a no-op task after all retries have been exhausted,
527
- ensuring that the flow execution can continue.
528
-
529
-
530
- Parameters
531
- ----------
532
- times : int, default 3
533
- Number of times to retry this task.
534
- minutes_between_retries : int, default 2
535
- Number of minutes between retries.
565
+ Decorator prototype for all step decorators. This function gets specialized
566
+ and imported for all decorators types by _import_plugin_decorators().
536
567
  """
537
568
  ...
538
569
 
539
570
  @typing.overload
540
- def timeout(*, seconds: int = 0, minutes: int = 0, hours: int = 0) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
571
+ def environment(*, vars: typing.Dict[str, str] = {}) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
541
572
  """
542
- Specifies a timeout for your step.
543
-
544
- This decorator is useful if this step may hang indefinitely.
545
-
546
- This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
547
- A timeout is considered to be an exception thrown by the step. It will cause the step to be
548
- retried if needed and the exception will be caught by the `@catch` decorator, if present.
549
-
550
- Note that all the values specified in parameters are added together so if you specify
551
- 60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.
573
+ Specifies environment variables to be set prior to the execution of a step.
552
574
 
553
575
 
554
576
  Parameters
555
577
  ----------
556
- seconds : int, default 0
557
- Number of seconds to wait prior to timing out.
558
- minutes : int, default 0
559
- Number of minutes to wait prior to timing out.
560
- hours : int, default 0
561
- Number of hours to wait prior to timing out.
578
+ vars : Dict[str, str], default {}
579
+ Dictionary of environment variables to set.
562
580
  """
563
581
  ...
564
582
 
565
583
  @typing.overload
566
- def timeout(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
584
+ def environment(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
567
585
  ...
568
586
 
569
587
  @typing.overload
570
- def timeout(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
588
+ def environment(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
571
589
  ...
572
590
 
573
- def timeout(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, seconds: int = 0, minutes: int = 0, hours: int = 0):
591
+ def environment(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, vars: typing.Dict[str, str] = {}):
574
592
  """
575
- Specifies a timeout for your step.
576
-
577
- This decorator is useful if this step may hang indefinitely.
578
-
579
- This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
580
- A timeout is considered to be an exception thrown by the step. It will cause the step to be
581
- retried if needed and the exception will be caught by the `@catch` decorator, if present.
582
-
583
- Note that all the values specified in parameters are added together so if you specify
584
- 60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.
593
+ Specifies environment variables to be set prior to the execution of a step.
585
594
 
586
595
 
587
596
  Parameters
588
597
  ----------
589
- seconds : int, default 0
590
- Number of seconds to wait prior to timing out.
591
- minutes : int, default 0
592
- Number of minutes to wait prior to timing out.
593
- hours : int, default 0
594
- Number of hours to wait prior to timing out.
598
+ vars : Dict[str, str], default {}
599
+ Dictionary of environment variables to set.
595
600
  """
596
601
  ...
597
602
 
598
603
  @typing.overload
599
- def catch(*, var: typing.Optional[str] = None, print_exception: bool = True) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
604
+ def checkpoint(*, load_policy: str = 'fresh', temp_dir_root: str = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
600
605
  """
601
- Specifies that the step will success under all circumstances.
606
+ Enables checkpointing for a step.
602
607
 
603
- The decorator will create an optional artifact, specified by `var`, which
604
- contains the exception raised. You can use it to detect the presence
605
- of errors, indicating that all happy-path artifacts produced by the step
606
- are missing.
607
608
 
608
609
 
609
610
  Parameters
610
611
  ----------
611
- var : str, optional, default None
612
- Name of the artifact in which to store the caught exception.
613
- If not specified, the exception is not stored.
614
- print_exception : bool, default True
615
- Determines whether or not the exception is printed to
616
- stdout when caught.
612
+ load_policy : str, default: "fresh"
613
+ The policy for loading the checkpoint. The following policies are supported:
614
+ - "eager": Loads the latest available checkpoint within the namespace.
615
+ With this mode, the latest checkpoint written by any previous task (can be even a different run) of the step
616
+ will be loaded at the start of the task.
617
+ - "none": Do not load any checkpoint
618
+ - "fresh": Loads the latest checkpoint created within the running Task.
619
+ This mode helps loading checkpoints across various retry attempts of the same task.
620
+ With this mode, no checkpoint will be loaded at the start of a task but any checkpoints
621
+ created within the task will be loaded when the task retries execution on failure.
622
+
623
+ temp_dir_root : str, default: None
624
+ The root directory under which `current.checkpoint.directory` will be created.
617
625
  """
618
626
  ...
619
627
 
620
628
  @typing.overload
621
- def catch(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
629
+ def checkpoint(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
622
630
  ...
623
631
 
624
632
  @typing.overload
625
- def catch(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
633
+ def checkpoint(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
626
634
  ...
627
635
 
628
- def catch(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, var: typing.Optional[str] = None, print_exception: bool = True):
636
+ def checkpoint(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, load_policy: str = 'fresh', temp_dir_root: str = None):
629
637
  """
630
- Specifies that the step will success under all circumstances.
638
+ Enables checkpointing for a step.
631
639
 
632
- The decorator will create an optional artifact, specified by `var`, which
633
- contains the exception raised. You can use it to detect the presence
634
- of errors, indicating that all happy-path artifacts produced by the step
635
- are missing.
636
640
 
637
641
 
638
642
  Parameters
639
643
  ----------
640
- var : str, optional, default None
641
- Name of the artifact in which to store the caught exception.
642
- If not specified, the exception is not stored.
643
- print_exception : bool, default True
644
- Determines whether or not the exception is printed to
645
- stdout when caught.
644
+ load_policy : str, default: "fresh"
645
+ The policy for loading the checkpoint. The following policies are supported:
646
+ - "eager": Loads the latest available checkpoint within the namespace.
647
+ With this mode, the latest checkpoint written by any previous task (can be even a different run) of the step
648
+ will be loaded at the start of the task.
649
+ - "none": Do not load any checkpoint
650
+ - "fresh": Loads the latest checkpoint created within the running Task.
651
+ This mode helps loading checkpoints across various retry attempts of the same task.
652
+ With this mode, no checkpoint will be loaded at the start of a task but any checkpoints
653
+ created within the task will be loaded when the task retries execution on failure.
654
+
655
+ temp_dir_root : str, default: None
656
+ The root directory under which `current.checkpoint.directory` will be created.
646
657
  """
647
658
  ...
648
659
 
649
660
  @typing.overload
650
- def model(*, load: typing.Union[typing.List[str], str, typing.List[typing.Tuple[str, typing.Optional[str]]]] = None, temp_dir_root: str = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
661
+ def secrets(*, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = []) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
651
662
  """
652
- Enables loading / saving of models within a step.
653
-
663
+ Specifies secrets to be retrieved and injected as environment variables prior to
664
+ the execution of a step.
654
665
 
655
666
 
656
667
  Parameters
657
668
  ----------
658
- load : Union[List[str],str,List[Tuple[str,Union[str,None]]]], default: None
659
- Artifact name/s referencing the models/checkpoints to load. Artifact names refer to the names of the instance variables set to `self`.
660
- These artifact names give to `load` be reference objects or reference `key` string's from objects created by:
661
- - `current.checkpoint`
662
- - `current.model`
663
- - `current.huggingface_hub`
664
-
665
- If a list of tuples is provided, the first element is the artifact name and the second element is the path the artifact needs be unpacked on
666
- the local filesystem. If the second element is None, the artifact will be unpacked in the current working directory.
667
- If a string is provided, then the artifact corresponding to that name will be loaded in the current working directory.
668
-
669
- temp_dir_root : str, default: None
670
- The root directory under which `current.model.loaded` will store loaded models
669
+ sources : List[Union[str, Dict[str, Any]]], default: []
670
+ List of secret specs, defining how the secrets are to be retrieved
671
671
  """
672
672
  ...
673
673
 
674
674
  @typing.overload
675
- def model(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
675
+ def secrets(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
676
676
  ...
677
677
 
678
678
  @typing.overload
679
- def model(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
680
- ...
681
-
682
- def model(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, load: typing.Union[typing.List[str], str, typing.List[typing.Tuple[str, typing.Optional[str]]]] = None, temp_dir_root: str = None):
683
- """
684
- Enables loading / saving of models within a step.
685
-
686
-
687
-
688
- Parameters
689
- ----------
690
- load : Union[List[str],str,List[Tuple[str,Union[str,None]]]], default: None
691
- Artifact name/s referencing the models/checkpoints to load. Artifact names refer to the names of the instance variables set to `self`.
692
- These artifact names give to `load` be reference objects or reference `key` string's from objects created by:
693
- - `current.checkpoint`
694
- - `current.model`
695
- - `current.huggingface_hub`
696
-
697
- If a list of tuples is provided, the first element is the artifact name and the second element is the path the artifact needs be unpacked on
698
- the local filesystem. If the second element is None, the artifact will be unpacked in the current working directory.
699
- If a string is provided, then the artifact corresponding to that name will be loaded in the current working directory.
700
-
701
- temp_dir_root : str, default: None
702
- The root directory under which `current.model.loaded` will store loaded models
703
- """
704
- ...
705
-
706
- def nvidia(*, gpu: int, gpu_type: str, queue_timeout: int) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
707
- """
708
- Specifies that this step should execute on DGX cloud.
709
-
710
-
711
- Parameters
712
- ----------
713
- gpu : int
714
- Number of GPUs to use.
715
- gpu_type : str
716
- Type of Nvidia GPU to use.
717
- queue_timeout : int
718
- Time to keep the job in NVCF's queue.
719
- """
679
+ def secrets(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
720
680
  ...
721
681
 
722
- def huggingface_hub(*, temp_dir_root: typing.Optional[str] = None, load: typing.Union[typing.List[str], typing.List[typing.Tuple[typing.Dict, str]], typing.List[typing.Tuple[str, str]], typing.List[typing.Dict], None]) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
682
+ def secrets(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = []):
723
683
  """
724
- Decorator that helps cache, version and store models/datasets from huggingface hub.
684
+ Specifies secrets to be retrieved and injected as environment variables prior to
685
+ the execution of a step.
725
686
 
726
687
 
727
688
  Parameters
728
689
  ----------
729
- temp_dir_root : str, optional
730
- The root directory that will hold the temporary directory where objects will be downloaded.
731
-
732
- load: Union[List[str], List[Tuple[Dict, str]], List[Tuple[str, str]], List[Dict], None]
733
- The list of repos (models/datasets) to load.
734
-
735
- Loaded repos can be accessed via `current.huggingface_hub.loaded`. If load is set, then the following happens:
736
-
737
- - If repo (model/dataset) is not found in the datastore:
738
- - Downloads the repo from Hugging Face Hub to a temporary directory (or uses specified path) for local access
739
- - Stores it in Metaflow's datastore (s3/gcs/azure etc.) with a unique name based on repo_type/repo_id
740
- - All HF models loaded for a `@step` will be cached separately under flow/step/namespace.
741
-
742
- - If repo is found in the datastore:
743
- - Loads it directly from datastore to local path (can be temporary directory or specified path)
690
+ sources : List[Union[str, Dict[str, Any]]], default: []
691
+ List of secret specs, defining how the secrets are to be retrieved
744
692
  """
745
693
  ...
746
694
 
747
695
  @typing.overload
748
- def card(*, type: str = 'default', id: typing.Optional[str] = None, options: typing.Dict[str, typing.Any] = {}, timeout: int = 45) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
696
+ def pypi(*, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
749
697
  """
750
- Creates a human-readable report, a Metaflow Card, after this step completes.
698
+ Specifies the PyPI packages for the step.
751
699
 
752
- Note that you may add multiple `@card` decorators in a step with different parameters.
700
+ Information in this decorator will augment any
701
+ attributes set in the `@pypi_base` flow-level decorator. Hence,
702
+ you can use `@pypi_base` to set packages required by all
703
+ steps and use `@pypi` to specify step-specific overrides.
753
704
 
754
705
 
755
706
  Parameters
756
707
  ----------
757
- type : str, default 'default'
758
- Card type.
759
- id : str, optional, default None
760
- If multiple cards are present, use this id to identify this card.
761
- options : Dict[str, Any], default {}
762
- Options passed to the card. The contents depend on the card type.
763
- timeout : int, default 45
764
- Interrupt reporting if it takes more than this many seconds.
708
+ packages : Dict[str, str], default: {}
709
+ Packages to use for this step. The key is the name of the package
710
+ and the value is the version to use.
711
+ python : str, optional, default: None
712
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
713
+ that the version used will correspond to the version of the Python interpreter used to start the run.
765
714
  """
766
715
  ...
767
716
 
768
717
  @typing.overload
769
- def card(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
718
+ def pypi(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
770
719
  ...
771
720
 
772
721
  @typing.overload
773
- def card(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
722
+ def pypi(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
774
723
  ...
775
724
 
776
- def card(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, type: str = 'default', id: typing.Optional[str] = None, options: typing.Dict[str, typing.Any] = {}, timeout: int = 45):
725
+ def pypi(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None):
777
726
  """
778
- Creates a human-readable report, a Metaflow Card, after this step completes.
727
+ Specifies the PyPI packages for the step.
779
728
 
780
- Note that you may add multiple `@card` decorators in a step with different parameters.
729
+ Information in this decorator will augment any
730
+ attributes set in the `@pypi_base` flow-level decorator. Hence,
731
+ you can use `@pypi_base` to set packages required by all
732
+ steps and use `@pypi` to specify step-specific overrides.
781
733
 
782
734
 
783
735
  Parameters
784
736
  ----------
785
- type : str, default 'default'
786
- Card type.
787
- id : str, optional, default None
788
- If multiple cards are present, use this id to identify this card.
789
- options : Dict[str, Any], default {}
790
- Options passed to the card. The contents depend on the card type.
791
- timeout : int, default 45
792
- Interrupt reporting if it takes more than this many seconds.
737
+ packages : Dict[str, str], default: {}
738
+ Packages to use for this step. The key is the name of the package
739
+ and the value is the version to use.
740
+ python : str, optional, default: None
741
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
742
+ that the version used will correspond to the version of the Python interpreter used to start the run.
743
+ """
744
+ ...
745
+
746
+ def kubernetes(*, cpu: int = 1, memory: int = 4096, disk: int = 10240, image: typing.Optional[str] = None, image_pull_policy: str = 'KUBERNETES_IMAGE_PULL_POLICY', service_account: str = 'METAFLOW_KUBERNETES_SERVICE_ACCOUNT', secrets: typing.Optional[typing.List[str]] = None, node_selector: typing.Union[typing.Dict[str, str], str, None] = None, namespace: str = 'METAFLOW_KUBERNETES_NAMESPACE', gpu: typing.Optional[int] = None, gpu_vendor: str = 'KUBERNETES_GPU_VENDOR', tolerations: typing.List[str] = [], labels: typing.Dict[str, str] = 'METAFLOW_KUBERNETES_LABELS', annotations: typing.Dict[str, str] = 'METAFLOW_KUBERNETES_ANNOTATIONS', use_tmpfs: bool = False, tmpfs_tempdir: bool = True, tmpfs_size: typing.Optional[int] = None, tmpfs_path: typing.Optional[str] = '/metaflow_temp', persistent_volume_claims: typing.Optional[typing.Dict[str, str]] = None, shared_memory: typing.Optional[int] = None, port: typing.Optional[int] = None, compute_pool: typing.Optional[str] = None, hostname_resolution_timeout: int = 600, qos: str = 'Burstable') -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
747
+ """
748
+ Specifies that this step should execute on Kubernetes.
749
+
750
+
751
+ Parameters
752
+ ----------
753
+ cpu : int, default 1
754
+ Number of CPUs required for this step. If `@resources` is
755
+ also present, the maximum value from all decorators is used.
756
+ memory : int, default 4096
757
+ Memory size (in MB) required for this step. If
758
+ `@resources` is also present, the maximum value from all decorators is
759
+ used.
760
+ disk : int, default 10240
761
+ Disk size (in MB) required for this step. If
762
+ `@resources` is also present, the maximum value from all decorators is
763
+ used.
764
+ image : str, optional, default None
765
+ Docker image to use when launching on Kubernetes. If not specified, and
766
+ METAFLOW_KUBERNETES_CONTAINER_IMAGE is specified, that image is used. If
767
+ not, a default Docker image mapping to the current version of Python is used.
768
+ image_pull_policy: str, default KUBERNETES_IMAGE_PULL_POLICY
769
+ If given, the imagePullPolicy to be applied to the Docker image of the step.
770
+ service_account : str, default METAFLOW_KUBERNETES_SERVICE_ACCOUNT
771
+ Kubernetes service account to use when launching pod in Kubernetes.
772
+ secrets : List[str], optional, default None
773
+ Kubernetes secrets to use when launching pod in Kubernetes. These
774
+ secrets are in addition to the ones defined in `METAFLOW_KUBERNETES_SECRETS`
775
+ in Metaflow configuration.
776
+ node_selector: Union[Dict[str,str], str], optional, default None
777
+ Kubernetes node selector(s) to apply to the pod running the task.
778
+ Can be passed in as a comma separated string of values e.g.
779
+ 'kubernetes.io/os=linux,kubernetes.io/arch=amd64' or as a dictionary
780
+ {'kubernetes.io/os': 'linux', 'kubernetes.io/arch': 'amd64'}
781
+ namespace : str, default METAFLOW_KUBERNETES_NAMESPACE
782
+ Kubernetes namespace to use when launching pod in Kubernetes.
783
+ gpu : int, optional, default None
784
+ Number of GPUs required for this step. A value of zero implies that
785
+ the scheduled node should not have GPUs.
786
+ gpu_vendor : str, default KUBERNETES_GPU_VENDOR
787
+ The vendor of the GPUs to be used for this step.
788
+ tolerations : List[str], default []
789
+ The default is extracted from METAFLOW_KUBERNETES_TOLERATIONS.
790
+ Kubernetes tolerations to use when launching pod in Kubernetes.
791
+ labels: Dict[str, str], default: METAFLOW_KUBERNETES_LABELS
792
+ Kubernetes labels to use when launching pod in Kubernetes.
793
+ annotations: Dict[str, str], default: METAFLOW_KUBERNETES_ANNOTATIONS
794
+ Kubernetes annotations to use when launching pod in Kubernetes.
795
+ use_tmpfs : bool, default False
796
+ This enables an explicit tmpfs mount for this step.
797
+ tmpfs_tempdir : bool, default True
798
+ sets METAFLOW_TEMPDIR to tmpfs_path if set for this step.
799
+ tmpfs_size : int, optional, default: None
800
+ The value for the size (in MiB) of the tmpfs mount for this step.
801
+ This parameter maps to the `--tmpfs` option in Docker. Defaults to 50% of the
802
+ memory allocated for this step.
803
+ tmpfs_path : str, optional, default /metaflow_temp
804
+ Path to tmpfs mount for this step.
805
+ persistent_volume_claims : Dict[str, str], optional, default None
806
+ A map (dictionary) of persistent volumes to be mounted to the pod for this step. The map is from persistent
807
+ volumes to the path to which the volume is to be mounted, e.g., `{'pvc-name': '/path/to/mount/on'}`.
808
+ shared_memory: int, optional
809
+ Shared memory size (in MiB) required for this step
810
+ port: int, optional
811
+ Port number to specify in the Kubernetes job object
812
+ compute_pool : str, optional, default None
813
+ Compute pool to be used for this step.
814
+ If not specified, any accessible compute pool within the perimeter is used.
815
+ hostname_resolution_timeout: int, default 10 * 60
816
+ Timeout in seconds for the worker tasks in the gang-scheduled cluster to resolve the hostname of the control task.
817
+ Only applicable when @parallel is used.
818
+ qos: str, default: Burstable
819
+ Quality of Service class to assign to the pod. Supported values are: Guaranteed, Burstable, BestEffort
793
820
  """
794
821
  ...
795
822
 
796
823
  @typing.overload
797
- def checkpoint(*, load_policy: str = 'fresh', temp_dir_root: str = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
824
+ def resources(*, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
798
825
  """
799
- Enables checkpointing for a step.
826
+ Specifies the resources needed when executing this step.
800
827
 
828
+ Use `@resources` to specify the resource requirements
829
+ independently of the specific compute layer (`@batch`, `@kubernetes`).
830
+
831
+ You can choose the compute layer on the command line by executing e.g.
832
+ ```
833
+ python myflow.py run --with batch
834
+ ```
835
+ or
836
+ ```
837
+ python myflow.py run --with kubernetes
838
+ ```
839
+ which executes the flow on the desired system using the
840
+ requirements specified in `@resources`.
801
841
 
802
842
 
803
843
  Parameters
804
844
  ----------
805
- load_policy : str, default: "fresh"
806
- The policy for loading the checkpoint. The following policies are supported:
807
- - "eager": Loads the the latest available checkpoint within the namespace.
808
- With this mode, the latest checkpoint written by any previous task (can be even a different run) of the step
809
- will be loaded at the start of the task.
810
- - "none": Do not load any checkpoint
811
- - "fresh": Loads the lastest checkpoint created within the running Task.
812
- This mode helps loading checkpoints across various retry attempts of the same task.
813
- With this mode, no checkpoint will be loaded at the start of a task but any checkpoints
814
- created within the task will be loaded when the task is retries execution on failure.
815
-
816
- temp_dir_root : str, default: None
817
- The root directory under which `current.checkpoint.directory` will be created.
845
+ cpu : int, default 1
846
+ Number of CPUs required for this step.
847
+ gpu : int, optional, default None
848
+ Number of GPUs required for this step.
849
+ disk : int, optional, default None
850
+ Disk size (in MB) required for this step. Only applies on Kubernetes.
851
+ memory : int, default 4096
852
+ Memory size (in MB) required for this step.
853
+ shared_memory : int, optional, default None
854
+ The value for the size (in MiB) of the /dev/shm volume for this step.
855
+ This parameter maps to the `--shm-size` option in Docker.
818
856
  """
819
857
  ...
820
858
 
821
859
  @typing.overload
822
- def checkpoint(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
860
+ def resources(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
823
861
  ...
824
862
 
825
863
  @typing.overload
826
- def checkpoint(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
864
+ def resources(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
827
865
  ...
828
866
 
829
- def checkpoint(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, load_policy: str = 'fresh', temp_dir_root: str = None):
867
+ def resources(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None):
830
868
  """
831
- Enables checkpointing for a step.
869
+ Specifies the resources needed when executing this step.
832
870
 
871
+ Use `@resources` to specify the resource requirements
872
+ independently of the specific compute layer (`@batch`, `@kubernetes`).
873
+
874
+ You can choose the compute layer on the command line by executing e.g.
875
+ ```
876
+ python myflow.py run --with batch
877
+ ```
878
+ or
879
+ ```
880
+ python myflow.py run --with kubernetes
881
+ ```
882
+ which executes the flow on the desired system using the
883
+ requirements specified in `@resources`.
833
884
 
834
885
 
835
886
  Parameters
836
887
  ----------
837
- load_policy : str, default: "fresh"
838
- The policy for loading the checkpoint. The following policies are supported:
839
- - "eager": Loads the the latest available checkpoint within the namespace.
840
- With this mode, the latest checkpoint written by any previous task (can be even a different run) of the step
841
- will be loaded at the start of the task.
842
- - "none": Do not load any checkpoint
843
- - "fresh": Loads the lastest checkpoint created within the running Task.
844
- This mode helps loading checkpoints across various retry attempts of the same task.
845
- With this mode, no checkpoint will be loaded at the start of a task but any checkpoints
846
- created within the task will be loaded when the task is retries execution on failure.
847
-
848
- temp_dir_root : str, default: None
849
- The root directory under which `current.checkpoint.directory` will be created.
888
+ cpu : int, default 1
889
+ Number of CPUs required for this step.
890
+ gpu : int, optional, default None
891
+ Number of GPUs required for this step.
892
+ disk : int, optional, default None
893
+ Disk size (in MB) required for this step. Only applies on Kubernetes.
894
+ memory : int, default 4096
895
+ Memory size (in MB) required for this step.
896
+ shared_memory : int, optional, default None
897
+ The value for the size (in MiB) of the /dev/shm volume for this step.
898
+ This parameter maps to the `--shm-size` option in Docker.
850
899
  """
851
900
  ...
852
901
 
853
- def nim(*, models: "list[NIM]", backend: str, queue_timeout: int) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
902
+ @typing.overload
903
+ def model(*, load: typing.Union[typing.List[str], str, typing.List[typing.Tuple[str, typing.Optional[str]]]] = None, temp_dir_root: str = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
854
904
  """
855
- This decorator is used to run NIM containers in Metaflow tasks as sidecars.
856
-
857
- User code call
858
- -----------
859
- @nim(
860
- models=['meta/llama3-8b-instruct', 'meta/llama3-70b-instruct'],
861
- backend='managed'
862
- )
863
-
864
- Valid backend options
865
- ---------------------
866
- - 'managed': Outerbounds selects a compute provider based on the model.
905
+ Enables loading / saving of models within a step.
867
906
 
868
- Valid model options
869
- ----------------
870
- - 'meta/llama3-8b-instruct': 8B parameter model
871
- - 'meta/llama3-70b-instruct': 70B parameter model
872
- - any model here: https://nvcf.ngc.nvidia.com/functions?filter=nvidia-functions
873
907
 
874
908
 
875
909
  Parameters
876
910
  ----------
877
- models: list[NIM]
878
- List of NIM containers running models in sidecars.
879
- backend: str
880
- Compute provider to run the NIM container.
881
- queue_timeout : int
882
- Time to keep the job in NVCF's queue.
883
- """
884
- ...
885
-
886
- @typing.overload
887
- def parallel(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
888
- """
889
- Decorator prototype for all step decorators. This function gets specialized
890
- and imported for all decorators types by _import_plugin_decorators().
891
- """
892
- ...
893
-
894
- @typing.overload
895
- def parallel(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
896
- ...
897
-
898
- def parallel(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
899
- """
900
- Decorator prototype for all step decorators. This function gets specialized
901
- and imported for all decorators types by _import_plugin_decorators().
911
+ load : Union[List[str],str,List[Tuple[str,Union[str,None]]]], default: None
912
+ Artifact name/s referencing the models/checkpoints to load. Artifact names refer to the names of the instance variables set to `self`.
913
+ The artifact names given to `load` can be reference objects or reference `key` strings from objects created by:
914
+ - `current.checkpoint`
915
+ - `current.model`
916
+ - `current.huggingface_hub`
917
+
918
+ If a list of tuples is provided, the first element is the artifact name and the second element is the path the artifact needs to be unpacked on
919
+ the local filesystem. If the second element is None, the artifact will be unpacked in the current working directory.
920
+ If a string is provided, then the artifact corresponding to that name will be loaded in the current working directory.
921
+
922
+ temp_dir_root : str, default: None
923
+ The root directory under which `current.model.loaded` will store loaded models
902
924
  """
903
925
  ...
904
926
 
905
927
  @typing.overload
906
- def fast_bakery_internal(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
907
- """
908
- Internal decorator to support Fast bakery
909
- """
928
+ def model(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
910
929
  ...
911
930
 
912
931
  @typing.overload
913
- def fast_bakery_internal(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
914
- ...
915
-
916
- def fast_bakery_internal(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
917
- """
918
- Internal decorator to support Fast bakery
919
- """
932
+ def model(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
920
933
  ...
921
934
 
922
- @typing.overload
923
- def pypi(*, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
935
+ def model(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, load: typing.Union[typing.List[str], str, typing.List[typing.Tuple[str, typing.Optional[str]]]] = None, temp_dir_root: str = None):
924
936
  """
925
- Specifies the PyPI packages for the step.
937
+ Enables loading / saving of models within a step.
926
938
 
927
- Information in this decorator will augment any
928
- attributes set in the `@pyi_base` flow-level decorator. Hence,
929
- you can use `@pypi_base` to set packages required by all
930
- steps and use `@pypi` to specify step-specific overrides.
931
939
 
932
940
 
933
941
  Parameters
934
942
  ----------
935
- packages : Dict[str, str], default: {}
936
- Packages to use for this step. The key is the name of the package
937
- and the value is the version to use.
938
- python : str, optional, default: None
939
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
940
- that the version used will correspond to the version of the Python interpreter used to start the run.
943
+ load : Union[List[str],str,List[Tuple[str,Union[str,None]]]], default: None
944
+ Artifact name/s referencing the models/checkpoints to load. Artifact names refer to the names of the instance variables set to `self`.
945
+ The artifact names given to `load` can be reference objects or reference `key` strings from objects created by:
946
+ - `current.checkpoint`
947
+ - `current.model`
948
+ - `current.huggingface_hub`
949
+
950
+ If a list of tuples is provided, the first element is the artifact name and the second element is the path the artifact needs to be unpacked on
951
+ the local filesystem. If the second element is None, the artifact will be unpacked in the current working directory.
952
+ If a string is provided, then the artifact corresponding to that name will be loaded in the current working directory.
953
+
954
+ temp_dir_root : str, default: None
955
+ The root directory under which `current.model.loaded` will store loaded models
941
956
  """
942
957
  ...
943
958
 
944
- @typing.overload
945
- def pypi(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
946
- ...
947
-
948
- @typing.overload
949
- def pypi(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
950
- ...
951
-
952
- def pypi(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None):
959
+ def nvidia(*, gpu: int, gpu_type: str, queue_timeout: int) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
953
960
  """
954
- Specifies the PyPI packages for the step.
955
-
956
- Information in this decorator will augment any
957
- attributes set in the `@pyi_base` flow-level decorator. Hence,
958
- you can use `@pypi_base` to set packages required by all
959
- steps and use `@pypi` to specify step-specific overrides.
961
+ Specifies that this step should execute on DGX cloud.
960
962
 
961
963
 
962
- Parameters
963
- ----------
964
- packages : Dict[str, str], default: {}
965
- Packages to use for this step. The key is the name of the package
966
- and the value is the version to use.
967
- python : str, optional, default: None
968
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
969
- that the version used will correspond to the version of the Python interpreter used to start the run.
964
+ Parameters
965
+ ----------
966
+ gpu : int
967
+ Number of GPUs to use.
968
+ gpu_type : str
969
+ Type of Nvidia GPU to use.
970
+ queue_timeout : int
971
+ Time to keep the job in NVCF's queue.
970
972
  """
971
973
  ...
972
974
 
@@ -1005,54 +1007,244 @@ def project(*, name: str, branch: typing.Optional[str] = None, production: bool
1005
1007
  """
1006
1008
  ...
1007
1009
 
1010
+ def airflow_s3_key_sensor(*, timeout: int, poke_interval: int, mode: str, exponential_backoff: bool, pool: str, soft_fail: bool, name: str, description: str, bucket_key: typing.Union[str, typing.List[str]], bucket_name: str, wildcard_match: bool, aws_conn_id: str, verify: bool) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1011
+ """
1012
+ The `@airflow_s3_key_sensor` decorator attaches an Airflow [S3KeySensor](https://airflow.apache.org/docs/apache-airflow-providers-amazon/stable/_api/airflow/providers/amazon/aws/sensors/s3/index.html#airflow.providers.amazon.aws.sensors.s3.S3KeySensor)
1013
+ before the start step of the flow. This decorator only works when a flow is scheduled on Airflow
1014
+ and is compiled using `airflow create`. More than one `@airflow_s3_key_sensor` can be
1015
+ added as flow decorators. Adding more than one decorator will ensure that the `start` step
1016
+ starts only after all sensors finish.
1017
+
1018
+
1019
+ Parameters
1020
+ ----------
1021
+ timeout : int
1022
+ Time, in seconds before the task times out and fails. (Default: 3600)
1023
+ poke_interval : int
1024
+ Time in seconds that the job should wait in between each try. (Default: 60)
1025
+ mode : str
1026
+ How the sensor operates. Options are: { poke | reschedule }. (Default: "poke")
1027
+ exponential_backoff : bool
1028
+ Allow progressively longer waits between pokes by using an exponential backoff algorithm. (Default: True)
1029
+ pool : str
1030
+ the slot pool this task should run in,
1031
+ slot pools are a way to limit concurrency for certain tasks. (Default:None)
1032
+ soft_fail : bool
1033
+ Set to true to mark the task as SKIPPED on failure. (Default: False)
1034
+ name : str
1035
+ Name of the sensor on Airflow
1036
+ description : str
1037
+ Description of sensor in the Airflow UI
1038
+ bucket_key : Union[str, List[str]]
1039
+ The key(s) being waited on. Supports full s3:// style url or relative path from root level.
1040
+ When it's specified as a full s3:// url, please leave `bucket_name` as None
1041
+ bucket_name : str
1042
+ Name of the S3 bucket. Only needed when bucket_key is not provided as a full s3:// url.
1043
+ When specified, all the keys passed to bucket_key refer to this bucket. (Default: None)
1044
+ wildcard_match : bool
1045
+ whether the bucket_key should be interpreted as a Unix wildcard pattern. (Default: False)
1046
+ aws_conn_id : str
1047
+ a reference to the s3 connection on Airflow. (Default: None)
1048
+ verify : bool
1049
+ Whether or not to verify SSL certificates for S3 connection. (Default: None)
1050
+ """
1051
+ ...
1052
+
1053
+ def airflow_external_task_sensor(*, timeout: int, poke_interval: int, mode: str, exponential_backoff: bool, pool: str, soft_fail: bool, name: str, description: str, external_dag_id: str, external_task_ids: typing.List[str], allowed_states: typing.List[str], failed_states: typing.List[str], execution_delta: "datetime.timedelta", check_existence: bool) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1054
+ """
1055
+ The `@airflow_external_task_sensor` decorator attaches an Airflow [ExternalTaskSensor](https://airflow.apache.org/docs/apache-airflow/stable/_api/airflow/sensors/external_task/index.html#airflow.sensors.external_task.ExternalTaskSensor) before the start step of the flow.
1056
+ This decorator only works when a flow is scheduled on Airflow and is compiled using `airflow create`. More than one `@airflow_external_task_sensor` can be added as flow decorators. Adding more than one decorator will ensure that the `start` step starts only after all sensors finish.
1057
+
1058
+
1059
+ Parameters
1060
+ ----------
1061
+ timeout : int
1062
+ Time, in seconds before the task times out and fails. (Default: 3600)
1063
+ poke_interval : int
1064
+ Time in seconds that the job should wait in between each try. (Default: 60)
1065
+ mode : str
1066
+ How the sensor operates. Options are: { poke | reschedule }. (Default: "poke")
1067
+ exponential_backoff : bool
1068
+ Allow progressively longer waits between pokes by using an exponential backoff algorithm. (Default: True)
1069
+ pool : str
1070
+ the slot pool this task should run in,
1071
+ slot pools are a way to limit concurrency for certain tasks. (Default:None)
1072
+ soft_fail : bool
1073
+ Set to true to mark the task as SKIPPED on failure. (Default: False)
1074
+ name : str
1075
+ Name of the sensor on Airflow
1076
+ description : str
1077
+ Description of sensor in the Airflow UI
1078
+ external_dag_id : str
1079
+ The dag_id that contains the task you want to wait for.
1080
+ external_task_ids : List[str]
1081
+ The list of task_ids that you want to wait for.
1082
+ If None (default value) the sensor waits for the DAG. (Default: None)
1083
+ allowed_states : List[str]
1084
+ Iterable of allowed states, (Default: ['success'])
1085
+ failed_states : List[str]
1086
+ Iterable of failed or dis-allowed states. (Default: None)
1087
+ execution_delta : datetime.timedelta
1088
+ time difference with the previous execution to look at,
1089
+ the default is the same logical date as the current task or DAG. (Default: None)
1090
+ check_existence: bool
1091
+ Set to True to check if the external task exists or check if
1092
+ the DAG to wait for exists. (Default: True)
1093
+ """
1094
+ ...
1095
+
1096
+ def with_artifact_store(f: typing.Optional[typing.Type[FlowSpecDerived]] = None):
1097
+ """
1098
+ Allows setting external datastores to save data for the
1099
+ `@checkpoint`/`@model`/`@huggingface_hub` decorators.
1100
+
1101
+ This decorator is useful when users wish to save data to a different datastore
1102
+ than what is configured in Metaflow. This can be for a variety of reasons:
1103
+
1104
+ 1. Data security: The objects need to be stored in a bucket (object storage) that is not accessible by other flows.
1105
+ 2. Data Locality: The location where the task is executing is not located in the same region as the datastore.
1106
+ - Example: Metaflow datastore lives in US East, but the task is executing in Finland datacenters.
1107
+ 3. Data Lifecycle Policies: The objects need to be archived / managed separately from the Metaflow managed objects.
1108
+ - Example: Flow is training very large models that need to be stored separately and will be deleted more aggressively than the Metaflow managed objects.
1109
+
1110
+ Usage:
1111
+ ----------
1112
+
1113
+ - Using a custom IAM role to access the datastore.
1114
+
1115
+ ```python
1116
+ @with_artifact_store(
1117
+ type="s3",
1118
+ config=lambda: {
1119
+ "root": "s3://my-bucket-foo/path/to/root",
1120
+ "role_arn": ROLE,
1121
+ },
1122
+ )
1123
+ class MyFlow(FlowSpec):
1124
+
1125
+ @checkpoint
1126
+ @step
1127
+ def start(self):
1128
+ with open("my_file.txt", "w") as f:
1129
+ f.write("Hello, World!")
1130
+ self.external_bucket_checkpoint = current.checkpoint.save("my_file.txt")
1131
+ self.next(self.end)
1132
+
1133
+ ```
1134
+
1135
+ - Using credentials to access the s3-compatible datastore.
1136
+
1137
+ ```python
1138
+ @with_artifact_store(
1139
+ type="s3",
1140
+ config=lambda: {
1141
+ "root": "s3://my-bucket-foo/path/to/root",
1142
+ "client_params": {
1143
+ "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1144
+ "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1145
+ },
1146
+ },
1147
+ )
1148
+ class MyFlow(FlowSpec):
1149
+
1150
+ @checkpoint
1151
+ @step
1152
+ def start(self):
1153
+ with open("my_file.txt", "w") as f:
1154
+ f.write("Hello, World!")
1155
+ self.external_bucket_checkpoint = current.checkpoint.save("my_file.txt")
1156
+ self.next(self.end)
1157
+
1158
+ ```
1159
+
1160
+ - Accessing objects stored in external datastores after task execution.
1161
+
1162
+ ```python
1163
+ run = Run("CheckpointsTestsFlow/8992")
1164
+ with artifact_store_from(run=run, config={
1165
+ "client_params": {
1166
+ "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1167
+ "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1168
+ },
1169
+ }):
1170
+ with Checkpoint() as cp:
1171
+ latest = cp.list(
1172
+ task=run["start"].task
1173
+ )[0]
1174
+ print(latest)
1175
+ cp.load(
1176
+ latest,
1177
+ "test-checkpoints"
1178
+ )
1179
+
1180
+ task = Task("TorchTuneFlow/8484/train/53673")
1181
+ with artifact_store_from(run=run, config={
1182
+ "client_params": {
1183
+ "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1184
+ "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1185
+ },
1186
+ }):
1187
+ load_model(
1188
+ task.data.model_ref,
1189
+ "test-models"
1190
+ )
1191
+ ```
1192
+ Parameters:
1193
+ ----------
1194
+
1195
+ type: str
1196
+ The type of the datastore. Can be one of 's3', 'gcs', 'azure' or any other supported metaflow Datastore.
1197
+
1198
+ config: dict or Callable
1199
+ Dictionary of configuration options for the datastore. The following keys are required:
1200
+ - root: The root path in the datastore where the data will be saved. (needs to be in the format expected by the datastore)
1201
+ - example: 's3://bucket-name/path/to/root'
1202
+ - example: 'gs://bucket-name/path/to/root'
1203
+ - example: 'https://myblockacc.blob.core.windows.net/metaflow/'
1204
+ - role_arn (optional): AWS IAM role to access s3 bucket (only when `type` is 's3')
1205
+ - session_vars (optional): AWS session variables to access s3 bucket (only when `type` is 's3')
1206
+ - client_params (optional): AWS client parameters to access s3 bucket (only when `type` is 's3')
1207
+ """
1208
+ ...
1209
+
1008
1210
  @typing.overload
1009
- def schedule(*, hourly: bool = False, daily: bool = True, weekly: bool = False, cron: typing.Optional[str] = None, timezone: typing.Optional[str] = None) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1211
+ def pypi_base(*, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1010
1212
  """
1011
- Specifies the times when the flow should be run when running on a
1012
- production scheduler.
1213
+ Specifies the PyPI packages for all steps of the flow.
1013
1214
 
1215
+ Use `@pypi_base` to set common packages required by all
1216
+ steps and use `@pypi` to specify step-specific overrides.
1014
1217
 
1015
1218
  Parameters
1016
1219
  ----------
1017
- hourly : bool, default False
1018
- Run the workflow hourly.
1019
- daily : bool, default True
1020
- Run the workflow daily.
1021
- weekly : bool, default False
1022
- Run the workflow weekly.
1023
- cron : str, optional, default None
1024
- Run the workflow at [a custom Cron schedule](https://docs.aws.amazon.com/eventbridge/latest/userguide/scheduled-events.html#cron-expressions)
1025
- specified by this expression.
1026
- timezone : str, optional, default None
1027
- Timezone on which the schedule runs (default: None). Currently supported only for Argo workflows,
1028
- which accepts timezones in [IANA format](https://nodatime.org/TimeZones).
1220
+ packages : Dict[str, str], default: {}
1221
+ Packages to use for this flow. The key is the name of the package
1222
+ and the value is the version to use.
1223
+ python : str, optional, default: None
1224
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
1225
+ that the version used will correspond to the version of the Python interpreter used to start the run.
1029
1226
  """
1030
1227
  ...
1031
1228
 
1032
1229
  @typing.overload
1033
- def schedule(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1230
+ def pypi_base(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1034
1231
  ...
1035
1232
 
1036
- def schedule(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, hourly: bool = False, daily: bool = True, weekly: bool = False, cron: typing.Optional[str] = None, timezone: typing.Optional[str] = None):
1233
+ def pypi_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None):
1037
1234
  """
1038
- Specifies the times when the flow should be run when running on a
1039
- production scheduler.
1235
+ Specifies the PyPI packages for all steps of the flow.
1040
1236
 
1237
+ Use `@pypi_base` to set common packages required by all
1238
+ steps and use `@pypi` to specify step-specific overrides.
1041
1239
 
1042
1240
  Parameters
1043
1241
  ----------
1044
- hourly : bool, default False
1045
- Run the workflow hourly.
1046
- daily : bool, default True
1047
- Run the workflow daily.
1048
- weekly : bool, default False
1049
- Run the workflow weekly.
1050
- cron : str, optional, default None
1051
- Run the workflow at [a custom Cron schedule](https://docs.aws.amazon.com/eventbridge/latest/userguide/scheduled-events.html#cron-expressions)
1052
- specified by this expression.
1053
- timezone : str, optional, default None
1054
- Timezone on which the schedule runs (default: None). Currently supported only for Argo workflows,
1055
- which accepts timezones in [IANA format](https://nodatime.org/TimeZones).
1242
+ packages : Dict[str, str], default: {}
1243
+ Packages to use for this flow. The key is the name of the package
1244
+ and the value is the version to use.
1245
+ python : str, optional, default: None
1246
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
1247
+ that the version used will correspond to the version of the Python interpreter used to start the run.
1056
1248
  """
1057
1249
  ...
1058
1250
 
@@ -1149,49 +1341,6 @@ def trigger(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, event: t
1149
1341
  """
1150
1342
  ...
1151
1343
 
1152
- def airflow_s3_key_sensor(*, timeout: int, poke_interval: int, mode: str, exponential_backoff: bool, pool: str, soft_fail: bool, name: str, description: str, bucket_key: typing.Union[str, typing.List[str]], bucket_name: str, wildcard_match: bool, aws_conn_id: str, verify: bool) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1153
- """
1154
- The `@airflow_s3_key_sensor` decorator attaches a Airflow [S3KeySensor](https://airflow.apache.org/docs/apache-airflow-providers-amazon/stable/_api/airflow/providers/amazon/aws/sensors/s3/index.html#airflow.providers.amazon.aws.sensors.s3.S3KeySensor)
1155
- before the start step of the flow. This decorator only works when a flow is scheduled on Airflow
1156
- and is compiled using `airflow create`. More than one `@airflow_s3_key_sensor` can be
1157
- added as a flow decorators. Adding more than one decorator will ensure that `start` step
1158
- starts only after all sensors finish.
1159
-
1160
-
1161
- Parameters
1162
- ----------
1163
- timeout : int
1164
- Time, in seconds before the task times out and fails. (Default: 3600)
1165
- poke_interval : int
1166
- Time in seconds that the job should wait in between each try. (Default: 60)
1167
- mode : str
1168
- How the sensor operates. Options are: { poke | reschedule }. (Default: "poke")
1169
- exponential_backoff : bool
1170
- allow progressive longer waits between pokes by using exponential backoff algorithm. (Default: True)
1171
- pool : str
1172
- the slot pool this task should run in,
1173
- slot pools are a way to limit concurrency for certain tasks. (Default:None)
1174
- soft_fail : bool
1175
- Set to true to mark the task as SKIPPED on failure. (Default: False)
1176
- name : str
1177
- Name of the sensor on Airflow
1178
- description : str
1179
- Description of sensor in the Airflow UI
1180
- bucket_key : Union[str, List[str]]
1181
- The key(s) being waited on. Supports full s3:// style url or relative path from root level.
1182
- When it's specified as a full s3:// url, please leave `bucket_name` as None
1183
- bucket_name : str
1184
- Name of the S3 bucket. Only needed when bucket_key is not provided as a full s3:// url.
1185
- When specified, all the keys passed to bucket_key refers to this bucket. (Default:None)
1186
- wildcard_match : bool
1187
- whether the bucket_key should be interpreted as a Unix wildcard pattern. (Default: False)
1188
- aws_conn_id : str
1189
- a reference to the s3 connection on Airflow. (Default: None)
1190
- verify : bool
1191
- Whether or not to verify SSL certificates for S3 connection. (Default: None)
1192
- """
1193
- ...
1194
-
1195
1344
  @typing.overload
1196
1345
  def trigger_on_finish(*, flow: typing.Union[typing.Dict[str, str], str, None] = None, flows: typing.List[typing.Union[str, typing.Dict[str, str]]] = [], options: typing.Dict[str, typing.Any] = {}) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1197
1346
  """
@@ -1345,200 +1494,53 @@ def conda_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packa
1345
1494
  ...
1346
1495
 
1347
1496
  @typing.overload
1348
- def pypi_base(*, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1497
+ def schedule(*, hourly: bool = False, daily: bool = True, weekly: bool = False, cron: typing.Optional[str] = None, timezone: typing.Optional[str] = None) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1349
1498
  """
1350
- Specifies the PyPI packages for all steps of the flow.
1499
+ Specifies the times when the flow should be run when running on a
1500
+ production scheduler.
1351
1501
 
1352
- Use `@pypi_base` to set common packages required by all
1353
- steps and use `@pypi` to specify step-specific overrides.
1354
1502
 
1355
1503
  Parameters
1356
1504
  ----------
1357
- packages : Dict[str, str], default: {}
1358
- Packages to use for this flow. The key is the name of the package
1359
- and the value is the version to use.
1360
- python : str, optional, default: None
1361
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
1362
- that the version used will correspond to the version of the Python interpreter used to start the run.
1505
+ hourly : bool, default False
1506
+ Run the workflow hourly.
1507
+ daily : bool, default True
1508
+ Run the workflow daily.
1509
+ weekly : bool, default False
1510
+ Run the workflow weekly.
1511
+ cron : str, optional, default None
1512
+ Run the workflow at [a custom Cron schedule](https://docs.aws.amazon.com/eventbridge/latest/userguide/scheduled-events.html#cron-expressions)
1513
+ specified by this expression.
1514
+ timezone : str, optional, default None
1515
+ Timezone on which the schedule runs (default: None). Currently supported only for Argo workflows,
1516
+ which accepts timezones in [IANA format](https://nodatime.org/TimeZones).
1363
1517
  """
1364
1518
  ...
1365
1519
 
1366
1520
  @typing.overload
1367
- def pypi_base(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1368
- ...
1369
-
1370
- def pypi_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None):
1371
- """
1372
- Specifies the PyPI packages for all steps of the flow.
1373
-
1374
- Use `@pypi_base` to set common packages required by all
1375
- steps and use `@pypi` to specify step-specific overrides.
1376
-
1377
- Parameters
1378
- ----------
1379
- packages : Dict[str, str], default: {}
1380
- Packages to use for this flow. The key is the name of the package
1381
- and the value is the version to use.
1382
- python : str, optional, default: None
1383
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
1384
- that the version used will correspond to the version of the Python interpreter used to start the run.
1385
- """
1521
+ def schedule(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1386
1522
  ...
1387
1523
 
1388
- def airflow_external_task_sensor(*, timeout: int, poke_interval: int, mode: str, exponential_backoff: bool, pool: str, soft_fail: bool, name: str, description: str, external_dag_id: str, external_task_ids: typing.List[str], allowed_states: typing.List[str], failed_states: typing.List[str], execution_delta: "datetime.timedelta", check_existence: bool) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1524
+ def schedule(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, hourly: bool = False, daily: bool = True, weekly: bool = False, cron: typing.Optional[str] = None, timezone: typing.Optional[str] = None):
1389
1525
  """
1390
- The `@airflow_external_task_sensor` decorator attaches a Airflow [ExternalTaskSensor](https://airflow.apache.org/docs/apache-airflow/stable/_api/airflow/sensors/external_task/index.html#airflow.sensors.external_task.ExternalTaskSensor) before the start step of the flow.
1391
- This decorator only works when a flow is scheduled on Airflow and is compiled using `airflow create`. More than one `@airflow_external_task_sensor` can be added as a flow decorators. Adding more than one decorator will ensure that `start` step starts only after all sensors finish.
1526
+ Specifies the times when the flow should be run when running on a
1527
+ production scheduler.
1392
1528
 
1393
1529
 
1394
1530
  Parameters
1395
1531
  ----------
1396
- timeout : int
1397
- Time, in seconds before the task times out and fails. (Default: 3600)
1398
- poke_interval : int
1399
- Time in seconds that the job should wait in between each try. (Default: 60)
1400
- mode : str
1401
- How the sensor operates. Options are: { poke | reschedule }. (Default: "poke")
1402
- exponential_backoff : bool
1403
- allow progressive longer waits between pokes by using exponential backoff algorithm. (Default: True)
1404
- pool : str
1405
- the slot pool this task should run in,
1406
- slot pools are a way to limit concurrency for certain tasks. (Default:None)
1407
- soft_fail : bool
1408
- Set to true to mark the task as SKIPPED on failure. (Default: False)
1409
- name : str
1410
- Name of the sensor on Airflow
1411
- description : str
1412
- Description of sensor in the Airflow UI
1413
- external_dag_id : str
1414
- The dag_id that contains the task you want to wait for.
1415
- external_task_ids : List[str]
1416
- The list of task_ids that you want to wait for.
1417
- If None (default value) the sensor waits for the DAG. (Default: None)
1418
- allowed_states : List[str]
1419
- Iterable of allowed states, (Default: ['success'])
1420
- failed_states : List[str]
1421
- Iterable of failed or dis-allowed states. (Default: None)
1422
- execution_delta : datetime.timedelta
1423
- time difference with the previous execution to look at,
1424
- the default is the same logical date as the current task or DAG. (Default: None)
1425
- check_existence: bool
1426
- Set to True to check if the external task exists or check if
1427
- the DAG to wait for exists. (Default: True)
1428
- """
1429
- ...
1430
-
1431
- def with_artifact_store(f: typing.Optional[typing.Type[FlowSpecDerived]] = None):
1432
- """
1433
- Allows setting external datastores to save data for the
1434
- `@checkpoint`/`@model`/`@huggingface_hub` decorators.
1435
-
1436
- This decorator is useful when users wish to save data to a different datastore
1437
- than what is configured in Metaflow. This can be for variety of reasons:
1438
-
1439
- 1. Data security: The objects needs to be stored in a bucket (object storage) that is not accessible by other flows.
1440
- 2. Data Locality: The location where the task is executing is not located in the same region as the datastore.
1441
- - Example: Metaflow datastore lives in US East, but the task is executing in Finland datacenters.
1442
- 3. Data Lifecycle Policies: The objects need to be archived / managed separately from the Metaflow managed objects.
1443
- - Example: Flow is training very large models that need to be stored separately and will be deleted more aggressively than the Metaflow managed objects.
1444
-
1445
- Usage:
1446
- ----------
1447
-
1448
- - Using a custom IAM role to access the datastore.
1449
-
1450
- ```python
1451
- @with_artifact_store(
1452
- type="s3",
1453
- config=lambda: {
1454
- "root": "s3://my-bucket-foo/path/to/root",
1455
- "role_arn": ROLE,
1456
- },
1457
- )
1458
- class MyFlow(FlowSpec):
1459
-
1460
- @checkpoint
1461
- @step
1462
- def start(self):
1463
- with open("my_file.txt", "w") as f:
1464
- f.write("Hello, World!")
1465
- self.external_bucket_checkpoint = current.checkpoint.save("my_file.txt")
1466
- self.next(self.end)
1467
-
1468
- ```
1469
-
1470
- - Using credentials to access the s3-compatible datastore.
1471
-
1472
- ```python
1473
- @with_artifact_store(
1474
- type="s3",
1475
- config=lambda: {
1476
- "root": "s3://my-bucket-foo/path/to/root",
1477
- "client_params": {
1478
- "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1479
- "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1480
- },
1481
- },
1482
- )
1483
- class MyFlow(FlowSpec):
1484
-
1485
- @checkpoint
1486
- @step
1487
- def start(self):
1488
- with open("my_file.txt", "w") as f:
1489
- f.write("Hello, World!")
1490
- self.external_bucket_checkpoint = current.checkpoint.save("my_file.txt")
1491
- self.next(self.end)
1492
-
1493
- ```
1494
-
1495
- - Accessing objects stored in external datastores after task execution.
1496
-
1497
- ```python
1498
- run = Run("CheckpointsTestsFlow/8992")
1499
- with artifact_store_from(run=run, config={
1500
- "client_params": {
1501
- "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1502
- "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1503
- },
1504
- }):
1505
- with Checkpoint() as cp:
1506
- latest = cp.list(
1507
- task=run["start"].task
1508
- )[0]
1509
- print(latest)
1510
- cp.load(
1511
- latest,
1512
- "test-checkpoints"
1513
- )
1514
-
1515
- task = Task("TorchTuneFlow/8484/train/53673")
1516
- with artifact_store_from(run=run, config={
1517
- "client_params": {
1518
- "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1519
- "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1520
- },
1521
- }):
1522
- load_model(
1523
- task.data.model_ref,
1524
- "test-models"
1525
- )
1526
- ```
1527
- Parameters:
1528
- ----------
1529
-
1530
- type: str
1531
- The type of the datastore. Can be one of 's3', 'gcs', 'azure' or any other supported metaflow Datastore.
1532
-
1533
- config: dict or Callable
1534
- Dictionary of configuration options for the datastore. The following keys are required:
1535
- - root: The root path in the datastore where the data will be saved. (needs to be in the format expected by the datastore)
1536
- - example: 's3://bucket-name/path/to/root'
1537
- - example: 'gs://bucket-name/path/to/root'
1538
- - example: 'https://myblockacc.blob.core.windows.net/metaflow/'
1539
- - role_arn (optional): AWS IAM role to access s3 bucket (only when `type` is 's3')
1540
- - session_vars (optional): AWS session variables to access s3 bucket (only when `type` is 's3')
1541
- - client_params (optional): AWS client parameters to access s3 bucket (only when `type` is 's3')
1532
+ hourly : bool, default False
1533
+ Run the workflow hourly.
1534
+ daily : bool, default True
1535
+ Run the workflow daily.
1536
+ weekly : bool, default False
1537
+ Run the workflow weekly.
1538
+ cron : str, optional, default None
1539
+ Run the workflow at [a custom Cron schedule](https://docs.aws.amazon.com/eventbridge/latest/userguide/scheduled-events.html#cron-expressions)
1540
+ specified by this expression.
1541
+ timezone : str, optional, default None
1542
+ Timezone on which the schedule runs (default: None). Currently supported only for Argo workflows,
1543
+ which accepts timezones in [IANA format](https://nodatime.org/TimeZones).
1542
1544
  """
1543
1545
  ...
1544
1546