ob-metaflow-stubs 6.0.3.176rc1__py2.py3-none-any.whl → 6.0.3.176rc3__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (218)
  1. metaflow-stubs/__init__.pyi +635 -635
  2. metaflow-stubs/cards.pyi +1 -1
  3. metaflow-stubs/cli.pyi +1 -1
  4. metaflow-stubs/cli_components/__init__.pyi +1 -1
  5. metaflow-stubs/cli_components/utils.pyi +1 -1
  6. metaflow-stubs/client/__init__.pyi +1 -1
  7. metaflow-stubs/client/core.pyi +5 -5
  8. metaflow-stubs/client/filecache.pyi +1 -1
  9. metaflow-stubs/events.pyi +1 -1
  10. metaflow-stubs/exception.pyi +1 -1
  11. metaflow-stubs/flowspec.pyi +4 -4
  12. metaflow-stubs/generated_for.txt +1 -1
  13. metaflow-stubs/includefile.pyi +2 -2
  14. metaflow-stubs/info_file.pyi +1 -1
  15. metaflow-stubs/metadata_provider/__init__.pyi +1 -1
  16. metaflow-stubs/metadata_provider/heartbeat.pyi +1 -1
  17. metaflow-stubs/metadata_provider/metadata.pyi +1 -1
  18. metaflow-stubs/metadata_provider/util.pyi +1 -1
  19. metaflow-stubs/metaflow_config.pyi +1 -1
  20. metaflow-stubs/metaflow_current.pyi +119 -119
  21. metaflow-stubs/metaflow_git.pyi +1 -1
  22. metaflow-stubs/mf_extensions/__init__.pyi +1 -1
  23. metaflow-stubs/mf_extensions/obcheckpoint/__init__.pyi +1 -1
  24. metaflow-stubs/mf_extensions/obcheckpoint/plugins/__init__.pyi +1 -1
  25. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/__init__.pyi +1 -1
  26. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/__init__.pyi +1 -1
  27. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/async_cards.pyi +1 -1
  28. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/deco_injection_mixin.pyi +1 -1
  29. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/extra_components.pyi +2 -2
  30. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/__init__.pyi +1 -1
  31. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/cards/__init__.pyi +1 -1
  32. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/cards/checkpoint_lister.pyi +2 -2
  33. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/cards/lineage_card.pyi +1 -1
  34. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/checkpoint_storage.pyi +3 -3
  35. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/constructors.pyi +1 -1
  36. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/core.pyi +3 -3
  37. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/decorator.pyi +3 -3
  38. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/exceptions.pyi +1 -1
  39. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/final_api.pyi +2 -2
  40. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/lineage.pyi +1 -1
  41. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/__init__.pyi +1 -1
  42. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/context.pyi +2 -2
  43. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/core.pyi +2 -2
  44. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/decorator.pyi +1 -1
  45. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/exceptions.pyi +1 -1
  46. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/task_utils.pyi +2 -2
  47. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/utils.pyi +1 -1
  48. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastructures.pyi +1 -1
  49. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/exceptions.pyi +1 -1
  50. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/hf_hub/__init__.pyi +1 -1
  51. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/hf_hub/decorator.pyi +1 -1
  52. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/__init__.pyi +1 -1
  53. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/core.pyi +2 -2
  54. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/exceptions.pyi +1 -1
  55. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/model_storage.pyi +2 -2
  56. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/__init__.pyi +1 -1
  57. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/flowspec_utils.pyi +1 -1
  58. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/general.pyi +1 -1
  59. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/identity_utils.pyi +2 -2
  60. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/serialization_handler/__init__.pyi +1 -1
  61. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/serialization_handler/base.pyi +1 -1
  62. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/serialization_handler/tar.pyi +1 -1
  63. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/tar_utils.pyi +1 -1
  64. metaflow-stubs/mf_extensions/outerbounds/__init__.pyi +1 -1
  65. metaflow-stubs/mf_extensions/outerbounds/plugins/__init__.pyi +1 -1
  66. metaflow-stubs/mf_extensions/outerbounds/plugins/card_utilities/__init__.pyi +1 -1
  67. metaflow-stubs/mf_extensions/outerbounds/plugins/card_utilities/async_cards.pyi +1 -1
  68. metaflow-stubs/mf_extensions/outerbounds/plugins/card_utilities/injector.pyi +1 -1
  69. metaflow-stubs/mf_extensions/outerbounds/plugins/checkpoint_datastores/__init__.pyi +1 -1
  70. metaflow-stubs/mf_extensions/outerbounds/plugins/checkpoint_datastores/coreweave.pyi +1 -1
  71. metaflow-stubs/mf_extensions/outerbounds/plugins/checkpoint_datastores/nebius.pyi +1 -1
  72. metaflow-stubs/mf_extensions/outerbounds/plugins/fast_bakery/__init__.pyi +1 -1
  73. metaflow-stubs/mf_extensions/outerbounds/plugins/fast_bakery/baker.pyi +2 -2
  74. metaflow-stubs/mf_extensions/outerbounds/plugins/fast_bakery/docker_environment.pyi +1 -1
  75. metaflow-stubs/mf_extensions/outerbounds/plugins/fast_bakery/fast_bakery.pyi +1 -1
  76. metaflow-stubs/mf_extensions/outerbounds/plugins/kubernetes/__init__.pyi +1 -1
  77. metaflow-stubs/mf_extensions/outerbounds/plugins/kubernetes/pod_killer.pyi +1 -1
  78. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/__init__.pyi +1 -1
  79. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/constants.pyi +1 -1
  80. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/exceptions.pyi +1 -1
  81. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/ollama.pyi +1 -1
  82. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/status_card.pyi +1 -1
  83. metaflow-stubs/mf_extensions/outerbounds/plugins/snowflake/__init__.pyi +1 -1
  84. metaflow-stubs/mf_extensions/outerbounds/plugins/snowflake/snowflake.pyi +1 -1
  85. metaflow-stubs/mf_extensions/outerbounds/profilers/__init__.pyi +1 -1
  86. metaflow-stubs/mf_extensions/outerbounds/profilers/gpu.pyi +1 -1
  87. metaflow-stubs/mf_extensions/outerbounds/remote_config.pyi +1 -1
  88. metaflow-stubs/mf_extensions/outerbounds/toplevel/__init__.pyi +1 -1
  89. metaflow-stubs/mf_extensions/outerbounds/toplevel/global_aliases_for_metaflow_package.pyi +1 -1
  90. metaflow-stubs/multicore_utils.pyi +1 -1
  91. metaflow-stubs/ob_internal.pyi +1 -1
  92. metaflow-stubs/parameters.pyi +2 -2
  93. metaflow-stubs/plugins/__init__.pyi +11 -11
  94. metaflow-stubs/plugins/airflow/__init__.pyi +1 -1
  95. metaflow-stubs/plugins/airflow/airflow_utils.pyi +1 -1
  96. metaflow-stubs/plugins/airflow/exception.pyi +1 -1
  97. metaflow-stubs/plugins/airflow/sensors/__init__.pyi +1 -1
  98. metaflow-stubs/plugins/airflow/sensors/base_sensor.pyi +1 -1
  99. metaflow-stubs/plugins/airflow/sensors/external_task_sensor.pyi +1 -1
  100. metaflow-stubs/plugins/airflow/sensors/s3_sensor.pyi +1 -1
  101. metaflow-stubs/plugins/argo/__init__.pyi +1 -1
  102. metaflow-stubs/plugins/argo/argo_client.pyi +1 -1
  103. metaflow-stubs/plugins/argo/argo_events.pyi +1 -1
  104. metaflow-stubs/plugins/argo/argo_workflows.pyi +2 -2
  105. metaflow-stubs/plugins/argo/argo_workflows_decorator.pyi +3 -3
  106. metaflow-stubs/plugins/argo/argo_workflows_deployer.pyi +2 -2
  107. metaflow-stubs/plugins/argo/argo_workflows_deployer_objects.pyi +1 -1
  108. metaflow-stubs/plugins/aws/__init__.pyi +1 -1
  109. metaflow-stubs/plugins/aws/aws_client.pyi +1 -1
  110. metaflow-stubs/plugins/aws/aws_utils.pyi +1 -1
  111. metaflow-stubs/plugins/aws/batch/__init__.pyi +1 -1
  112. metaflow-stubs/plugins/aws/batch/batch.pyi +1 -1
  113. metaflow-stubs/plugins/aws/batch/batch_client.pyi +1 -1
  114. metaflow-stubs/plugins/aws/batch/batch_decorator.pyi +1 -1
  115. metaflow-stubs/plugins/aws/secrets_manager/__init__.pyi +1 -1
  116. metaflow-stubs/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.pyi +3 -3
  117. metaflow-stubs/plugins/aws/step_functions/__init__.pyi +1 -1
  118. metaflow-stubs/plugins/aws/step_functions/event_bridge_client.pyi +1 -1
  119. metaflow-stubs/plugins/aws/step_functions/schedule_decorator.pyi +1 -1
  120. metaflow-stubs/plugins/aws/step_functions/step_functions.pyi +1 -1
  121. metaflow-stubs/plugins/aws/step_functions/step_functions_client.pyi +1 -1
  122. metaflow-stubs/plugins/aws/step_functions/step_functions_deployer.pyi +2 -2
  123. metaflow-stubs/plugins/aws/step_functions/step_functions_deployer_objects.pyi +1 -1
  124. metaflow-stubs/plugins/azure/__init__.pyi +1 -1
  125. metaflow-stubs/plugins/azure/azure_credential.pyi +1 -1
  126. metaflow-stubs/plugins/azure/azure_exceptions.pyi +1 -1
  127. metaflow-stubs/plugins/azure/azure_secret_manager_secrets_provider.pyi +3 -3
  128. metaflow-stubs/plugins/azure/azure_utils.pyi +1 -1
  129. metaflow-stubs/plugins/azure/blob_service_client_factory.pyi +1 -1
  130. metaflow-stubs/plugins/azure/includefile_support.pyi +1 -1
  131. metaflow-stubs/plugins/cards/__init__.pyi +1 -1
  132. metaflow-stubs/plugins/cards/card_client.pyi +1 -1
  133. metaflow-stubs/plugins/cards/card_creator.pyi +1 -1
  134. metaflow-stubs/plugins/cards/card_datastore.pyi +1 -1
  135. metaflow-stubs/plugins/cards/card_decorator.pyi +1 -1
  136. metaflow-stubs/plugins/cards/card_modules/__init__.pyi +1 -1
  137. metaflow-stubs/plugins/cards/card_modules/basic.pyi +2 -2
  138. metaflow-stubs/plugins/cards/card_modules/card.pyi +1 -1
  139. metaflow-stubs/plugins/cards/card_modules/components.pyi +3 -3
  140. metaflow-stubs/plugins/cards/card_modules/convert_to_native_type.pyi +1 -1
  141. metaflow-stubs/plugins/cards/card_modules/renderer_tools.pyi +1 -1
  142. metaflow-stubs/plugins/cards/card_modules/test_cards.pyi +1 -1
  143. metaflow-stubs/plugins/cards/card_resolver.pyi +1 -1
  144. metaflow-stubs/plugins/cards/component_serializer.pyi +1 -1
  145. metaflow-stubs/plugins/cards/exception.pyi +1 -1
  146. metaflow-stubs/plugins/catch_decorator.pyi +1 -1
  147. metaflow-stubs/plugins/datatools/__init__.pyi +1 -1
  148. metaflow-stubs/plugins/datatools/local.pyi +1 -1
  149. metaflow-stubs/plugins/datatools/s3/__init__.pyi +1 -1
  150. metaflow-stubs/plugins/datatools/s3/s3.pyi +1 -1
  151. metaflow-stubs/plugins/datatools/s3/s3tail.pyi +1 -1
  152. metaflow-stubs/plugins/datatools/s3/s3util.pyi +1 -1
  153. metaflow-stubs/plugins/debug_logger.pyi +1 -1
  154. metaflow-stubs/plugins/debug_monitor.pyi +1 -1
  155. metaflow-stubs/plugins/environment_decorator.pyi +1 -1
  156. metaflow-stubs/plugins/events_decorator.pyi +1 -1
  157. metaflow-stubs/plugins/frameworks/__init__.pyi +1 -1
  158. metaflow-stubs/plugins/frameworks/pytorch.pyi +1 -1
  159. metaflow-stubs/plugins/gcp/__init__.pyi +1 -1
  160. metaflow-stubs/plugins/gcp/gcp_secret_manager_secrets_provider.pyi +3 -3
  161. metaflow-stubs/plugins/gcp/gs_exceptions.pyi +1 -1
  162. metaflow-stubs/plugins/gcp/gs_storage_client_factory.pyi +1 -1
  163. metaflow-stubs/plugins/gcp/gs_utils.pyi +1 -1
  164. metaflow-stubs/plugins/gcp/includefile_support.pyi +1 -1
  165. metaflow-stubs/plugins/kubernetes/__init__.pyi +1 -1
  166. metaflow-stubs/plugins/kubernetes/kube_utils.pyi +1 -1
  167. metaflow-stubs/plugins/kubernetes/kubernetes.pyi +1 -1
  168. metaflow-stubs/plugins/kubernetes/kubernetes_client.pyi +1 -1
  169. metaflow-stubs/plugins/kubernetes/kubernetes_decorator.pyi +1 -1
  170. metaflow-stubs/plugins/kubernetes/kubernetes_jobsets.pyi +1 -1
  171. metaflow-stubs/plugins/kubernetes/spot_monitor_sidecar.pyi +1 -1
  172. metaflow-stubs/plugins/ollama/__init__.pyi +2 -2
  173. metaflow-stubs/plugins/parallel_decorator.pyi +1 -1
  174. metaflow-stubs/plugins/perimeters.pyi +1 -1
  175. metaflow-stubs/plugins/project_decorator.pyi +1 -1
  176. metaflow-stubs/plugins/pypi/__init__.pyi +1 -1
  177. metaflow-stubs/plugins/pypi/conda_decorator.pyi +1 -1
  178. metaflow-stubs/plugins/pypi/conda_environment.pyi +2 -2
  179. metaflow-stubs/plugins/pypi/parsers.pyi +1 -1
  180. metaflow-stubs/plugins/pypi/pypi_decorator.pyi +1 -1
  181. metaflow-stubs/plugins/pypi/pypi_environment.pyi +1 -1
  182. metaflow-stubs/plugins/pypi/utils.pyi +1 -1
  183. metaflow-stubs/plugins/resources_decorator.pyi +1 -1
  184. metaflow-stubs/plugins/retry_decorator.pyi +1 -1
  185. metaflow-stubs/plugins/secrets/__init__.pyi +1 -1
  186. metaflow-stubs/plugins/secrets/inline_secrets_provider.pyi +2 -2
  187. metaflow-stubs/plugins/secrets/secrets_decorator.pyi +1 -1
  188. metaflow-stubs/plugins/snowflake/__init__.pyi +1 -1
  189. metaflow-stubs/plugins/storage_executor.pyi +1 -1
  190. metaflow-stubs/plugins/test_unbounded_foreach_decorator.pyi +1 -1
  191. metaflow-stubs/plugins/timeout_decorator.pyi +1 -1
  192. metaflow-stubs/plugins/torchtune/__init__.pyi +1 -1
  193. metaflow-stubs/plugins/uv/__init__.pyi +1 -1
  194. metaflow-stubs/plugins/uv/uv_environment.pyi +1 -1
  195. metaflow-stubs/profilers/__init__.pyi +1 -1
  196. metaflow-stubs/pylint_wrapper.pyi +1 -1
  197. metaflow-stubs/runner/__init__.pyi +1 -1
  198. metaflow-stubs/runner/deployer.pyi +28 -28
  199. metaflow-stubs/runner/deployer_impl.pyi +1 -1
  200. metaflow-stubs/runner/metaflow_runner.pyi +3 -3
  201. metaflow-stubs/runner/nbdeploy.pyi +1 -1
  202. metaflow-stubs/runner/nbrun.pyi +1 -1
  203. metaflow-stubs/runner/subprocess_manager.pyi +1 -1
  204. metaflow-stubs/runner/utils.pyi +1 -1
  205. metaflow-stubs/system/__init__.pyi +1 -1
  206. metaflow-stubs/system/system_logger.pyi +2 -2
  207. metaflow-stubs/system/system_monitor.pyi +1 -1
  208. metaflow-stubs/tagging_util.pyi +1 -1
  209. metaflow-stubs/tuple_util.pyi +1 -1
  210. metaflow-stubs/user_configs/__init__.pyi +1 -1
  211. metaflow-stubs/user_configs/config_decorators.pyi +5 -5
  212. metaflow-stubs/user_configs/config_options.pyi +1 -1
  213. metaflow-stubs/user_configs/config_parameters.pyi +5 -5
  214. {ob_metaflow_stubs-6.0.3.176rc1.dist-info → ob_metaflow_stubs-6.0.3.176rc3.dist-info}/METADATA +1 -1
  215. ob_metaflow_stubs-6.0.3.176rc3.dist-info/RECORD +218 -0
  216. ob_metaflow_stubs-6.0.3.176rc1.dist-info/RECORD +0 -218
  217. {ob_metaflow_stubs-6.0.3.176rc1.dist-info → ob_metaflow_stubs-6.0.3.176rc3.dist-info}/WHEEL +0 -0
  218. {ob_metaflow_stubs-6.0.3.176rc1.dist-info → ob_metaflow_stubs-6.0.3.176rc3.dist-info}/top_level.txt +0 -0
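The diff of `metaflow-stubs/__init__.pyi` below is dominated by a regenerated timestamp and reordered stub definitions for Metaflow step decorators such as `@resources`, `@retry`, `@catch`, and `@conda`. As a purely illustrative sketch (not part of this package diff; the flow name and step bodies are invented), these decorators are typically applied on top of `@step` in a flow:

```python
from metaflow import FlowSpec, step, resources, retry


class ExampleFlow(FlowSpec):  # hypothetical flow, for illustration only

    @resources(cpu=2, memory=8192)              # resource hints, independent of @batch / @kubernetes
    @retry(times=3, minutes_between_retries=2)  # retry transient failures
    @step
    def start(self):
        self.message = "hello"
        self.next(self.end)

    @step
    def end(self):
        print(self.message)


if __name__ == "__main__":
    ExampleFlow()
```

Run locally with `python example_flow.py run`, or route the same `@resources` requests to a compute layer with `python example_flow.py run --with kubernetes`, as described in the `@resources` docstring in the diff below.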
@@ -1,7 +1,7 @@
  ######################################################################################################
  # Auto-generated Metaflow stub file #
  # MF version: 2.15.14.1+obcheckpoint(0.2.1);ob(v1) #
- # Generated on 2025-06-03T02:57:26.607647 #
+ # Generated on 2025-06-04T06:35:02.879434 #
  ######################################################################################################

  from __future__ import annotations
@@ -37,15 +37,15 @@ from .user_configs.config_decorators import CustomFlowDecorator as CustomFlowDec
  from .user_configs.config_decorators import CustomStepDecorator as CustomStepDecorator
  from . import cards as cards
  from . import tuple_util as tuple_util
- from . import events as events
  from . import metaflow_git as metaflow_git
+ from . import events as events
  from . import runner as runner
  from . import plugins as plugins
  from .mf_extensions.outerbounds.toplevel.global_aliases_for_metaflow_package import S3 as S3
  from . import includefile as includefile
  from .includefile import IncludeFile as IncludeFile
- from .plugins.pypi.parsers import requirements_txt_parser as requirements_txt_parser
  from .plugins.pypi.parsers import conda_environment_yml_parser as conda_environment_yml_parser
+ from .plugins.pypi.parsers import requirements_txt_parser as requirements_txt_parser
  from .plugins.pypi.parsers import pyproject_toml_parser as pyproject_toml_parser
  from . import client as client
  from .client.core import namespace as namespace
@@ -154,131 +154,82 @@ def step(f: typing.Union[typing.Callable[[FlowSpecDerived], None], typing.Callab
  """
  ...

- def nvidia(*, gpu: int, gpu_type: str, queue_timeout: int) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
- """
- Specifies that this step should execute on DGX cloud.
-
-
- Parameters
- ----------
- gpu : int
- Number of GPUs to use.
- gpu_type : str
- Type of Nvidia GPU to use.
- queue_timeout : int
- Time to keep the job in NVCF's queue.
- """
- ...
-
- @typing.overload
- def checkpoint(*, load_policy: str = 'fresh', temp_dir_root: str = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
- """
- Enables checkpointing for a step.
-
-
-
- Parameters
- ----------
- load_policy : str, default: "fresh"
- The policy for loading the checkpoint. The following policies are supported:
- - "eager": Loads the the latest available checkpoint within the namespace.
- With this mode, the latest checkpoint written by any previous task (can be even a different run) of the step
- will be loaded at the start of the task.
- - "none": Do not load any checkpoint
- - "fresh": Loads the lastest checkpoint created within the running Task.
- This mode helps loading checkpoints across various retry attempts of the same task.
- With this mode, no checkpoint will be loaded at the start of a task but any checkpoints
- created within the task will be loaded when the task is retries execution on failure.
-
- temp_dir_root : str, default: None
- The root directory under which `current.checkpoint.directory` will be created.
- """
- ...
-
- @typing.overload
- def checkpoint(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
- ...
-
- @typing.overload
- def checkpoint(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
- ...
-
- def checkpoint(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, load_policy: str = 'fresh', temp_dir_root: str = None):
- """
- Enables checkpointing for a step.
-
-
-
- Parameters
- ----------
- load_policy : str, default: "fresh"
- The policy for loading the checkpoint. The following policies are supported:
- - "eager": Loads the the latest available checkpoint within the namespace.
- With this mode, the latest checkpoint written by any previous task (can be even a different run) of the step
- will be loaded at the start of the task.
- - "none": Do not load any checkpoint
- - "fresh": Loads the lastest checkpoint created within the running Task.
- This mode helps loading checkpoints across various retry attempts of the same task.
- With this mode, no checkpoint will be loaded at the start of a task but any checkpoints
- created within the task will be loaded when the task is retries execution on failure.
-
- temp_dir_root : str, default: None
- The root directory under which `current.checkpoint.directory` will be created.
- """
- ...
-
  @typing.overload
- def retry(*, times: int = 3, minutes_between_retries: int = 2) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
+ def resources(*, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
  """
- Specifies the number of times the task corresponding
- to a step needs to be retried.
+ Specifies the resources needed when executing this step.

- This decorator is useful for handling transient errors, such as networking issues.
- If your task contains operations that can't be retried safely, e.g. database updates,
- it is advisable to annotate it with `@retry(times=0)`.
+ Use `@resources` to specify the resource requirements
+ independently of the specific compute layer (`@batch`, `@kubernetes`).

- This can be used in conjunction with the `@catch` decorator. The `@catch`
- decorator will execute a no-op task after all retries have been exhausted,
- ensuring that the flow execution can continue.
+ You can choose the compute layer on the command line by executing e.g.
+ ```
+ python myflow.py run --with batch
+ ```
+ or
+ ```
+ python myflow.py run --with kubernetes
+ ```
+ which executes the flow on the desired system using the
+ requirements specified in `@resources`.


  Parameters
  ----------
- times : int, default 3
- Number of times to retry this task.
- minutes_between_retries : int, default 2
- Number of minutes between retries.
+ cpu : int, default 1
+ Number of CPUs required for this step.
+ gpu : int, optional, default None
+ Number of GPUs required for this step.
+ disk : int, optional, default None
+ Disk size (in MB) required for this step. Only applies on Kubernetes.
+ memory : int, default 4096
+ Memory size (in MB) required for this step.
+ shared_memory : int, optional, default None
+ The value for the size (in MiB) of the /dev/shm volume for this step.
+ This parameter maps to the `--shm-size` option in Docker.
  """
  ...

  @typing.overload
- def retry(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
+ def resources(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
  ...

  @typing.overload
- def retry(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
+ def resources(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
  ...

- def retry(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, times: int = 3, minutes_between_retries: int = 2):
+ def resources(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None):
  """
- Specifies the number of times the task corresponding
- to a step needs to be retried.
+ Specifies the resources needed when executing this step.

- This decorator is useful for handling transient errors, such as networking issues.
- If your task contains operations that can't be retried safely, e.g. database updates,
- it is advisable to annotate it with `@retry(times=0)`.
+ Use `@resources` to specify the resource requirements
+ independently of the specific compute layer (`@batch`, `@kubernetes`).

- This can be used in conjunction with the `@catch` decorator. The `@catch`
- decorator will execute a no-op task after all retries have been exhausted,
- ensuring that the flow execution can continue.
+ You can choose the compute layer on the command line by executing e.g.
+ ```
+ python myflow.py run --with batch
+ ```
+ or
+ ```
+ python myflow.py run --with kubernetes
+ ```
+ which executes the flow on the desired system using the
+ requirements specified in `@resources`.


  Parameters
  ----------
- times : int, default 3
- Number of times to retry this task.
- minutes_between_retries : int, default 2
- Number of minutes between retries.
+ cpu : int, default 1
+ Number of CPUs required for this step.
+ gpu : int, optional, default None
+ Number of GPUs required for this step.
+ disk : int, optional, default None
+ Disk size (in MB) required for this step. Only applies on Kubernetes.
+ memory : int, default 4096
+ Memory size (in MB) required for this step.
+ shared_memory : int, optional, default None
+ The value for the size (in MiB) of the /dev/shm volume for this step.
+ This parameter maps to the `--shm-size` option in Docker.
  """
  ...

@@ -367,260 +318,296 @@ def kubernetes(*, cpu: int = 1, memory: int = 4096, disk: int = 10240, image: ty
  """
  ...

- @typing.overload
- def catch(*, var: typing.Optional[str] = None, print_exception: bool = True) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
+ def nvct(*, gpu: int, gpu_type: str) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
  """
- Specifies that the step will success under all circumstances.
+ Specifies that this step should execute on DGX cloud.

- The decorator will create an optional artifact, specified by `var`, which
- contains the exception raised. You can use it to detect the presence
- of errors, indicating that all happy-path artifacts produced by the step
- are missing.
+
+ Parameters
+ ----------
+ gpu : int
+ Number of GPUs to use.
+ gpu_type : str
+ Type of Nvidia GPU to use.
+ """
+ ...
+
+ @typing.overload
+ def secrets(*, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = []) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
+ """
+ Specifies secrets to be retrieved and injected as environment variables prior to
+ the execution of a step.


  Parameters
  ----------
- var : str, optional, default None
- Name of the artifact in which to store the caught exception.
- If not specified, the exception is not stored.
- print_exception : bool, default True
- Determines whether or not the exception is printed to
- stdout when caught.
+ sources : List[Union[str, Dict[str, Any]]], default: []
+ List of secret specs, defining how the secrets are to be retrieved
  """
  ...

  @typing.overload
- def catch(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
+ def secrets(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
  ...

  @typing.overload
- def catch(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
+ def secrets(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
  ...

- def catch(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, var: typing.Optional[str] = None, print_exception: bool = True):
+ def secrets(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = []):
  """
- Specifies that the step will success under all circumstances.
-
- The decorator will create an optional artifact, specified by `var`, which
- contains the exception raised. You can use it to detect the presence
- of errors, indicating that all happy-path artifacts produced by the step
- are missing.
+ Specifies secrets to be retrieved and injected as environment variables prior to
+ the execution of a step.


  Parameters
  ----------
- var : str, optional, default None
- Name of the artifact in which to store the caught exception.
- If not specified, the exception is not stored.
- print_exception : bool, default True
- Determines whether or not the exception is printed to
- stdout when caught.
+ sources : List[Union[str, Dict[str, Any]]], default: []
+ List of secret specs, defining how the secrets are to be retrieved
  """
  ...

- def huggingface_hub(*, temp_dir_root: typing.Optional[str] = None, load: typing.Union[typing.List[str], typing.List[typing.Tuple[typing.Dict, str]], typing.List[typing.Tuple[str, str]], typing.List[typing.Dict], None]) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
+ @typing.overload
+ def environment(*, vars: typing.Dict[str, str] = {}) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
  """
- Decorator that helps cache, version and store models/datasets from huggingface hub.
+ Specifies environment variables to be set prior to the execution of a step.


  Parameters
  ----------
- temp_dir_root : str, optional
- The root directory that will hold the temporary directory where objects will be downloaded.
-
- load: Union[List[str], List[Tuple[Dict, str]], List[Tuple[str, str]], List[Dict], None]
- The list of repos (models/datasets) to load.
-
- Loaded repos can be accessed via `current.huggingface_hub.loaded`. If load is set, then the following happens:
+ vars : Dict[str, str], default {}
+ Dictionary of environment variables to set.
+ """
+ ...
+
+ @typing.overload
+ def environment(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
+ ...
+
+ @typing.overload
+ def environment(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
+ ...
+
+ def environment(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, vars: typing.Dict[str, str] = {}):
+ """
+ Specifies environment variables to be set prior to the execution of a step.

- - If repo (model/dataset) is not found in the datastore:
- - Downloads the repo from Hugging Face Hub to a temporary directory (or uses specified path) for local access
- - Stores it in Metaflow's datastore (s3/gcs/azure etc.) with a unique name based on repo_type/repo_id
- - All HF models loaded for a `@step` will be cached separately under flow/step/namespace.

- - If repo is found in the datastore:
- - Loads it directly from datastore to local path (can be temporary directory or specified path)
+ Parameters
+ ----------
+ vars : Dict[str, str], default {}
+ Dictionary of environment variables to set.
  """
  ...

- def ollama(*, models: list, backend: str, force_pull: bool, cache_update_policy: str, force_cache_update: bool, debug: bool, circuit_breaker_config: dict, timeout_config: dict) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
+ @typing.overload
+ def retry(*, times: int = 3, minutes_between_retries: int = 2) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
  """
- This decorator is used to run Ollama APIs as Metaflow task sidecars.
+ Specifies the number of times the task corresponding
+ to a step needs to be retried.

- User code call
- --------------
- @ollama(
- models=[...],
- ...
- )
-
- Valid backend options
- ---------------------
- - 'local': Run as a separate process on the local task machine.
- - (TODO) 'managed': Outerbounds hosts and selects compute provider.
- - (TODO) 'remote': Spin up separate instance to serve Ollama models.
+ This decorator is useful for handling transient errors, such as networking issues.
+ If your task contains operations that can't be retried safely, e.g. database updates,
+ it is advisable to annotate it with `@retry(times=0)`.

- Valid model options
- -------------------
- Any model here https://ollama.com/search, e.g. 'llama3.2', 'llama3.3'
+ This can be used in conjunction with the `@catch` decorator. The `@catch`
+ decorator will execute a no-op task after all retries have been exhausted,
+ ensuring that the flow execution can continue.


  Parameters
  ----------
- models: list[str]
- List of Ollama containers running models in sidecars.
- backend: str
- Determines where and how to run the Ollama process.
- force_pull: bool
- Whether to run `ollama pull` no matter what, or first check the remote cache in Metaflow datastore for this model key.
- cache_update_policy: str
- Cache update policy: "auto", "force", or "never".
- force_cache_update: bool
- Simple override for "force" cache update policy.
- debug: bool
- Whether to turn on verbose debugging logs.
- circuit_breaker_config: dict
- Configuration for circuit breaker protection. Keys: failure_threshold, recovery_timeout, reset_timeout.
- timeout_config: dict
- Configuration for various operation timeouts. Keys: pull, stop, health_check, install, server_startup.
+ times : int, default 3
+ Number of times to retry this task.
+ minutes_between_retries : int, default 2
+ Number of minutes between retries.
  """
  ...

  @typing.overload
- def timeout(*, seconds: int = 0, minutes: int = 0, hours: int = 0) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
+ def retry(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
+ ...
+
+ @typing.overload
+ def retry(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
+ ...
+
+ def retry(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, times: int = 3, minutes_between_retries: int = 2):
  """
- Specifies a timeout for your step.
+ Specifies the number of times the task corresponding
+ to a step needs to be retried.

- This decorator is useful if this step may hang indefinitely.
+ This decorator is useful for handling transient errors, such as networking issues.
+ If your task contains operations that can't be retried safely, e.g. database updates,
+ it is advisable to annotate it with `@retry(times=0)`.

- This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
- A timeout is considered to be an exception thrown by the step. It will cause the step to be
- retried if needed and the exception will be caught by the `@catch` decorator, if present.
+ This can be used in conjunction with the `@catch` decorator. The `@catch`
+ decorator will execute a no-op task after all retries have been exhausted,
+ ensuring that the flow execution can continue.
+
+
+ Parameters
+ ----------
+ times : int, default 3
+ Number of times to retry this task.
+ minutes_between_retries : int, default 2
+ Number of minutes between retries.
+ """
+ ...
+
+ @typing.overload
+ def model(*, load: typing.Union[typing.List[str], str, typing.List[typing.Tuple[str, typing.Optional[str]]]] = None, temp_dir_root: str = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
+ """
+ Enables loading / saving of models within a step.

- Note that all the values specified in parameters are added together so if you specify
- 60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.


  Parameters
  ----------
- seconds : int, default 0
- Number of seconds to wait prior to timing out.
- minutes : int, default 0
- Number of minutes to wait prior to timing out.
- hours : int, default 0
- Number of hours to wait prior to timing out.
+ load : Union[List[str],str,List[Tuple[str,Union[str,None]]]], default: None
+ Artifact name/s referencing the models/checkpoints to load. Artifact names refer to the names of the instance variables set to `self`.
+ These artifact names give to `load` be reference objects or reference `key` string's from objects created by:
+ - `current.checkpoint`
+ - `current.model`
+ - `current.huggingface_hub`
+
+ If a list of tuples is provided, the first element is the artifact name and the second element is the path the artifact needs be unpacked on
+ the local filesystem. If the second element is None, the artifact will be unpacked in the current working directory.
+ If a string is provided, then the artifact corresponding to that name will be loaded in the current working directory.
+
+ temp_dir_root : str, default: None
+ The root directory under which `current.model.loaded` will store loaded models
  """
  ...

  @typing.overload
- def timeout(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
+ def model(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
  ...

  @typing.overload
- def timeout(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
+ def model(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
  ...

- def timeout(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, seconds: int = 0, minutes: int = 0, hours: int = 0):
+ def model(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, load: typing.Union[typing.List[str], str, typing.List[typing.Tuple[str, typing.Optional[str]]]] = None, temp_dir_root: str = None):
  """
- Specifies a timeout for your step.
-
- This decorator is useful if this step may hang indefinitely.
-
- This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
- A timeout is considered to be an exception thrown by the step. It will cause the step to be
- retried if needed and the exception will be caught by the `@catch` decorator, if present.
+ Enables loading / saving of models within a step.

- Note that all the values specified in parameters are added together so if you specify
- 60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.


  Parameters
  ----------
- seconds : int, default 0
- Number of seconds to wait prior to timing out.
- minutes : int, default 0
- Number of minutes to wait prior to timing out.
- hours : int, default 0
- Number of hours to wait prior to timing out.
+ load : Union[List[str],str,List[Tuple[str,Union[str,None]]]], default: None
+ Artifact name/s referencing the models/checkpoints to load. Artifact names refer to the names of the instance variables set to `self`.
+ These artifact names give to `load` be reference objects or reference `key` string's from objects created by:
+ - `current.checkpoint`
+ - `current.model`
+ - `current.huggingface_hub`
+
+ If a list of tuples is provided, the first element is the artifact name and the second element is the path the artifact needs be unpacked on
+ the local filesystem. If the second element is None, the artifact will be unpacked in the current working directory.
+ If a string is provided, then the artifact corresponding to that name will be loaded in the current working directory.
+
+ temp_dir_root : str, default: None
+ The root directory under which `current.model.loaded` will store loaded models
  """
  ...

- @typing.overload
- def resources(*, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
+ def app_deploy(*, app_port: int, app_name: str) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
  """
- Specifies the resources needed when executing this step.
-
- Use `@resources` to specify the resource requirements
- independently of the specific compute layer (`@batch`, `@kubernetes`).
-
- You can choose the compute layer on the command line by executing e.g.
- ```
- python myflow.py run --with batch
- ```
- or
- ```
- python myflow.py run --with kubernetes
- ```
- which executes the flow on the desired system using the
- requirements specified in `@resources`.
+ Specifies that this step is used to deploy an instance of the app.
+ Requires that self.app_name, self.app_port, self.entrypoint and self.deployDir is set.


  Parameters
  ----------
- cpu : int, default 1
- Number of CPUs required for this step.
- gpu : int, optional, default None
- Number of GPUs required for this step.
- disk : int, optional, default None
- Disk size (in MB) required for this step. Only applies on Kubernetes.
- memory : int, default 4096
- Memory size (in MB) required for this step.
- shared_memory : int, optional, default None
- The value for the size (in MiB) of the /dev/shm volume for this step.
- This parameter maps to the `--shm-size` option in Docker.
+ app_port : int
+ Number of GPUs to use.
+ app_name : str
+ Name of the app to deploy.
  """
  ...

- @typing.overload
- def resources(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
+ def nvidia(*, gpu: int, gpu_type: str, queue_timeout: int) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
+ """
+ Specifies that this step should execute on DGX cloud.
+
+
+ Parameters
+ ----------
+ gpu : int
+ Number of GPUs to use.
+ gpu_type : str
+ Type of Nvidia GPU to use.
+ queue_timeout : int
+ Time to keep the job in NVCF's queue.
+ """
  ...

- @typing.overload
- def resources(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
+ def huggingface_hub(*, temp_dir_root: typing.Optional[str] = None, load: typing.Union[typing.List[str], typing.List[typing.Tuple[typing.Dict, str]], typing.List[typing.Tuple[str, str]], typing.List[typing.Dict], None]) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
+ """
+ Decorator that helps cache, version and store models/datasets from huggingface hub.
+
+
+ Parameters
+ ----------
+ temp_dir_root : str, optional
+ The root directory that will hold the temporary directory where objects will be downloaded.
+
+ load: Union[List[str], List[Tuple[Dict, str]], List[Tuple[str, str]], List[Dict], None]
+ The list of repos (models/datasets) to load.
+
+ Loaded repos can be accessed via `current.huggingface_hub.loaded`. If load is set, then the following happens:
+
+ - If repo (model/dataset) is not found in the datastore:
+ - Downloads the repo from Hugging Face Hub to a temporary directory (or uses specified path) for local access
+ - Stores it in Metaflow's datastore (s3/gcs/azure etc.) with a unique name based on repo_type/repo_id
+ - All HF models loaded for a `@step` will be cached separately under flow/step/namespace.
+
+ - If repo is found in the datastore:
+ - Loads it directly from datastore to local path (can be temporary directory or specified path)
+ """
  ...

- def resources(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None):
+ def ollama(*, models: list, backend: str, force_pull: bool, cache_update_policy: str, force_cache_update: bool, debug: bool, circuit_breaker_config: dict, timeout_config: dict) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
  """
- Specifies the resources needed when executing this step.
+ This decorator is used to run Ollama APIs as Metaflow task sidecars.

- Use `@resources` to specify the resource requirements
- independently of the specific compute layer (`@batch`, `@kubernetes`).
+ User code call
+ --------------
+ @ollama(
+ models=[...],
+ ...
+ )

- You can choose the compute layer on the command line by executing e.g.
- ```
- python myflow.py run --with batch
- ```
- or
- ```
- python myflow.py run --with kubernetes
- ```
- which executes the flow on the desired system using the
- requirements specified in `@resources`.
+ Valid backend options
+ ---------------------
+ - 'local': Run as a separate process on the local task machine.
+ - (TODO) 'managed': Outerbounds hosts and selects compute provider.
+ - (TODO) 'remote': Spin up separate instance to serve Ollama models.
+
+ Valid model options
+ -------------------
+ Any model here https://ollama.com/search, e.g. 'llama3.2', 'llama3.3'


  Parameters
  ----------
- cpu : int, default 1
- Number of CPUs required for this step.
- gpu : int, optional, default None
- Number of GPUs required for this step.
- disk : int, optional, default None
- Disk size (in MB) required for this step. Only applies on Kubernetes.
- memory : int, default 4096
- Memory size (in MB) required for this step.
- shared_memory : int, optional, default None
- The value for the size (in MiB) of the /dev/shm volume for this step.
- This parameter maps to the `--shm-size` option in Docker.
+ models: list[str]
+ List of Ollama containers running models in sidecars.
+ backend: str
+ Determines where and how to run the Ollama process.
+ force_pull: bool
+ Whether to run `ollama pull` no matter what, or first check the remote cache in Metaflow datastore for this model key.
+ cache_update_policy: str
+ Cache update policy: "auto", "force", or "never".
+ force_cache_update: bool
+ Simple override for "force" cache update policy.
+ debug: bool
+ Whether to turn on verbose debugging logs.
+ circuit_breaker_config: dict
+ Configuration for circuit breaker protection. Keys: failure_threshold, recovery_timeout, reset_timeout.
+ timeout_config: dict
+ Configuration for various operation timeouts. Keys: pull, stop, health_check, install, server_startup.
  """
  ...

@@ -642,35 +629,53 @@ def fast_bakery_internal(f: typing.Union[typing.Callable[[FlowSpecDerived, StepF
642
629
  ...
643
630
 
644
631
  @typing.overload
645
- def environment(*, vars: typing.Dict[str, str] = {}) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
632
+ def catch(*, var: typing.Optional[str] = None, print_exception: bool = True) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
646
633
  """
647
- Specifies environment variables to be set prior to the execution of a step.
634
+ Specifies that the step will success under all circumstances.
635
+
636
+ The decorator will create an optional artifact, specified by `var`, which
637
+ contains the exception raised. You can use it to detect the presence
638
+ of errors, indicating that all happy-path artifacts produced by the step
639
+ are missing.
648
640
 
649
641
 
650
642
  Parameters
651
643
  ----------
652
- vars : Dict[str, str], default {}
653
- Dictionary of environment variables to set.
644
+ var : str, optional, default None
645
+ Name of the artifact in which to store the caught exception.
646
+ If not specified, the exception is not stored.
647
+ print_exception : bool, default True
648
+ Determines whether or not the exception is printed to
649
+ stdout when caught.
654
650
  """
655
651
  ...
656
652
 
657
653
  @typing.overload
658
- def environment(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
654
+ def catch(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
659
655
  ...
660
656
 
661
657
  @typing.overload
662
- def environment(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
658
+ def catch(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
663
659
  ...
664
660
 
665
- def environment(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, vars: typing.Dict[str, str] = {}):
661
+ def catch(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, var: typing.Optional[str] = None, print_exception: bool = True):
666
662
  """
667
- Specifies environment variables to be set prior to the execution of a step.
663
+ Specifies that the step will succeed under all circumstances.
664
+
665
+ The decorator will create an optional artifact, specified by `var`, which
666
+ contains the exception raised. You can use it to detect the presence
667
+ of errors, indicating that all happy-path artifacts produced by the step
668
+ are missing.
668
669
 
669
670
 
670
671
  Parameters
671
672
  ----------
672
- vars : Dict[str, str], default {}
673
- Dictionary of environment variables to set.
673
+ var : str, optional, default None
674
+ Name of the artifact in which to store the caught exception.
675
+ If not specified, the exception is not stored.
676
+ print_exception : bool, default True
677
+ Determines whether or not the exception is printed to
678
+ stdout when caught.
674
679
  """
675
680
  ...
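A minimal sketch of the `@catch` usage described above; the flow and artifact names are illustrative.

```python
from metaflow import FlowSpec, catch, step


class CatchDemoFlow(FlowSpec):

    @catch(var="compute_failed", print_exception=True)
    @step
    def start(self):
        self.result = 1 / 0  # raises; @catch stores the exception instead of failing the step
        self.next(self.end)

    @step
    def end(self):
        # When the exception was caught, `compute_failed` exists and the
        # happy-path artifact `result` is missing.
        if getattr(self, "compute_failed", None) is not None:
            print("start failed with:", self.compute_failed)


if __name__ == "__main__":
    CatchDemoFlow()
```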
676
681
 
@@ -726,250 +731,245 @@ def pypi(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typ
726
731
  ...
727
732
 
728
733
  @typing.overload
729
- def card(*, type: str = 'default', id: typing.Optional[str] = None, options: typing.Dict[str, typing.Any] = {}, timeout: int = 45) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
734
+ def conda(*, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
730
735
  """
731
- Creates a human-readable report, a Metaflow Card, after this step completes.
736
+ Specifies the Conda environment for the step.
732
737
 
733
- Note that you may add multiple `@card` decorators in a step with different parameters.
738
+ Information in this decorator will augment any
739
+ attributes set in the `@conda_base` flow-level decorator. Hence,
740
+ you can use `@conda_base` to set packages required by all
741
+ steps and use `@conda` to specify step-specific overrides.
734
742
 
735
743
 
736
744
  Parameters
737
745
  ----------
738
- type : str, default 'default'
739
- Card type.
740
- id : str, optional, default None
741
- If multiple cards are present, use this id to identify this card.
742
- options : Dict[str, Any], default {}
743
- Options passed to the card. The contents depend on the card type.
744
- timeout : int, default 45
745
- Interrupt reporting if it takes more than this many seconds.
746
+ packages : Dict[str, str], default {}
747
+ Packages to use for this step. The key is the name of the package
748
+ and the value is the version to use.
749
+ libraries : Dict[str, str], default {}
750
+ Supported for backward compatibility. When used with packages, packages will take precedence.
751
+ python : str, optional, default None
752
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
753
+ that the version used will correspond to the version of the Python interpreter used to start the run.
754
+ disabled : bool, default False
755
+ If set to True, disables @conda.
746
756
  """
747
757
  ...
748
758
 
749
759
  @typing.overload
750
- def card(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
760
+ def conda(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
751
761
  ...
752
762
 
753
763
  @typing.overload
754
- def card(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
764
+ def conda(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
755
765
  ...
756
766
 
757
- def card(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, type: str = 'default', id: typing.Optional[str] = None, options: typing.Dict[str, typing.Any] = {}, timeout: int = 45):
767
+ def conda(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False):
758
768
  """
759
- Creates a human-readable report, a Metaflow Card, after this step completes.
769
+ Specifies the Conda environment for the step.
760
770
 
761
- Note that you may add multiple `@card` decorators in a step with different parameters.
771
+ Information in this decorator will augment any
772
+ attributes set in the `@conda_base` flow-level decorator. Hence,
773
+ you can use `@conda_base` to set packages required by all
774
+ steps and use `@conda` to specify step-specific overrides.
762
775
 
763
776
 
764
777
  Parameters
765
778
  ----------
766
- type : str, default 'default'
767
- Card type.
768
- id : str, optional, default None
769
- If multiple cards are present, use this id to identify this card.
770
- options : Dict[str, Any], default {}
771
- Options passed to the card. The contents depend on the card type.
772
- timeout : int, default 45
773
- Interrupt reporting if it takes more than this many seconds.
774
- """
775
- ...
776
-
777
- @typing.overload
778
- def parallel(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
779
- """
780
- Decorator prototype for all step decorators. This function gets specialized
781
- and imported for all decorators types by _import_plugin_decorators().
779
+ packages : Dict[str, str], default {}
780
+ Packages to use for this step. The key is the name of the package
781
+ and the value is the version to use.
782
+ libraries : Dict[str, str], default {}
783
+ Supported for backward compatibility. When used with packages, packages will take precedence.
784
+ python : str, optional, default None
785
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
786
+ that the version used will correspond to the version of the Python interpreter used to start the run.
787
+ disabled : bool, default False
788
+ If set to True, disables @conda.
782
789
  """
783
790
  ...
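A short sketch of combining `@conda_base` with a step-level `@conda` override, as described above; the package and Python versions are illustrative.

```python
from metaflow import FlowSpec, conda, conda_base, step


@conda_base(python="3.10.12")              # flow-wide default environment (version illustrative)
class CondaDemoFlow(FlowSpec):

    @conda(packages={"numpy": "1.26.4"})   # step-specific addition on top of @conda_base
    @step
    def start(self):
        import numpy as np                 # available inside this step's resolved environment
        self.mean = float(np.mean([1.0, 2.0, 3.0]))
        self.next(self.end)

    @step
    def end(self):
        print("mean =", self.mean)


if __name__ == "__main__":
    CondaDemoFlow()
```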
784
791
 
785
792
  @typing.overload
786
- def parallel(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
787
- ...
788
-
789
- def parallel(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
790
- """
791
- Decorator prototype for all step decorators. This function gets specialized
792
- and imported for all decorators types by _import_plugin_decorators().
793
- """
794
- ...
795
-
796
- def nvct(*, gpu: int, gpu_type: str) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
793
+ def checkpoint(*, load_policy: str = 'fresh', temp_dir_root: str = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
797
794
  """
798
- Specifies that this step should execute on DGX cloud.
795
+ Enables checkpointing for a step.
796
+
799
797
 
800
798
 
801
799
  Parameters
802
800
  ----------
803
- gpu : int
804
- Number of GPUs to use.
805
- gpu_type : str
806
- Type of Nvidia GPU to use.
801
+ load_policy : str, default: "fresh"
802
+ The policy for loading the checkpoint. The following policies are supported:
803
+ - "eager": Loads the the latest available checkpoint within the namespace.
804
+ With this mode, the latest checkpoint written by any previous task (can be even a different run) of the step
805
+ will be loaded at the start of the task.
806
+ - "none": Do not load any checkpoint
807
+ - "fresh": Loads the lastest checkpoint created within the running Task.
808
+ This mode helps loading checkpoints across various retry attempts of the same task.
809
+ With this mode, no checkpoint will be loaded at the start of a task but any checkpoints
810
+ created within the task will be loaded when the task is retries execution on failure.
811
+
812
+ temp_dir_root : str, default: None
813
+ The root directory under which `current.checkpoint.directory` will be created.
807
814
  """
808
815
  ...
809
816
 
810
- def app_deploy(*, app_port: int, app_name: str) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
811
- """
812
- Specifies that this step is used to deploy an instance of the app.
813
- Requires that self.app_name, self.app_port, self.entrypoint and self.deployDir are set.
814
-
815
-
816
- Parameters
817
- ----------
818
- app_port : int
819
- Port on which the app will be served.
820
- app_name : str
821
- Name of the app to deploy.
822
- """
817
+ @typing.overload
818
+ def checkpoint(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
823
819
  ...
824
820
 
825
821
  @typing.overload
826
- def model(*, load: typing.Union[typing.List[str], str, typing.List[typing.Tuple[str, typing.Optional[str]]]] = None, temp_dir_root: str = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
822
+ def checkpoint(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
823
+ ...
824
+
825
+ def checkpoint(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, load_policy: str = 'fresh', temp_dir_root: str = None):
827
826
  """
828
- Enables loading / saving of models within a step.
827
+ Enables checkpointing for a step.
829
828
 
830
829
 
831
830
 
832
831
  Parameters
833
832
  ----------
834
- load : Union[List[str],str,List[Tuple[str,Union[str,None]]]], default: None
835
- Artifact name/s referencing the models/checkpoints to load. Artifact names refer to the names of the instance variables set to `self`.
836
- These artifact names given to `load` can be reference objects or reference `key` strings from objects created by:
837
- - `current.checkpoint`
838
- - `current.model`
839
- - `current.huggingface_hub`
840
-
841
- If a list of tuples is provided, the first element is the artifact name and the second element is the path where the artifact needs to be unpacked on
842
- the local filesystem. If the second element is None, the artifact will be unpacked in the current working directory.
843
- If a string is provided, then the artifact corresponding to that name will be loaded in the current working directory.
833
+ load_policy : str, default: "fresh"
834
+ The policy for loading the checkpoint. The following policies are supported:
835
+ - "eager": Loads the the latest available checkpoint within the namespace.
836
+ With this mode, the latest checkpoint written by any previous task (can be even a different run) of the step
837
+ will be loaded at the start of the task.
838
+ - "none": Do not load any checkpoint
839
+ - "fresh": Loads the lastest checkpoint created within the running Task.
840
+ This mode helps loading checkpoints across various retry attempts of the same task.
841
+ With this mode, no checkpoint will be loaded at the start of a task but any checkpoints
842
+ created within the task will be loaded when the task is retries execution on failure.
844
843
 
845
844
  temp_dir_root : str, default: None
846
- The root directory under which `current.model.loaded` will store loaded models
845
+ The root directory under which `current.checkpoint.directory` will be created.
847
846
  """
848
847
  ...
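A hedged sketch of the `@checkpoint` behavior described above, using the `current.checkpoint.save` call that also appears in this stub's `@with_artifact_store` examples; the import path for `checkpoint` is assumed to match the other decorators in this module.

```python
from metaflow import FlowSpec, checkpoint, current, step


class CheckpointDemoFlow(FlowSpec):

    @checkpoint(load_policy="fresh")   # reload checkpoints written by earlier retries of this task
    @step
    def start(self):
        with open("state.txt", "w") as f:
            f.write("epoch=1")
        # Save the file as a checkpoint and keep the returned reference as an artifact.
        self.ckpt = current.checkpoint.save("state.txt")
        self.next(self.end)

    @step
    def end(self):
        print("checkpoint reference:", self.ckpt)


if __name__ == "__main__":
    CheckpointDemoFlow()
```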
849
848
 
850
849
  @typing.overload
851
- def model(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
850
+ def parallel(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
851
+ """
852
+ Decorator prototype for all step decorators. This function gets specialized
853
+ and imported for all decorators types by _import_plugin_decorators().
854
+ """
852
855
  ...
853
856
 
854
857
  @typing.overload
855
- def model(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
858
+ def parallel(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
856
859
  ...
857
860
 
858
- def model(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, load: typing.Union[typing.List[str], str, typing.List[typing.Tuple[str, typing.Optional[str]]]] = None, temp_dir_root: str = None):
861
+ def parallel(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
859
862
  """
860
- Enables loading / saving of models within a step.
861
-
862
-
863
-
864
- Parameters
865
- ----------
866
- load : Union[List[str],str,List[Tuple[str,Union[str,None]]]], default: None
867
- Artifact name/s referencing the models/checkpoints to load. Artifact names refer to the names of the instance variables set to `self`.
868
- These artifact names given to `load` can be reference objects or reference `key` strings from objects created by:
869
- - `current.checkpoint`
870
- - `current.model`
871
- - `current.huggingface_hub`
872
-
873
- If a list of tuples is provided, the first element is the artifact name and the second element is the path where the artifact needs to be unpacked on
874
- the local filesystem. If the second element is None, the artifact will be unpacked in the current working directory.
875
- If a string is provided, then the artifact corresponding to that name will be loaded in the current working directory.
876
-
877
- temp_dir_root : str, default: None
878
- The root directory under which `current.model.loaded` will store loaded models
863
+ Decorator prototype for all step decorators. This function gets specialized
864
+ and imported for all decorators types by _import_plugin_decorators().
879
865
  """
880
866
  ...
881
867
 
882
868
  @typing.overload
883
- def secrets(*, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = []) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
869
+ def timeout(*, seconds: int = 0, minutes: int = 0, hours: int = 0) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
884
870
  """
885
- Specifies secrets to be retrieved and injected as environment variables prior to
886
- the execution of a step.
871
+ Specifies a timeout for your step.
872
+
873
+ This decorator is useful if this step may hang indefinitely.
874
+
875
+ This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
876
+ A timeout is considered to be an exception thrown by the step. It will cause the step to be
877
+ retried if needed and the exception will be caught by the `@catch` decorator, if present.
878
+
879
+ Note that all the values specified in parameters are added together so if you specify
880
+ 60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.
887
881
 
888
882
 
889
883
  Parameters
890
884
  ----------
891
- sources : List[Union[str, Dict[str, Any]]], default: []
892
- List of secret specs, defining how the secrets are to be retrieved
885
+ seconds : int, default 0
886
+ Number of seconds to wait prior to timing out.
887
+ minutes : int, default 0
888
+ Number of minutes to wait prior to timing out.
889
+ hours : int, default 0
890
+ Number of hours to wait prior to timing out.
893
891
  """
894
892
  ...
895
893
 
896
894
  @typing.overload
897
- def secrets(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
895
+ def timeout(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
898
896
  ...
899
897
 
900
898
  @typing.overload
901
- def secrets(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
899
+ def timeout(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
902
900
  ...
903
901
 
904
- def secrets(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = []):
902
+ def timeout(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, seconds: int = 0, minutes: int = 0, hours: int = 0):
905
903
  """
906
- Specifies secrets to be retrieved and injected as environment variables prior to
907
- the execution of a step.
904
+ Specifies a timeout for your step.
905
+
906
+ This decorator is useful if this step may hang indefinitely.
907
+
908
+ This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
909
+ A timeout is considered to be an exception thrown by the step. It will cause the step to be
910
+ retried if needed and the exception will be caught by the `@catch` decorator, if present.
911
+
912
+ Note that all the values specified in parameters are added together so if you specify
913
+ 60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.
908
914
 
909
915
 
910
916
  Parameters
911
917
  ----------
912
- sources : List[Union[str, Dict[str, Any]]], default: []
913
- List of secret specs, defining how the secrets are to be retrieved
918
+ seconds : int, default 0
919
+ Number of seconds to wait prior to timing out.
920
+ minutes : int, default 0
921
+ Number of minutes to wait prior to timing out.
922
+ hours : int, default 0
923
+ Number of hours to wait prior to timing out.
914
924
  """
915
925
  ...
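A sketch of stacking `@timeout` with `@retry` and `@catch` as the docstring suggests; note how the duration arguments add up (1 hour + 60 seconds gives an effective timeout of 1 hour and 1 minute).

```python
from metaflow import FlowSpec, catch, retry, step, timeout


class TimeoutDemoFlow(FlowSpec):

    @catch(var="timed_out")            # a timeout surfaces as an exception, so @catch can record it
    @retry(times=2)                    # the step is retried before the exception is finally caught
    @timeout(hours=1, seconds=60)      # effective timeout: 1 hour and 1 minute
    @step
    def start(self):
        import time
        time.sleep(5)                  # stand-in for work that might hang indefinitely
        self.next(self.end)

    @step
    def end(self):
        print("timeout artifact:", getattr(self, "timed_out", None))


if __name__ == "__main__":
    TimeoutDemoFlow()
```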
916
926
 
917
927
  @typing.overload
918
- def conda(*, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
928
+ def card(*, type: str = 'default', id: typing.Optional[str] = None, options: typing.Dict[str, typing.Any] = {}, timeout: int = 45) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
919
929
  """
920
- Specifies the Conda environment for the step.
930
+ Creates a human-readable report, a Metaflow Card, after this step completes.
921
931
 
922
- Information in this decorator will augment any
923
- attributes set in the `@conda_base` flow-level decorator. Hence,
924
- you can use `@conda_base` to set packages required by all
925
- steps and use `@conda` to specify step-specific overrides.
932
+ Note that you may add multiple `@card` decorators in a step with different parameters.
926
933
 
927
934
 
928
935
  Parameters
929
936
  ----------
930
- packages : Dict[str, str], default {}
931
- Packages to use for this step. The key is the name of the package
932
- and the value is the version to use.
933
- libraries : Dict[str, str], default {}
934
- Supported for backward compatibility. When used with packages, packages will take precedence.
935
- python : str, optional, default None
936
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
937
- that the version used will correspond to the version of the Python interpreter used to start the run.
938
- disabled : bool, default False
939
- If set to True, disables @conda.
937
+ type : str, default 'default'
938
+ Card type.
939
+ id : str, optional, default None
940
+ If multiple cards are present, use this id to identify this card.
941
+ options : Dict[str, Any], default {}
942
+ Options passed to the card. The contents depend on the card type.
943
+ timeout : int, default 45
944
+ Interrupt reporting if it takes more than this many seconds.
940
945
  """
941
946
  ...
942
947
 
943
948
  @typing.overload
944
- def conda(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
949
+ def card(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
945
950
  ...
946
951
 
947
952
  @typing.overload
948
- def conda(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
953
+ def card(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
949
954
  ...
950
955
 
951
- def conda(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False):
956
+ def card(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, type: str = 'default', id: typing.Optional[str] = None, options: typing.Dict[str, typing.Any] = {}, timeout: int = 45):
952
957
  """
953
- Specifies the Conda environment for the step.
958
+ Creates a human-readable report, a Metaflow Card, after this step completes.
954
959
 
955
- Information in this decorator will augment any
956
- attributes set in the `@conda_base` flow-level decorator. Hence,
957
- you can use `@conda_base` to set packages required by all
958
- steps and use `@conda` to specify step-specific overrides.
960
+ Note that you may add multiple `@card` decorators in a step with different parameters.
959
961
 
960
962
 
961
963
  Parameters
962
964
  ----------
963
- packages : Dict[str, str], default {}
964
- Packages to use for this step. The key is the name of the package
965
- and the value is the version to use.
966
- libraries : Dict[str, str], default {}
967
- Supported for backward compatibility. When used with packages, packages will take precedence.
968
- python : str, optional, default None
969
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
970
- that the version used will correspond to the version of the Python interpreter used to start the run.
971
- disabled : bool, default False
972
- If set to True, disables @conda.
965
+ type : str, default 'default'
966
+ Card type.
967
+ id : str, optional, default None
968
+ If multiple cards are present, use this id to identify this card.
969
+ options : Dict[str, Any], default {}
970
+ Options passed to the card. The contents depend on the card type.
971
+ timeout : int, default 45
972
+ Interrupt reporting if it takes more than this many seconds.
973
973
  """
974
974
  ...
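A small sketch of producing a card from a step; `Markdown` and `current.card` are part of Metaflow's card API, and the reported metric is illustrative.

```python
from metaflow import FlowSpec, card, current, step
from metaflow.cards import Markdown


class CardDemoFlow(FlowSpec):

    @card(type="default", id="report", timeout=45)
    @step
    def start(self):
        self.loss = 0.12  # illustrative metric
        # Components appended here are rendered into the card after the step completes.
        current.card.append(Markdown(f"# Training summary\n\nfinal loss: {self.loss}"))
        self.next(self.end)

    @step
    def end(self):
        pass


if __name__ == "__main__":
    CardDemoFlow()
```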
975
975
 
@@ -1016,81 +1016,168 @@ def airflow_external_task_sensor(*, timeout: int, poke_interval: int, mode: str,
1016
1016
  """
1017
1017
  ...
1018
1018
 
1019
- def airflow_s3_key_sensor(*, timeout: int, poke_interval: int, mode: str, exponential_backoff: bool, pool: str, soft_fail: bool, name: str, description: str, bucket_key: typing.Union[str, typing.List[str]], bucket_name: str, wildcard_match: bool, aws_conn_id: str, verify: bool) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1019
+ def with_artifact_store(f: typing.Optional[typing.Type[FlowSpecDerived]] = None):
1020
1020
  """
1021
- The `@airflow_s3_key_sensor` decorator attaches a Airflow [S3KeySensor](https://airflow.apache.org/docs/apache-airflow-providers-amazon/stable/_api/airflow/providers/amazon/aws/sensors/s3/index.html#airflow.providers.amazon.aws.sensors.s3.S3KeySensor)
1022
- before the start step of the flow. This decorator only works when a flow is scheduled on Airflow
1023
- and is compiled using `airflow create`. More than one `@airflow_s3_key_sensor` can be
1024
- added as a flow decorators. Adding more than one decorator will ensure that `start` step
1025
- starts only after all sensors finish.
1021
+ Allows setting external datastores to save data for the
1022
+ `@checkpoint`/`@model`/`@huggingface_hub` decorators.
1026
1023
 
1024
+ This decorator is useful when users wish to save data to a different datastore
1025
+ than what is configured in Metaflow. This can be for a variety of reasons:
1027
1026
 
1028
- Parameters
1027
+ 1. Data security: The objects need to be stored in a bucket (object storage) that is not accessible by other flows.
1028
+ 2. Data Locality: The location where the task is executing is not in the same region as the datastore.
1029
+ - Example: Metaflow datastore lives in US East, but the task is executing in Finland datacenters.
1030
+ 3. Data Lifecycle Policies: The objects need to be archived / managed separately from the Metaflow managed objects.
1031
+ - Example: Flow is training very large models that need to be stored separately and will be deleted more aggressively than the Metaflow managed objects.
1032
+
1033
+ Usage:
1029
1034
  ----------
1030
- timeout : int
1031
- Time, in seconds before the task times out and fails. (Default: 3600)
1032
- poke_interval : int
1033
- Time in seconds that the job should wait in between each try. (Default: 60)
1034
- mode : str
1035
- How the sensor operates. Options are: { poke | reschedule }. (Default: "poke")
1036
- exponential_backoff : bool
1037
- allow progressive longer waits between pokes by using exponential backoff algorithm. (Default: True)
1038
- pool : str
1039
- the slot pool this task should run in,
1040
- slot pools are a way to limit concurrency for certain tasks. (Default:None)
1041
- soft_fail : bool
1042
- Set to true to mark the task as SKIPPED on failure. (Default: False)
1043
- name : str
1044
- Name of the sensor on Airflow
1045
- description : str
1046
- Description of sensor in the Airflow UI
1047
- bucket_key : Union[str, List[str]]
1048
- The key(s) being waited on. Supports full s3:// style url or relative path from root level.
1049
- When it's specified as a full s3:// url, please leave `bucket_name` as None
1050
- bucket_name : str
1051
- Name of the S3 bucket. Only needed when bucket_key is not provided as a full s3:// url.
1052
- When specified, all the keys passed to bucket_key refers to this bucket. (Default:None)
1053
- wildcard_match : bool
1054
- whether the bucket_key should be interpreted as a Unix wildcard pattern. (Default: False)
1055
- aws_conn_id : str
1056
- a reference to the s3 connection on Airflow. (Default: None)
1057
- verify : bool
1058
- Whether or not to verify SSL certificates for S3 connection. (Default: None)
1035
+
1036
+ - Using a custom IAM role to access the datastore.
1037
+
1038
+ ```python
1039
+ @with_artifact_store(
1040
+ type="s3",
1041
+ config=lambda: {
1042
+ "root": "s3://my-bucket-foo/path/to/root",
1043
+ "role_arn": ROLE,
1044
+ },
1045
+ )
1046
+ class MyFlow(FlowSpec):
1047
+
1048
+ @checkpoint
1049
+ @step
1050
+ def start(self):
1051
+ with open("my_file.txt", "w") as f:
1052
+ f.write("Hello, World!")
1053
+ self.external_bucket_checkpoint = current.checkpoint.save("my_file.txt")
1054
+ self.next(self.end)
1055
+
1056
+ ```
1057
+
1058
+ - Using credentials to access the s3-compatible datastore.
1059
+
1060
+ ```python
1061
+ @with_artifact_store(
1062
+ type="s3",
1063
+ config=lambda: {
1064
+ "root": "s3://my-bucket-foo/path/to/root",
1065
+ "client_params": {
1066
+ "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1067
+ "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1068
+ },
1069
+ },
1070
+ )
1071
+ class MyFlow(FlowSpec):
1072
+
1073
+ @checkpoint
1074
+ @step
1075
+ def start(self):
1076
+ with open("my_file.txt", "w") as f:
1077
+ f.write("Hello, World!")
1078
+ self.external_bucket_checkpoint = current.checkpoint.save("my_file.txt")
1079
+ self.next(self.end)
1080
+
1081
+ ```
1082
+
1083
+ - Accessing objects stored in external datastores after task execution.
1084
+
1085
+ ```python
1086
+ run = Run("CheckpointsTestsFlow/8992")
1087
+ with artifact_store_from(run=run, config={
1088
+ "client_params": {
1089
+ "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1090
+ "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1091
+ },
1092
+ }):
1093
+ with Checkpoint() as cp:
1094
+ latest = cp.list(
1095
+ task=run["start"].task
1096
+ )[0]
1097
+ print(latest)
1098
+ cp.load(
1099
+ latest,
1100
+ "test-checkpoints"
1101
+ )
1102
+
1103
+ task = Task("TorchTuneFlow/8484/train/53673")
1104
+ with artifact_store_from(run=run, config={
1105
+ "client_params": {
1106
+ "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1107
+ "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1108
+ },
1109
+ }):
1110
+ load_model(
1111
+ task.data.model_ref,
1112
+ "test-models"
1113
+ )
1114
+ ```
1115
+ Parameters
1116
+ ----------
1117
+
1118
+ type: str
1119
+ The type of the datastore. Can be one of 's3', 'gcs', 'azure' or any other supported metaflow Datastore.
1120
+
1121
+ config: dict or Callable
1122
+ Dictionary of configuration options for the datastore. The following keys are required:
1123
+ - root: The root path in the datastore where the data will be saved. (needs to be in the format expected by the datastore)
1124
+ - example: 's3://bucket-name/path/to/root'
1125
+ - example: 'gs://bucket-name/path/to/root'
1126
+ - example: 'https://myblockacc.blob.core.windows.net/metaflow/'
1127
+ - role_arn (optional): AWS IAM role to access s3 bucket (only when `type` is 's3')
1128
+ - session_vars (optional): AWS session variables to access s3 bucket (only when `type` is 's3')
1129
+ - client_params (optional): AWS client parameters to access s3 bucket (only when `type` is 's3')
1059
1130
  """
1060
1131
  ...
1061
1132
 
1062
- def project(*, name: str, branch: typing.Optional[str] = None, production: bool = False) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1133
+ @typing.overload
1134
+ def schedule(*, hourly: bool = False, daily: bool = True, weekly: bool = False, cron: typing.Optional[str] = None, timezone: typing.Optional[str] = None) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1063
1135
  """
1064
- Specifies what flows belong to the same project.
1136
+ Specifies the times when the flow should be run when running on a
1137
+ production scheduler.
1065
1138
 
1066
- A project-specific namespace is created for all flows that
1067
- use the same `@project(name)`.
1139
+
1140
+ Parameters
1141
+ ----------
1142
+ hourly : bool, default False
1143
+ Run the workflow hourly.
1144
+ daily : bool, default True
1145
+ Run the workflow daily.
1146
+ weekly : bool, default False
1147
+ Run the workflow weekly.
1148
+ cron : str, optional, default None
1149
+ Run the workflow at [a custom Cron schedule](https://docs.aws.amazon.com/eventbridge/latest/userguide/scheduled-events.html#cron-expressions)
1150
+ specified by this expression.
1151
+ timezone : str, optional, default None
1152
+ Timezone on which the schedule runs (default: None). Currently supported only for Argo workflows,
1153
+ which accepts timezones in [IANA format](https://nodatime.org/TimeZones).
1154
+ """
1155
+ ...
1156
+
1157
+ @typing.overload
1158
+ def schedule(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1159
+ ...
1160
+
1161
+ def schedule(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, hourly: bool = False, daily: bool = True, weekly: bool = False, cron: typing.Optional[str] = None, timezone: typing.Optional[str] = None):
1162
+ """
1163
+ Specifies the times when the flow should be run when running on a
1164
+ production scheduler.
1068
1165
 
1069
1166
 
1070
1167
  Parameters
1071
1168
  ----------
1072
- name : str
1073
- Project name. Make sure that the name is unique amongst all
1074
- projects that use the same production scheduler. The name may
1075
- contain only lowercase alphanumeric characters and underscores.
1076
-
1077
- branch : Optional[str], default None
1078
- The branch to use. If not specified, the branch is set to
1079
- `user.<username>` unless `production` is set to `True`. This can
1080
- also be set on the command line using `--branch` as a top-level option.
1081
- It is an error to specify `branch` in the decorator and on the command line.
1082
-
1083
- production : bool, default False
1084
- Whether or not the branch is the production branch. This can also be set on the
1085
- command line using `--production` as a top-level option. It is an error to specify
1086
- `production` in the decorator and on the command line.
1087
- The project branch name will be:
1088
- - if `branch` is specified:
1089
- - if `production` is True: `prod.<branch>`
1090
- - if `production` is False: `test.<branch>`
1091
- - if `branch` is not specified:
1092
- - if `production` is True: `prod`
1093
- - if `production` is False: `user.<username>`
1169
+ hourly : bool, default False
1170
+ Run the workflow hourly.
1171
+ daily : bool, default True
1172
+ Run the workflow daily.
1173
+ weekly : bool, default False
1174
+ Run the workflow weekly.
1175
+ cron : str, optional, default None
1176
+ Run the workflow at [a custom Cron schedule](https://docs.aws.amazon.com/eventbridge/latest/userguide/scheduled-events.html#cron-expressions)
1177
+ specified by this expression.
1178
+ timezone : str, optional, default None
1179
+ Timezone on which the schedule runs (default: None). Currently supported only for Argo workflows,
1180
+ which accepts timezones in [IANA format](https://nodatime.org/TimeZones).
1094
1181
  """
1095
1182
  ...
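A sketch of a scheduled flow; the cron expression and timezone are illustrative, and the schedule only takes effect once the flow is deployed to a production scheduler (for example Argo Workflows, per the docstring's note on timezone support).

```python
from metaflow import FlowSpec, schedule, step


@schedule(cron="0 6 * * *", timezone="Europe/Helsinki")  # daily at 06:00; values illustrative
class NightlyReportFlow(FlowSpec):

    @step
    def start(self):
        self.report = "nightly run"
        self.next(self.end)

    @step
    def end(self):
        print(self.report)


if __name__ == "__main__":
    NightlyReportFlow()
```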
1096
1183
 
@@ -1329,57 +1416,6 @@ def pypi_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packag
1329
1416
  """
1330
1417
  ...
1331
1418
 
1332
- @typing.overload
1333
- def schedule(*, hourly: bool = False, daily: bool = True, weekly: bool = False, cron: typing.Optional[str] = None, timezone: typing.Optional[str] = None) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1334
- """
1335
- Specifies the times when the flow should be run when running on a
1336
- production scheduler.
1337
-
1338
-
1339
- Parameters
1340
- ----------
1341
- hourly : bool, default False
1342
- Run the workflow hourly.
1343
- daily : bool, default True
1344
- Run the workflow daily.
1345
- weekly : bool, default False
1346
- Run the workflow weekly.
1347
- cron : str, optional, default None
1348
- Run the workflow at [a custom Cron schedule](https://docs.aws.amazon.com/eventbridge/latest/userguide/scheduled-events.html#cron-expressions)
1349
- specified by this expression.
1350
- timezone : str, optional, default None
1351
- Timezone on which the schedule runs (default: None). Currently supported only for Argo workflows,
1352
- which accepts timezones in [IANA format](https://nodatime.org/TimeZones).
1353
- """
1354
- ...
1355
-
1356
- @typing.overload
1357
- def schedule(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1358
- ...
1359
-
1360
- def schedule(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, hourly: bool = False, daily: bool = True, weekly: bool = False, cron: typing.Optional[str] = None, timezone: typing.Optional[str] = None):
1361
- """
1362
- Specifies the times when the flow should be run when running on a
1363
- production scheduler.
1364
-
1365
-
1366
- Parameters
1367
- ----------
1368
- hourly : bool, default False
1369
- Run the workflow hourly.
1370
- daily : bool, default True
1371
- Run the workflow daily.
1372
- weekly : bool, default False
1373
- Run the workflow weekly.
1374
- cron : str, optional, default None
1375
- Run the workflow at [a custom Cron schedule](https://docs.aws.amazon.com/eventbridge/latest/userguide/scheduled-events.html#cron-expressions)
1376
- specified by this expression.
1377
- timezone : str, optional, default None
1378
- Timezone on which the schedule runs (default: None). Currently supported only for Argo workflows,
1379
- which accepts timezones in [IANA format](https://nodatime.org/TimeZones).
1380
- """
1381
- ...
1382
-
1383
1419
  @typing.overload
1384
1420
  def conda_base(*, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1385
1421
  """
@@ -1431,117 +1467,81 @@ def conda_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packa
1431
1467
  """
1432
1468
  ...
1433
1469
 
1434
- def with_artifact_store(f: typing.Optional[typing.Type[FlowSpecDerived]] = None):
1470
+ def airflow_s3_key_sensor(*, timeout: int, poke_interval: int, mode: str, exponential_backoff: bool, pool: str, soft_fail: bool, name: str, description: str, bucket_key: typing.Union[str, typing.List[str]], bucket_name: str, wildcard_match: bool, aws_conn_id: str, verify: bool) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1435
1471
  """
1436
- Allows setting external datastores to save data for the
1437
- `@checkpoint`/`@model`/`@huggingface_hub` decorators.
1438
-
1439
- This decorator is useful when users wish to save data to a different datastore
1440
- than what is configured in Metaflow. This can be for variety of reasons:
1472
+ The `@airflow_s3_key_sensor` decorator attaches an Airflow [S3KeySensor](https://airflow.apache.org/docs/apache-airflow-providers-amazon/stable/_api/airflow/providers/amazon/aws/sensors/s3/index.html#airflow.providers.amazon.aws.sensors.s3.S3KeySensor)
1473
+ before the start step of the flow. This decorator only works when a flow is scheduled on Airflow
1474
+ and is compiled using `airflow create`. More than one `@airflow_s3_key_sensor` can be
1475
+ added as flow decorators. Adding more than one decorator will ensure that the `start` step
1476
+ starts only after all sensors finish.
1441
1477
 
1442
- 1. Data security: The objects needs to be stored in a bucket (object storage) that is not accessible by other flows.
1443
- 2. Data Locality: The location where the task is executing is not located in the same region as the datastore.
1444
- - Example: Metaflow datastore lives in US East, but the task is executing in Finland datacenters.
1445
- 3. Data Lifecycle Policies: The objects need to be archived / managed separately from the Metaflow managed objects.
1446
- - Example: Flow is training very large models that need to be stored separately and will be deleted more aggressively than the Metaflow managed objects.
1447
1478
 
1448
- Usage:
1479
+ Parameters
1449
1480
  ----------
1481
+ timeout : int
1482
+ Time, in seconds before the task times out and fails. (Default: 3600)
1483
+ poke_interval : int
1484
+ Time in seconds that the job should wait in between each try. (Default: 60)
1485
+ mode : str
1486
+ How the sensor operates. Options are: { poke | reschedule }. (Default: "poke")
1487
+ exponential_backoff : bool
1488
+ Allow progressively longer waits between pokes by using an exponential backoff algorithm. (Default: True)
1489
+ pool : str
1490
+ the slot pool this task should run in,
1491
+ slot pools are a way to limit concurrency for certain tasks. (Default:None)
1492
+ soft_fail : bool
1493
+ Set to true to mark the task as SKIPPED on failure. (Default: False)
1494
+ name : str
1495
+ Name of the sensor on Airflow
1496
+ description : str
1497
+ Description of sensor in the Airflow UI
1498
+ bucket_key : Union[str, List[str]]
1499
+ The key(s) being waited on. Supports full s3:// style url or relative path from root level.
1500
+ When it's specified as a full s3:// url, please leave `bucket_name` as None
1501
+ bucket_name : str
1502
+ Name of the S3 bucket. Only needed when bucket_key is not provided as a full s3:// url.
1503
+ When specified, all the keys passed to bucket_key refer to this bucket. (Default: None)
1504
+ wildcard_match : bool
1505
+ whether the bucket_key should be interpreted as a Unix wildcard pattern. (Default: False)
1506
+ aws_conn_id : str
1507
+ a reference to the s3 connection on Airflow. (Default: None)
1508
+ verify : bool
1509
+ Whether or not to verify SSL certificates for S3 connection. (Default: None)
1510
+ """
1511
+ ...
1512
+
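A hedged sketch of gating a flow on an S3 key, passing every documented argument explicitly (the bucket, key, and sensor name are illustrative); it only has an effect when the flow is compiled with `airflow create`.

```python
from metaflow import FlowSpec, airflow_s3_key_sensor, step


@airflow_s3_key_sensor(
    timeout=3600,
    poke_interval=60,
    mode="poke",
    exponential_backoff=True,
    pool=None,
    soft_fail=False,
    name="wait_for_daily_export",
    description="Wait for the upstream daily export to land in S3",
    bucket_key="s3://my-bucket/exports/daily.parquet",  # full s3:// url, so bucket_name stays None
    bucket_name=None,
    wildcard_match=False,
    aws_conn_id=None,
    verify=None,
)
class SensorGatedFlow(FlowSpec):

    @step
    def start(self):
        self.next(self.end)

    @step
    def end(self):
        pass


if __name__ == "__main__":
    SensorGatedFlow()
```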
1513
+ def project(*, name: str, branch: typing.Optional[str] = None, production: bool = False) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1514
+ """
1515
+ Specifies what flows belong to the same project.
1450
1516
 
1451
- - Using a custom IAM role to access the datastore.
1452
-
1453
- ```python
1454
- @with_artifact_store(
1455
- type="s3",
1456
- config=lambda: {
1457
- "root": "s3://my-bucket-foo/path/to/root",
1458
- "role_arn": ROLE,
1459
- },
1460
- )
1461
- class MyFlow(FlowSpec):
1462
-
1463
- @checkpoint
1464
- @step
1465
- def start(self):
1466
- with open("my_file.txt", "w") as f:
1467
- f.write("Hello, World!")
1468
- self.external_bucket_checkpoint = current.checkpoint.save("my_file.txt")
1469
- self.next(self.end)
1470
-
1471
- ```
1472
-
1473
- - Using credentials to access the s3-compatible datastore.
1474
-
1475
- ```python
1476
- @with_artifact_store(
1477
- type="s3",
1478
- config=lambda: {
1479
- "root": "s3://my-bucket-foo/path/to/root",
1480
- "client_params": {
1481
- "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1482
- "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1483
- },
1484
- },
1485
- )
1486
- class MyFlow(FlowSpec):
1487
-
1488
- @checkpoint
1489
- @step
1490
- def start(self):
1491
- with open("my_file.txt", "w") as f:
1492
- f.write("Hello, World!")
1493
- self.external_bucket_checkpoint = current.checkpoint.save("my_file.txt")
1494
- self.next(self.end)
1495
-
1496
- ```
1497
-
1498
- - Accessing objects stored in external datastores after task execution.
1517
+ A project-specific namespace is created for all flows that
1518
+ use the same `@project(name)`.
1499
1519
 
1500
- ```python
1501
- run = Run("CheckpointsTestsFlow/8992")
1502
- with artifact_store_from(run=run, config={
1503
- "client_params": {
1504
- "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1505
- "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1506
- },
1507
- }):
1508
- with Checkpoint() as cp:
1509
- latest = cp.list(
1510
- task=run["start"].task
1511
- )[0]
1512
- print(latest)
1513
- cp.load(
1514
- latest,
1515
- "test-checkpoints"
1516
- )
1517
1520
 
1518
- task = Task("TorchTuneFlow/8484/train/53673")
1519
- with artifact_store_from(run=run, config={
1520
- "client_params": {
1521
- "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1522
- "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1523
- },
1524
- }):
1525
- load_model(
1526
- task.data.model_ref,
1527
- "test-models"
1528
- )
1529
- ```
1530
- Parameters:
1521
+ Parameters
1531
1522
  ----------
1523
+ name : str
1524
+ Project name. Make sure that the name is unique amongst all
1525
+ projects that use the same production scheduler. The name may
1526
+ contain only lowercase alphanumeric characters and underscores.
1532
1527
 
1533
- type: str
1534
- The type of the datastore. Can be one of 's3', 'gcs', 'azure' or any other supported metaflow Datastore.
1528
+ branch : Optional[str], default None
1529
+ The branch to use. If not specified, the branch is set to
1530
+ `user.<username>` unless `production` is set to `True`. This can
1531
+ also be set on the command line using `--branch` as a top-level option.
1532
+ It is an error to specify `branch` in the decorator and on the command line.
1535
1533
 
1536
- config: dict or Callable
1537
- Dictionary of configuration options for the datastore. The following keys are required:
1538
- - root: The root path in the datastore where the data will be saved. (needs to be in the format expected by the datastore)
1539
- - example: 's3://bucket-name/path/to/root'
1540
- - example: 'gs://bucket-name/path/to/root'
1541
- - example: 'https://myblockacc.blob.core.windows.net/metaflow/'
1542
- - role_arn (optional): AWS IAM role to access s3 bucket (only when `type` is 's3')
1543
- - session_vars (optional): AWS session variables to access s3 bucket (only when `type` is 's3')
1544
- - client_params (optional): AWS client parameters to access s3 bucket (only when `type` is 's3')
1534
+ production : bool, default False
1535
+ Whether or not the branch is the production branch. This can also be set on the
1536
+ command line using `--production` as a top-level option. It is an error to specify
1537
+ `production` in the decorator and on the command line.
1538
+ The project branch name will be:
1539
+ - if `branch` is specified:
1540
+ - if `production` is True: `prod.<branch>`
1541
+ - if `production` is False: `test.<branch>`
1542
+ - if `branch` is not specified:
1543
+ - if `production` is True: `prod`
1544
+ - if `production` is False: `user.<username>`
1545
1545
  """
1546
1546
  ...
1547
1547
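A brief sketch of the `@project` namespacing described above; the project name is illustrative.

```python
from metaflow import FlowSpec, project, step


@project(name="fraud_detection")   # illustrative project name
class ScoringFlow(FlowSpec):

    @step
    def start(self):
        self.next(self.end)

    @step
    def end(self):
        pass


if __name__ == "__main__":
    ScoringFlow()
```

By default this deploys under the `user.<username>` branch; passing `--branch staging` on the command line would deploy under `test.staging`, and `--production` under `prod`, matching the naming rules listed in the docstring.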