ob-metaflow-stubs 6.0.3.184__py2.py3-none-any.whl → 6.0.3.186__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (216)
  1. metaflow-stubs/__init__.pyi +1009 -792
  2. metaflow-stubs/cards.pyi +2 -2
  3. metaflow-stubs/cli.pyi +2 -2
  4. metaflow-stubs/cli_components/__init__.pyi +2 -2
  5. metaflow-stubs/cli_components/utils.pyi +2 -2
  6. metaflow-stubs/client/__init__.pyi +2 -2
  7. metaflow-stubs/client/core.pyi +5 -5
  8. metaflow-stubs/client/filecache.pyi +2 -2
  9. metaflow-stubs/events.pyi +3 -3
  10. metaflow-stubs/exception.pyi +2 -2
  11. metaflow-stubs/flowspec.pyi +4 -4
  12. metaflow-stubs/generated_for.txt +1 -1
  13. metaflow-stubs/includefile.pyi +3 -3
  14. metaflow-stubs/info_file.pyi +2 -2
  15. metaflow-stubs/metadata_provider/__init__.pyi +2 -2
  16. metaflow-stubs/metadata_provider/heartbeat.pyi +2 -2
  17. metaflow-stubs/metadata_provider/metadata.pyi +2 -2
  18. metaflow-stubs/metadata_provider/util.pyi +2 -2
  19. metaflow-stubs/metaflow_config.pyi +2 -2
  20. metaflow-stubs/metaflow_current.pyi +31 -179
  21. metaflow-stubs/metaflow_git.pyi +2 -2
  22. metaflow-stubs/mf_extensions/__init__.pyi +2 -2
  23. metaflow-stubs/mf_extensions/obcheckpoint/__init__.pyi +2 -2
  24. metaflow-stubs/mf_extensions/obcheckpoint/plugins/__init__.pyi +2 -2
  25. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/__init__.pyi +2 -2
  26. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/__init__.pyi +2 -2
  27. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/async_cards.pyi +3 -3
  28. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/deco_injection_mixin.pyi +2 -2
  29. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/extra_components.pyi +3 -3
  30. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/__init__.pyi +2 -2
  31. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/cards/__init__.pyi +2 -2
  32. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/cards/checkpoint_lister.pyi +4 -4
  33. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/cards/lineage_card.pyi +2 -2
  34. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/checkpoint_storage.pyi +4 -4
  35. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/constructors.pyi +2 -2
  36. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/core.pyi +4 -4
  37. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/decorator.pyi +50 -48
  38. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/exceptions.pyi +2 -2
  39. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/final_api.pyi +10 -14
  40. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/lineage.pyi +2 -2
  41. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/__init__.pyi +2 -2
  42. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/context.pyi +2 -2
  43. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/core.pyi +3 -3
  44. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/decorator.pyi +2 -2
  45. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/exceptions.pyi +2 -2
  46. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/task_utils.pyi +2 -2
  47. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/utils.pyi +2 -2
  48. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastructures.pyi +86 -2
  49. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/exceptions.pyi +2 -2
  50. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/hf_hub/__init__.pyi +2 -2
  51. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/hf_hub/decorator.pyi +79 -76
  52. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/__init__.pyi +2 -2
  53. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/core.pyi +48 -13
  54. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/exceptions.pyi +2 -2
  55. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/model_storage.pyi +3 -3
  56. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/__init__.pyi +2 -2
  57. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/flowspec_utils.pyi +2 -2
  58. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/general.pyi +2 -2
  59. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/identity_utils.pyi +2 -2
  60. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/serialization_handler/__init__.pyi +2 -2
  61. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/serialization_handler/base.pyi +2 -2
  62. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/serialization_handler/tar.pyi +2 -2
  63. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/tar_utils.pyi +2 -2
  64. metaflow-stubs/mf_extensions/outerbounds/__init__.pyi +2 -2
  65. metaflow-stubs/mf_extensions/outerbounds/plugins/__init__.pyi +2 -2
  66. metaflow-stubs/mf_extensions/outerbounds/plugins/aws/__init__.pyi +2 -2
  67. metaflow-stubs/mf_extensions/outerbounds/plugins/aws/assume_role_decorator.pyi +2 -2
  68. metaflow-stubs/mf_extensions/outerbounds/plugins/card_utilities/__init__.pyi +2 -2
  69. metaflow-stubs/mf_extensions/outerbounds/plugins/card_utilities/async_cards.pyi +3 -3
  70. metaflow-stubs/mf_extensions/outerbounds/plugins/card_utilities/injector.pyi +2 -2
  71. metaflow-stubs/mf_extensions/outerbounds/plugins/checkpoint_datastores/__init__.pyi +2 -2
  72. metaflow-stubs/mf_extensions/outerbounds/plugins/checkpoint_datastores/coreweave.pyi +2 -2
  73. metaflow-stubs/mf_extensions/outerbounds/plugins/checkpoint_datastores/nebius.pyi +2 -2
  74. metaflow-stubs/mf_extensions/outerbounds/plugins/kubernetes/__init__.pyi +2 -2
  75. metaflow-stubs/mf_extensions/outerbounds/plugins/kubernetes/pod_killer.pyi +2 -2
  76. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/__init__.pyi +2 -2
  77. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/constants.pyi +2 -2
  78. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/exceptions.pyi +2 -2
  79. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/ollama.pyi +2 -2
  80. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/status_card.pyi +2 -2
  81. metaflow-stubs/mf_extensions/outerbounds/plugins/snowflake/__init__.pyi +2 -2
  82. metaflow-stubs/mf_extensions/outerbounds/plugins/snowflake/snowflake.pyi +2 -2
  83. metaflow-stubs/mf_extensions/outerbounds/profilers/__init__.pyi +2 -2
  84. metaflow-stubs/mf_extensions/outerbounds/profilers/gpu.pyi +2 -2
  85. metaflow-stubs/mf_extensions/outerbounds/remote_config.pyi +2 -2
  86. metaflow-stubs/mf_extensions/outerbounds/toplevel/__init__.pyi +2 -2
  87. metaflow-stubs/mf_extensions/outerbounds/toplevel/global_aliases_for_metaflow_package.pyi +2 -2
  88. metaflow-stubs/multicore_utils.pyi +2 -2
  89. metaflow-stubs/ob_internal.pyi +2 -2
  90. metaflow-stubs/parameters.pyi +3 -3
  91. metaflow-stubs/plugins/__init__.pyi +12 -12
  92. metaflow-stubs/plugins/airflow/__init__.pyi +2 -2
  93. metaflow-stubs/plugins/airflow/airflow_utils.pyi +2 -2
  94. metaflow-stubs/plugins/airflow/exception.pyi +2 -2
  95. metaflow-stubs/plugins/airflow/sensors/__init__.pyi +2 -2
  96. metaflow-stubs/plugins/airflow/sensors/base_sensor.pyi +2 -2
  97. metaflow-stubs/plugins/airflow/sensors/external_task_sensor.pyi +2 -2
  98. metaflow-stubs/plugins/airflow/sensors/s3_sensor.pyi +2 -2
  99. metaflow-stubs/plugins/argo/__init__.pyi +2 -2
  100. metaflow-stubs/plugins/argo/argo_client.pyi +2 -2
  101. metaflow-stubs/plugins/argo/argo_events.pyi +2 -2
  102. metaflow-stubs/plugins/argo/argo_workflows.pyi +3 -3
  103. metaflow-stubs/plugins/argo/argo_workflows_decorator.pyi +2 -2
  104. metaflow-stubs/plugins/argo/argo_workflows_deployer.pyi +3 -3
  105. metaflow-stubs/plugins/argo/argo_workflows_deployer_objects.pyi +2 -2
  106. metaflow-stubs/plugins/aws/__init__.pyi +2 -2
  107. metaflow-stubs/plugins/aws/aws_client.pyi +2 -2
  108. metaflow-stubs/plugins/aws/aws_utils.pyi +2 -2
  109. metaflow-stubs/plugins/aws/batch/__init__.pyi +2 -2
  110. metaflow-stubs/plugins/aws/batch/batch.pyi +2 -2
  111. metaflow-stubs/plugins/aws/batch/batch_client.pyi +2 -2
  112. metaflow-stubs/plugins/aws/batch/batch_decorator.pyi +2 -2
  113. metaflow-stubs/plugins/aws/secrets_manager/__init__.pyi +2 -2
  114. metaflow-stubs/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.pyi +3 -3
  115. metaflow-stubs/plugins/aws/step_functions/__init__.pyi +2 -2
  116. metaflow-stubs/plugins/aws/step_functions/event_bridge_client.pyi +2 -2
  117. metaflow-stubs/plugins/aws/step_functions/schedule_decorator.pyi +2 -2
  118. metaflow-stubs/plugins/aws/step_functions/step_functions.pyi +2 -2
  119. metaflow-stubs/plugins/aws/step_functions/step_functions_client.pyi +2 -2
  120. metaflow-stubs/plugins/aws/step_functions/step_functions_deployer.pyi +3 -3
  121. metaflow-stubs/plugins/aws/step_functions/step_functions_deployer_objects.pyi +2 -2
  122. metaflow-stubs/plugins/azure/__init__.pyi +2 -2
  123. metaflow-stubs/plugins/azure/azure_credential.pyi +2 -2
  124. metaflow-stubs/plugins/azure/azure_exceptions.pyi +2 -2
  125. metaflow-stubs/plugins/azure/azure_secret_manager_secrets_provider.pyi +3 -3
  126. metaflow-stubs/plugins/azure/azure_utils.pyi +2 -2
  127. metaflow-stubs/plugins/azure/blob_service_client_factory.pyi +2 -2
  128. metaflow-stubs/plugins/azure/includefile_support.pyi +2 -2
  129. metaflow-stubs/plugins/cards/__init__.pyi +2 -2
  130. metaflow-stubs/plugins/cards/card_client.pyi +3 -3
  131. metaflow-stubs/plugins/cards/card_creator.pyi +2 -2
  132. metaflow-stubs/plugins/cards/card_datastore.pyi +2 -2
  133. metaflow-stubs/plugins/cards/card_decorator.pyi +2 -2
  134. metaflow-stubs/plugins/cards/card_modules/__init__.pyi +2 -2
  135. metaflow-stubs/plugins/cards/card_modules/basic.pyi +2 -2
  136. metaflow-stubs/plugins/cards/card_modules/card.pyi +2 -2
  137. metaflow-stubs/plugins/cards/card_modules/components.pyi +3 -3
  138. metaflow-stubs/plugins/cards/card_modules/convert_to_native_type.pyi +2 -2
  139. metaflow-stubs/plugins/cards/card_modules/renderer_tools.pyi +2 -2
  140. metaflow-stubs/plugins/cards/card_modules/test_cards.pyi +2 -2
  141. metaflow-stubs/plugins/cards/card_resolver.pyi +2 -2
  142. metaflow-stubs/plugins/cards/component_serializer.pyi +2 -2
  143. metaflow-stubs/plugins/cards/exception.pyi +2 -2
  144. metaflow-stubs/plugins/catch_decorator.pyi +2 -2
  145. metaflow-stubs/plugins/datatools/__init__.pyi +2 -2
  146. metaflow-stubs/plugins/datatools/local.pyi +2 -2
  147. metaflow-stubs/plugins/datatools/s3/__init__.pyi +2 -2
  148. metaflow-stubs/plugins/datatools/s3/s3.pyi +3 -3
  149. metaflow-stubs/plugins/datatools/s3/s3tail.pyi +2 -2
  150. metaflow-stubs/plugins/datatools/s3/s3util.pyi +2 -2
  151. metaflow-stubs/plugins/debug_logger.pyi +2 -2
  152. metaflow-stubs/plugins/debug_monitor.pyi +2 -2
  153. metaflow-stubs/plugins/environment_decorator.pyi +2 -2
  154. metaflow-stubs/plugins/events_decorator.pyi +2 -2
  155. metaflow-stubs/plugins/frameworks/__init__.pyi +2 -2
  156. metaflow-stubs/plugins/frameworks/pytorch.pyi +2 -2
  157. metaflow-stubs/plugins/gcp/__init__.pyi +2 -2
  158. metaflow-stubs/plugins/gcp/gcp_secret_manager_secrets_provider.pyi +3 -3
  159. metaflow-stubs/plugins/gcp/gs_exceptions.pyi +2 -2
  160. metaflow-stubs/plugins/gcp/gs_storage_client_factory.pyi +2 -2
  161. metaflow-stubs/plugins/gcp/gs_utils.pyi +2 -2
  162. metaflow-stubs/plugins/gcp/includefile_support.pyi +2 -2
  163. metaflow-stubs/plugins/kubernetes/__init__.pyi +2 -2
  164. metaflow-stubs/plugins/kubernetes/kube_utils.pyi +2 -2
  165. metaflow-stubs/plugins/kubernetes/kubernetes.pyi +2 -2
  166. metaflow-stubs/plugins/kubernetes/kubernetes_client.pyi +2 -2
  167. metaflow-stubs/plugins/kubernetes/kubernetes_decorator.pyi +2 -2
  168. metaflow-stubs/plugins/kubernetes/kubernetes_jobsets.pyi +2 -2
  169. metaflow-stubs/plugins/kubernetes/spot_monitor_sidecar.pyi +2 -2
  170. metaflow-stubs/plugins/ollama/__init__.pyi +2 -2
  171. metaflow-stubs/plugins/parallel_decorator.pyi +2 -2
  172. metaflow-stubs/plugins/perimeters.pyi +2 -2
  173. metaflow-stubs/plugins/project_decorator.pyi +2 -2
  174. metaflow-stubs/plugins/pypi/__init__.pyi +2 -2
  175. metaflow-stubs/plugins/pypi/conda_decorator.pyi +2 -2
  176. metaflow-stubs/plugins/pypi/conda_environment.pyi +3 -3
  177. metaflow-stubs/plugins/pypi/parsers.pyi +2 -2
  178. metaflow-stubs/plugins/pypi/pypi_decorator.pyi +2 -2
  179. metaflow-stubs/plugins/pypi/pypi_environment.pyi +2 -2
  180. metaflow-stubs/plugins/pypi/utils.pyi +2 -2
  181. metaflow-stubs/plugins/resources_decorator.pyi +2 -2
  182. metaflow-stubs/plugins/retry_decorator.pyi +2 -2
  183. metaflow-stubs/plugins/secrets/__init__.pyi +2 -2
  184. metaflow-stubs/plugins/secrets/inline_secrets_provider.pyi +2 -2
  185. metaflow-stubs/plugins/secrets/secrets_decorator.pyi +2 -2
  186. metaflow-stubs/plugins/snowflake/__init__.pyi +2 -2
  187. metaflow-stubs/plugins/storage_executor.pyi +2 -2
  188. metaflow-stubs/plugins/test_unbounded_foreach_decorator.pyi +2 -2
  189. metaflow-stubs/plugins/timeout_decorator.pyi +2 -2
  190. metaflow-stubs/plugins/torchtune/__init__.pyi +4 -3
  191. metaflow-stubs/plugins/uv/__init__.pyi +2 -2
  192. metaflow-stubs/plugins/uv/uv_environment.pyi +2 -2
  193. metaflow-stubs/profilers/__init__.pyi +2 -2
  194. metaflow-stubs/pylint_wrapper.pyi +2 -2
  195. metaflow-stubs/runner/__init__.pyi +2 -2
  196. metaflow-stubs/runner/deployer.pyi +4 -4
  197. metaflow-stubs/runner/deployer_impl.pyi +3 -3
  198. metaflow-stubs/runner/metaflow_runner.pyi +3 -3
  199. metaflow-stubs/runner/nbdeploy.pyi +2 -2
  200. metaflow-stubs/runner/nbrun.pyi +2 -2
  201. metaflow-stubs/runner/subprocess_manager.pyi +2 -2
  202. metaflow-stubs/runner/utils.pyi +4 -4
  203. metaflow-stubs/system/__init__.pyi +2 -2
  204. metaflow-stubs/system/system_logger.pyi +3 -3
  205. metaflow-stubs/system/system_monitor.pyi +2 -2
  206. metaflow-stubs/tagging_util.pyi +2 -2
  207. metaflow-stubs/tuple_util.pyi +2 -2
  208. metaflow-stubs/user_configs/__init__.pyi +2 -2
  209. metaflow-stubs/user_configs/config_decorators.pyi +6 -6
  210. metaflow-stubs/user_configs/config_options.pyi +3 -3
  211. metaflow-stubs/user_configs/config_parameters.pyi +7 -7
  212. {ob_metaflow_stubs-6.0.3.184.dist-info → ob_metaflow_stubs-6.0.3.186.dist-info}/METADATA +1 -1
  213. ob_metaflow_stubs-6.0.3.186.dist-info/RECORD +216 -0
  214. ob_metaflow_stubs-6.0.3.184.dist-info/RECORD +0 -216
  215. {ob_metaflow_stubs-6.0.3.184.dist-info → ob_metaflow_stubs-6.0.3.186.dist-info}/WHEEL +0 -0
  216. {ob_metaflow_stubs-6.0.3.184.dist-info → ob_metaflow_stubs-6.0.3.186.dist-info}/top_level.txt +0 -0
@@ -1,15 +1,15 @@
1
1
  ######################################################################################################
2
2
  # Auto-generated Metaflow stub file #
3
- # MF version: 2.15.18.1+obcheckpoint(0.2.1);ob(v1) #
4
- # Generated on 2025-06-25T20:19:31.361984 #
3
+ # MF version: 2.15.18.1+obcheckpoint(0.2.4);ob(v1) #
4
+ # Generated on 2025-07-01T08:40:33.108391 #
5
5
  ######################################################################################################
6
6
 
7
7
  from __future__ import annotations
8
8
 
9
9
  import typing
10
10
  if typing.TYPE_CHECKING:
11
- import datetime
12
11
  import typing
12
+ import datetime
13
13
  FlowSpecDerived = typing.TypeVar("FlowSpecDerived", bound="FlowSpec", contravariant=False, covariant=False)
14
14
  StepFlag = typing.NewType("StepFlag", bool)
15
15
 
@@ -35,18 +35,18 @@ from .user_configs.config_parameters import ConfigValue as ConfigValue
35
35
  from .user_configs.config_parameters import config_expr as config_expr
36
36
  from .user_configs.config_decorators import CustomFlowDecorator as CustomFlowDecorator
37
37
  from .user_configs.config_decorators import CustomStepDecorator as CustomStepDecorator
38
- from . import tuple_util as tuple_util
39
38
  from . import cards as cards
40
- from . import events as events
41
39
  from . import metaflow_git as metaflow_git
40
+ from . import tuple_util as tuple_util
41
+ from . import events as events
42
42
  from . import runner as runner
43
43
  from . import plugins as plugins
44
44
  from .mf_extensions.outerbounds.toplevel.global_aliases_for_metaflow_package import S3 as S3
45
45
  from . import includefile as includefile
46
46
  from .includefile import IncludeFile as IncludeFile
47
- from .plugins.pypi.parsers import conda_environment_yml_parser as conda_environment_yml_parser
48
47
  from .plugins.pypi.parsers import requirements_txt_parser as requirements_txt_parser
49
48
  from .plugins.pypi.parsers import pyproject_toml_parser as pyproject_toml_parser
49
+ from .plugins.pypi.parsers import conda_environment_yml_parser as conda_environment_yml_parser
50
50
  from . import client as client
51
51
  from .client.core import namespace as namespace
52
52
  from .client.core import get_namespace as get_namespace
@@ -155,78 +155,71 @@ def step(f: typing.Union[typing.Callable[[FlowSpecDerived], None], typing.Callab
155
155
  """
156
156
  ...
157
157
 
158
- def nvidia(*, gpu: int, gpu_type: str, queue_timeout: int) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
158
+ @typing.overload
159
+ def pypi(*, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
159
160
  """
160
- Specifies that this step should execute on DGX cloud.
161
+ Specifies the PyPI packages for the step.
162
+
163
+ Information in this decorator will augment any
164
+ attributes set in the `@pypi_base` flow-level decorator. Hence,
165
+ you can use `@pypi_base` to set packages required by all
166
+ steps and use `@pypi` to specify step-specific overrides.
161
167
 
162
168
 
163
169
  Parameters
164
170
  ----------
165
- gpu : int
166
- Number of GPUs to use.
167
- gpu_type : str
168
- Type of Nvidia GPU to use.
169
- queue_timeout : int
170
- Time to keep the job in NVCF's queue.
171
+ packages : Dict[str, str], default: {}
172
+ Packages to use for this step. The key is the name of the package
173
+ and the value is the version to use.
174
+ python : str, optional, default: None
175
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
176
+ that the version used will correspond to the version of the Python interpreter used to start the run.
171
177
  """
172
178
  ...
173
179
 
174
180
  @typing.overload
175
- def timeout(*, seconds: int = 0, minutes: int = 0, hours: int = 0) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
181
+ def pypi(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
182
+ ...
183
+
184
+ @typing.overload
185
+ def pypi(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
186
+ ...
187
+
188
+ def pypi(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None):
176
189
  """
177
- Specifies a timeout for your step.
178
-
179
- This decorator is useful if this step may hang indefinitely.
180
-
181
- This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
182
- A timeout is considered to be an exception thrown by the step. It will cause the step to be
183
- retried if needed and the exception will be caught by the `@catch` decorator, if present.
190
+ Specifies the PyPI packages for the step.
184
191
 
185
- Note that all the values specified in parameters are added together so if you specify
186
- 60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.
192
+ Information in this decorator will augment any
193
+ attributes set in the `@pypi_base` flow-level decorator. Hence,
194
+ you can use `@pypi_base` to set packages required by all
195
+ steps and use `@pypi` to specify step-specific overrides.
187
196
 
188
197
 
189
198
  Parameters
190
199
  ----------
191
- seconds : int, default 0
192
- Number of seconds to wait prior to timing out.
193
- minutes : int, default 0
194
- Number of minutes to wait prior to timing out.
195
- hours : int, default 0
196
- Number of hours to wait prior to timing out.
200
+ packages : Dict[str, str], default: {}
201
+ Packages to use for this step. The key is the name of the package
202
+ and the value is the version to use.
203
+ python : str, optional, default: None
204
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
205
+ that the version used will correspond to the version of the Python interpreter used to start the run.
197
206
  """
198
207
  ...
199
208
 
200
209
  @typing.overload
201
- def timeout(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
210
+ def fast_bakery_internal(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
211
+ """
212
+ Internal decorator to support Fast bakery
213
+ """
202
214
  ...
203
215
 
204
216
  @typing.overload
205
- def timeout(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
217
+ def fast_bakery_internal(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
206
218
  ...
207
219
 
208
- def timeout(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, seconds: int = 0, minutes: int = 0, hours: int = 0):
220
+ def fast_bakery_internal(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
209
221
  """
210
- Specifies a timeout for your step.
211
-
212
- This decorator is useful if this step may hang indefinitely.
213
-
214
- This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
215
- A timeout is considered to be an exception thrown by the step. It will cause the step to be
216
- retried if needed and the exception will be caught by the `@catch` decorator, if present.
217
-
218
- Note that all the values specified in parameters are added together so if you specify
219
- 60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.
220
-
221
-
222
- Parameters
223
- ----------
224
- seconds : int, default 0
225
- Number of seconds to wait prior to timing out.
226
- minutes : int, default 0
227
- Number of minutes to wait prior to timing out.
228
- hours : int, default 0
229
- Number of hours to wait prior to timing out.
222
+ Internal decorator to support Fast bakery
230
223
  """
231
224
  ...
232
225
 
@@ -234,6 +227,61 @@ def huggingface_hub(*, temp_dir_root: typing.Optional[str] = None, load: typing.
234
227
  """
235
228
  Decorator that helps cache, version and store models/datasets from huggingface hub.
236
229
 
230
+ > Examples
231
+
232
+ **Usage: creating references of models from huggingface that may be loaded in downstream steps**
233
+ ```python
234
+ @huggingface_hub
235
+ @step
236
+ def pull_model_from_huggingface(self):
237
+ # `current.huggingface_hub.snapshot_download` downloads the model from the Hugging Face Hub
238
+ # and saves it in the backend storage based on the model's `repo_id`. If there exists a model
239
+ # with the same `repo_id` in the backend storage, it will not download the model again. The return
240
+ # value of the function is a reference to the model in the backend storage.
241
+ # This reference can be used to load the model in the subsequent steps via `@model(load=["llama_model"])`
242
+
243
+ self.model_id = "mistralai/Mistral-7B-Instruct-v0.1"
244
+ self.llama_model = current.huggingface_hub.snapshot_download(
245
+ repo_id=self.model_id,
246
+ allow_patterns=["*.safetensors", "*.json", "tokenizer.*"],
247
+ )
248
+ self.next(self.train)
249
+ ```
250
+
251
+ **Usage: loading models directly from huggingface hub or from cache (from metaflow's datastore)**
252
+ ```python
253
+ @huggingface_hub(load=["mistralai/Mistral-7B-Instruct-v0.1"])
254
+ @step
255
+ def pull_model_from_huggingface(self):
256
+ path_to_model = current.huggingface_hub.loaded["mistralai/Mistral-7B-Instruct-v0.1"]
257
+ ```
258
+
259
+ ```python
260
+ @huggingface_hub(load=[("mistralai/Mistral-7B-Instruct-v0.1", "/my-directory"), ("myorg/mistral-lora", "/my-lora-directory")])
261
+ @step
262
+ def finetune_model(self):
263
+ path_to_model = current.huggingface_hub.loaded["mistralai/Mistral-7B-Instruct-v0.1"]
264
+ # path_to_model will be /my-directory
265
+ ```
266
+
267
+ ```python
268
+ # Takes all the arguments passed to `snapshot_download`
269
+ # except for `local_dir`
270
+ @huggingface_hub(load=[
271
+ {
272
+ "repo_id": "mistralai/Mistral-7B-Instruct-v0.1",
273
+ },
274
+ {
275
+ "repo_id": "myorg/mistral-lora",
276
+ "repo_type": "model",
277
+ },
278
+ ])
279
+ @step
280
+ def finetune_model(self):
281
+ path_to_model = current.huggingface_hub.loaded["mistralai/Mistral-7B-Instruct-v0.1"]
282
+ # path_to_model will be /my-directory
283
+ ```
284
+
237
285
 
238
286
  Parameters
239
287
  ----------
@@ -256,35 +304,37 @@ def huggingface_hub(*, temp_dir_root: typing.Optional[str] = None, load: typing.
256
304
  ...
257
305
 
258
306
  @typing.overload
259
- def environment(*, vars: typing.Dict[str, str] = {}) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
307
+ def secrets(*, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = []) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
260
308
  """
261
- Specifies environment variables to be set prior to the execution of a step.
309
+ Specifies secrets to be retrieved and injected as environment variables prior to
310
+ the execution of a step.
262
311
 
263
312
 
264
313
  Parameters
265
314
  ----------
266
- vars : Dict[str, str], default {}
267
- Dictionary of environment variables to set.
315
+ sources : List[Union[str, Dict[str, Any]]], default: []
316
+ List of secret specs, defining how the secrets are to be retrieved
268
317
  """
269
318
  ...
270
319
 
271
320
  @typing.overload
272
- def environment(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
321
+ def secrets(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
273
322
  ...
274
323
 
275
324
  @typing.overload
276
- def environment(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
325
+ def secrets(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
277
326
  ...
278
327
 
279
- def environment(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, vars: typing.Dict[str, str] = {}):
328
+ def secrets(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = []):
280
329
  """
281
- Specifies environment variables to be set prior to the execution of a step.
330
+ Specifies secrets to be retrieved and injected as environment variables prior to
331
+ the execution of a step.
282
332
 
283
333
 
284
334
  Parameters
285
335
  ----------
286
- vars : Dict[str, str], default {}
287
- Dictionary of environment variables to set.
336
+ sources : List[Union[str, Dict[str, Any]]], default: []
337
+ List of secret specs, defining how the secrets are to be retrieved
288
338
  """
289
339
  ...
290
340
 
@@ -340,197 +390,202 @@ def catch(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], ty
340
390
  ...
341
391
 
342
392
  @typing.overload
343
- def model(*, load: typing.Union[typing.List[str], str, typing.List[typing.Tuple[str, typing.Optional[str]]]] = None, temp_dir_root: str = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
393
+ def timeout(*, seconds: int = 0, minutes: int = 0, hours: int = 0) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
344
394
  """
345
- Enables loading / saving of models within a step.
395
+ Specifies a timeout for your step.
346
396
 
397
+ This decorator is useful if this step may hang indefinitely.
347
398
 
399
+ This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
400
+ A timeout is considered to be an exception thrown by the step. It will cause the step to be
401
+ retried if needed and the exception will be caught by the `@catch` decorator, if present.
348
402
 
349
- Parameters
350
- ----------
351
- load : Union[List[str],str,List[Tuple[str,Union[str,None]]]], default: None
352
- Artifact name/s referencing the models/checkpoints to load. Artifact names refer to the names of the instance variables set to `self`.
353
- These artifact names given to `load` can be reference objects or reference `key` strings from objects created by:
354
- - `current.checkpoint`
355
- - `current.model`
356
- - `current.huggingface_hub`
403
+ Note that all the values specified in parameters are added together so if you specify
404
+ 60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.
357
405
 
358
- If a list of tuples is provided, the first element is the artifact name and the second element is the path the artifact needs to be unpacked on
359
- the local filesystem. If the second element is None, the artifact will be unpacked in the current working directory.
360
- If a string is provided, then the artifact corresponding to that name will be loaded in the current working directory.
361
406
 
362
- temp_dir_root : str, default: None
363
- The root directory under which `current.model.loaded` will store loaded models
407
+ Parameters
408
+ ----------
409
+ seconds : int, default 0
410
+ Number of seconds to wait prior to timing out.
411
+ minutes : int, default 0
412
+ Number of minutes to wait prior to timing out.
413
+ hours : int, default 0
414
+ Number of hours to wait prior to timing out.
364
415
  """
365
416
  ...
366
417
 
367
418
  @typing.overload
368
- def model(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
419
+ def timeout(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
369
420
  ...
370
421
 
371
422
  @typing.overload
372
- def model(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
423
+ def timeout(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
373
424
  ...
374
425
 
375
- def model(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, load: typing.Union[typing.List[str], str, typing.List[typing.Tuple[str, typing.Optional[str]]]] = None, temp_dir_root: str = None):
426
+ def timeout(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, seconds: int = 0, minutes: int = 0, hours: int = 0):
376
427
  """
377
- Enables loading / saving of models within a step.
428
+ Specifies a timeout for your step.
378
429
 
430
+ This decorator is useful if this step may hang indefinitely.
379
431
 
432
+ This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
433
+ A timeout is considered to be an exception thrown by the step. It will cause the step to be
434
+ retried if needed and the exception will be caught by the `@catch` decorator, if present.
380
435
 
381
- Parameters
382
- ----------
383
- load : Union[List[str],str,List[Tuple[str,Union[str,None]]]], default: None
384
- Artifact name/s referencing the models/checkpoints to load. Artifact names refer to the names of the instance variables set to `self`.
385
- These artifact names given to `load` can be reference objects or reference `key` strings from objects created by:
386
- - `current.checkpoint`
387
- - `current.model`
388
- - `current.huggingface_hub`
436
+ Note that all the values specified in parameters are added together so if you specify
437
+ 60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.
389
438
 
390
- If a list of tuples is provided, the first element is the artifact name and the second element is the path the artifact needs to be unpacked on
391
- the local filesystem. If the second element is None, the artifact will be unpacked in the current working directory.
392
- If a string is provided, then the artifact corresponding to that name will be loaded in the current working directory.
393
439
 
394
- temp_dir_root : str, default: None
395
- The root directory under which `current.model.loaded` will store loaded models
440
+ Parameters
441
+ ----------
442
+ seconds : int, default 0
443
+ Number of seconds to wait prior to timing out.
444
+ minutes : int, default 0
445
+ Number of minutes to wait prior to timing out.
446
+ hours : int, default 0
447
+ Number of hours to wait prior to timing out.
396
448
  """
397
449
  ...
398
450
 
399
- @typing.overload
400
- def resources(*, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
451
+ def nvct(*, gpu: int, gpu_type: str) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
401
452
  """
402
- Specifies the resources needed when executing this step.
403
-
404
- Use `@resources` to specify the resource requirements
405
- independently of the specific compute layer (`@batch`, `@kubernetes`).
406
-
407
- You can choose the compute layer on the command line by executing e.g.
408
- ```
409
- python myflow.py run --with batch
410
- ```
411
- or
412
- ```
413
- python myflow.py run --with kubernetes
414
- ```
415
- which executes the flow on the desired system using the
416
- requirements specified in `@resources`.
453
+ Specifies that this step should execute on DGX cloud.
417
454
 
418
455
 
419
456
  Parameters
420
457
  ----------
421
- cpu : int, default 1
422
- Number of CPUs required for this step.
423
- gpu : int, optional, default None
424
- Number of GPUs required for this step.
425
- disk : int, optional, default None
426
- Disk size (in MB) required for this step. Only applies on Kubernetes.
427
- memory : int, default 4096
428
- Memory size (in MB) required for this step.
429
- shared_memory : int, optional, default None
430
- The value for the size (in MiB) of the /dev/shm volume for this step.
431
- This parameter maps to the `--shm-size` option in Docker.
458
+ gpu : int
459
+ Number of GPUs to use.
460
+ gpu_type : str
461
+ Type of Nvidia GPU to use.
432
462
  """
433
463
  ...
434
464
 
435
- @typing.overload
436
- def resources(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
465
+ def app_deploy(*, app_port: int, app_name: str) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
466
+ """
467
+ Specifies that this step is used to deploy an instance of the app.
468
+ Requires that self.app_name, self.app_port, self.entrypoint and self.deployDir are set.
469
+
470
+
471
+ Parameters
472
+ ----------
473
+ app_port : int
474
+ Port of the app to deploy.
475
+ app_name : str
476
+ Name of the app to deploy.
477
+ """
437
478
  ...
438
479
 
439
- @typing.overload
440
- def resources(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
480
+ def nvidia(*, gpu: int, gpu_type: str, queue_timeout: int) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
481
+ """
482
+ Specifies that this step should execute on DGX cloud.
483
+
484
+
485
+ Parameters
486
+ ----------
487
+ gpu : int
488
+ Number of GPUs to use.
489
+ gpu_type : str
490
+ Type of Nvidia GPU to use.
491
+ queue_timeout : int
492
+ Time to keep the job in NVCF's queue.
493
+ """
441
494
  ...
442
495
 
443
- def resources(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None):
496
+ def ollama(*, models: list, backend: str, force_pull: bool, cache_update_policy: str, force_cache_update: bool, debug: bool, circuit_breaker_config: dict, timeout_config: dict) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
444
497
  """
445
- Specifies the resources needed when executing this step.
446
-
447
- Use `@resources` to specify the resource requirements
448
- independently of the specific compute layer (`@batch`, `@kubernetes`).
449
-
450
- You can choose the compute layer on the command line by executing e.g.
451
- ```
452
- python myflow.py run --with batch
453
- ```
454
- or
455
- ```
456
- python myflow.py run --with kubernetes
457
- ```
458
- which executes the flow on the desired system using the
459
- requirements specified in `@resources`.
498
+ This decorator is used to run Ollama APIs as Metaflow task sidecars.
460
499
 
500
+ User code call
501
+ --------------
502
+ @ollama(
503
+ models=[...],
504
+ ...
505
+ )
461
506
 
462
- Parameters
463
- ----------
464
- cpu : int, default 1
465
- Number of CPUs required for this step.
466
- gpu : int, optional, default None
467
- Number of GPUs required for this step.
468
- disk : int, optional, default None
469
- Disk size (in MB) required for this step. Only applies on Kubernetes.
470
- memory : int, default 4096
471
- Memory size (in MB) required for this step.
472
- shared_memory : int, optional, default None
473
- The value for the size (in MiB) of the /dev/shm volume for this step.
474
- This parameter maps to the `--shm-size` option in Docker.
475
- """
476
- ...
477
-
478
- @typing.overload
479
- def pypi(*, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
480
- """
481
- Specifies the PyPI packages for the step.
507
+ Valid backend options
508
+ ---------------------
509
+ - 'local': Run as a separate process on the local task machine.
510
+ - (TODO) 'managed': Outerbounds hosts and selects compute provider.
511
+ - (TODO) 'remote': Spin up separate instance to serve Ollama models.
482
512
 
483
- Information in this decorator will augment any
484
- attributes set in the `@pypi_base` flow-level decorator. Hence,
485
- you can use `@pypi_base` to set packages required by all
486
- steps and use `@pypi` to specify step-specific overrides.
513
+ Valid model options
514
+ -------------------
515
+ Any model here https://ollama.com/search, e.g. 'llama3.2', 'llama3.3'
487
516
 
488
517
 
489
518
  Parameters
490
519
  ----------
491
- packages : Dict[str, str], default: {}
492
- Packages to use for this step. The key is the name of the package
493
- and the value is the version to use.
494
- python : str, optional, default: None
495
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
496
- that the version used will correspond to the version of the Python interpreter used to start the run.
520
+ models: list[str]
521
+ List of Ollama containers running models in sidecars.
522
+ backend: str
523
+ Determines where and how to run the Ollama process.
524
+ force_pull: bool
525
+ Whether to run `ollama pull` no matter what, or first check the remote cache in Metaflow datastore for this model key.
526
+ cache_update_policy: str
527
+ Cache update policy: "auto", "force", or "never".
528
+ force_cache_update: bool
529
+ Simple override for "force" cache update policy.
530
+ debug: bool
531
+ Whether to turn on verbose debugging logs.
532
+ circuit_breaker_config: dict
533
+ Configuration for circuit breaker protection. Keys: failure_threshold, recovery_timeout, reset_timeout.
534
+ timeout_config: dict
535
+ Configuration for various operation timeouts. Keys: pull, stop, health_check, install, server_startup.
497
536
  """
498
537
  ...
499
538
 
500
539
  @typing.overload
501
- def pypi(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
502
- ...
503
-
504
- @typing.overload
505
- def pypi(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
506
- ...
507
-
508
- def pypi(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None):
540
+ def checkpoint(*, load_policy: str = 'fresh', temp_dir_root: str = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
509
541
  """
510
- Specifies the PyPI packages for the step.
542
+ Enables checkpointing for a step.
511
543
 
512
- Information in this decorator will augment any
513
- attributes set in the `@pypi_base` flow-level decorator. Hence,
514
- you can use `@pypi_base` to set packages required by all
515
- steps and use `@pypi` to specify step-specific overrides.
544
+ > Examples
516
545
 
546
+ - Saving Checkpoints
517
547
 
518
- Parameters
519
- ----------
520
- packages : Dict[str, str], default: {}
521
- Packages to use for this step. The key is the name of the package
522
- and the value is the version to use.
523
- python : str, optional, default: None
524
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
525
- that the version used will correspond to the version of the Python interpreter used to start the run.
526
- """
527
- ...
528
-
529
- @typing.overload
530
- def checkpoint(*, load_policy: str = 'fresh', temp_dir_root: str = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
531
- """
532
- Enables checkpointing for a step.
548
+ ```python
549
+ @checkpoint
550
+ @step
551
+ def train(self):
552
+ model = create_model(self.parameters, checkpoint_path = None)
553
+ for i in range(self.epochs):
554
+ # some training logic
555
+ loss = model.train(self.dataset)
556
+ if i % 10 == 0:
557
+ model.save(
558
+ current.checkpoint.directory,
559
+ )
560
+ # saves the contents of the `current.checkpoint.directory` as a checkpoint
561
+ # and returns a reference dictionary to the checkpoint saved in the datastore
562
+ self.latest_checkpoint = current.checkpoint.save(
563
+ name="epoch_checkpoint",
564
+ metadata={
565
+ "epoch": i,
566
+ "loss": loss,
567
+ }
568
+ )
569
+ ```
570
+
571
+ - Using Loaded Checkpoints
533
572
 
573
+ ```python
574
+ @retry(times=3)
575
+ @checkpoint
576
+ @step
577
+ def train(self):
578
+ # Assume that the task has restarted and the previous attempt of the task
579
+ # saved a checkpoint
580
+ checkpoint_path = None
581
+ if current.checkpoint.is_loaded: # Check if a checkpoint is loaded
582
+ print("Loaded checkpoint from the previous attempt")
583
+ checkpoint_path = current.checkpoint.directory
584
+
585
+ model = create_model(self.parameters, checkpoint_path = checkpoint_path)
586
+ for i in range(self.epochs):
587
+ ...
588
+ ```
534
589
 
535
590
 
536
591
  Parameters
@@ -563,6 +618,51 @@ def checkpoint(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None
563
618
  """
564
619
  Enables checkpointing for a step.
565
620
 
621
+ > Examples
622
+
623
+ - Saving Checkpoints
624
+
625
+ ```python
626
+ @checkpoint
627
+ @step
628
+ def train(self):
629
+ model = create_model(self.parameters, checkpoint_path = None)
630
+ for i in range(self.epochs):
631
+ # some training logic
632
+ loss = model.train(self.dataset)
633
+ if i % 10 == 0:
634
+ model.save(
635
+ current.checkpoint.directory,
636
+ )
637
+ # saves the contents of the `current.checkpoint.directory` as a checkpoint
638
+ # and returns a reference dictionary to the checkpoint saved in the datastore
639
+ self.latest_checkpoint = current.checkpoint.save(
640
+ name="epoch_checkpoint",
641
+ metadata={
642
+ "epoch": i,
643
+ "loss": loss,
644
+ }
645
+ )
646
+ ```
647
+
648
+ - Using Loaded Checkpoints
649
+
650
+ ```python
651
+ @retry(times=3)
652
+ @checkpoint
653
+ @step
654
+ def train(self):
655
+ # Assume that the task has restarted and the previous attempt of the task
656
+ # saved a checkpoint
657
+ checkpoint_path = None
658
+ if current.checkpoint.is_loaded: # Check if a checkpoint is loaded
659
+ print("Loaded checkpoint from the previous attempt")
660
+ checkpoint_path = current.checkpoint.directory
661
+
662
+ model = create_model(self.parameters, checkpoint_path = checkpoint_path)
663
+ for i in range(self.epochs):
664
+ ...
665
+ ```
566
666
 
567
667
 
568
668
  Parameters
@@ -583,236 +683,120 @@ def checkpoint(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None
583
683
  """
584
684
  ...
585
685
 
586
- def nvct(*, gpu: int, gpu_type: str) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
686
+ def vllm(*, model: str, backend: str, debug: bool, kwargs: typing.Any) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
587
687
  """
588
- Specifies that this step should execute on DGX cloud.
688
+ This decorator is used to run vllm APIs as Metaflow task sidecars.
689
+
690
+ User code call
691
+ --------------
692
+ @vllm(
693
+ model="...",
694
+ ...
695
+ )
696
+
697
+ Valid backend options
698
+ ---------------------
699
+ - 'local': Run as a separate process on the local task machine.
700
+
701
+ Valid model options
702
+ -------------------
703
+ Any HuggingFace model identifier, e.g. 'meta-llama/Llama-3.2-1B'
704
+
705
+ NOTE: vLLM's OpenAI-compatible server serves ONE model per server instance.
706
+ If you need multiple models, you must create multiple @vllm decorators.
589
707
 
590
708
 
591
709
  Parameters
592
710
  ----------
593
- gpu : int
594
- Number of GPUs to use.
595
- gpu_type : str
596
- Type of Nvidia GPU to use.
597
- """
598
- ...
599
-
600
- @typing.overload
601
- def fast_bakery_internal(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
602
- """
603
- Internal decorator to support Fast bakery
604
- """
605
- ...
606
-
607
- @typing.overload
608
- def fast_bakery_internal(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
609
- ...
610
-
611
- def fast_bakery_internal(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
612
- """
613
- Internal decorator to support Fast bakery
711
+ model: str
712
+ HuggingFace model identifier to be served by vLLM.
713
+ backend: str
714
+ Determines where and how to run the vLLM process.
715
+ debug: bool
716
+ Whether to turn on verbose debugging logs.
717
+ kwargs : Any
718
+ Any other keyword arguments are passed directly to the vLLM engine.
719
+ This allows for flexible configuration of vLLM server settings.
720
+ For example, `tensor_parallel_size=2`.
614
721
  """
615
722
  ...
616
723
 
617
724
  @typing.overload
618
- def conda(*, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
725
+ def resources(*, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
619
726
  """
620
- Specifies the Conda environment for the step.
727
+ Specifies the resources needed when executing this step.
621
728
 
622
- Information in this decorator will augment any
623
- attributes set in the `@conda_base` flow-level decorator. Hence,
624
- you can use `@conda_base` to set packages required by all
625
- steps and use `@conda` to specify step-specific overrides.
729
+ Use `@resources` to specify the resource requirements
730
+ independently of the specific compute layer (`@batch`, `@kubernetes`).
731
+
732
+ You can choose the compute layer on the command line by executing e.g.
733
+ ```
734
+ python myflow.py run --with batch
735
+ ```
736
+ or
737
+ ```
738
+ python myflow.py run --with kubernetes
739
+ ```
740
+ which executes the flow on the desired system using the
741
+ requirements specified in `@resources`.
626
742
 
627
743
 
628
744
  Parameters
629
745
  ----------
630
- packages : Dict[str, str], default {}
631
- Packages to use for this step. The key is the name of the package
632
- and the value is the version to use.
633
- libraries : Dict[str, str], default {}
634
- Supported for backward compatibility. When used with packages, packages will take precedence.
635
- python : str, optional, default None
636
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
637
- that the version used will correspond to the version of the Python interpreter used to start the run.
638
- disabled : bool, default False
639
- If set to True, disables @conda.
746
+ cpu : int, default 1
747
+ Number of CPUs required for this step.
748
+ gpu : int, optional, default None
749
+ Number of GPUs required for this step.
750
+ disk : int, optional, default None
751
+ Disk size (in MB) required for this step. Only applies on Kubernetes.
752
+ memory : int, default 4096
753
+ Memory size (in MB) required for this step.
754
+ shared_memory : int, optional, default None
755
+ The value for the size (in MiB) of the /dev/shm volume for this step.
756
+ This parameter maps to the `--shm-size` option in Docker.
640
757
  """
641
758
  ...
642
759
 
643
760
  @typing.overload
644
- def conda(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
761
+ def resources(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
645
762
  ...
646
763
 
647
764
  @typing.overload
648
- def conda(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
765
+ def resources(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
649
766
  ...
650
767
 
651
- def conda(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False):
768
+ def resources(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None):
652
769
  """
653
- Specifies the Conda environment for the step.
770
+ Specifies the resources needed when executing this step.
654
771
 
655
- Information in this decorator will augment any
656
- attributes set in the `@conda_base` flow-level decorator. Hence,
657
- you can use `@conda_base` to set packages required by all
658
- steps and use `@conda` to specify step-specific overrides.
772
+ Use `@resources` to specify the resource requirements
773
+ independently of the specific compute layer (`@batch`, `@kubernetes`).
774
+
775
+ You can choose the compute layer on the command line by executing e.g.
776
+ ```
777
+ python myflow.py run --with batch
778
+ ```
779
+ or
780
+ ```
781
+ python myflow.py run --with kubernetes
782
+ ```
783
+ which executes the flow on the desired system using the
784
+ requirements specified in `@resources`.
659
785
 
660
786
 
661
787
  Parameters
662
788
  ----------
663
- packages : Dict[str, str], default {}
664
- Packages to use for this step. The key is the name of the package
665
- and the value is the version to use.
666
- libraries : Dict[str, str], default {}
667
- Supported for backward compatibility. When used with packages, packages will take precedence.
668
- python : str, optional, default None
669
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
670
- that the version used will correspond to the version of the Python interpreter used to start the run.
671
- disabled : bool, default False
672
- If set to True, disables @conda.
673
- """
674
- ...
675
-
676
- @typing.overload
677
- def retry(*, times: int = 3, minutes_between_retries: int = 2) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
678
- """
679
- Specifies the number of times the task corresponding
680
- to a step needs to be retried.
681
-
682
- This decorator is useful for handling transient errors, such as networking issues.
683
- If your task contains operations that can't be retried safely, e.g. database updates,
684
- it is advisable to annotate it with `@retry(times=0)`.
685
-
686
- This can be used in conjunction with the `@catch` decorator. The `@catch`
687
- decorator will execute a no-op task after all retries have been exhausted,
688
- ensuring that the flow execution can continue.
689
-
690
-
691
- Parameters
692
- ----------
693
- times : int, default 3
694
- Number of times to retry this task.
695
- minutes_between_retries : int, default 2
696
- Number of minutes between retries.
697
- """
698
- ...
699
-
700
- @typing.overload
701
- def retry(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
702
- ...
703
-
704
- @typing.overload
705
- def retry(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
706
- ...
707
-
708
- def retry(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, times: int = 3, minutes_between_retries: int = 2):
709
- """
710
- Specifies the number of times the task corresponding
711
- to a step needs to be retried.
712
-
713
- This decorator is useful for handling transient errors, such as networking issues.
714
- If your task contains operations that can't be retried safely, e.g. database updates,
715
- it is advisable to annotate it with `@retry(times=0)`.
716
-
717
- This can be used in conjunction with the `@catch` decorator. The `@catch`
718
- decorator will execute a no-op task after all retries have been exhausted,
719
- ensuring that the flow execution can continue.
720
-
721
-
722
- Parameters
723
- ----------
724
- times : int, default 3
725
- Number of times to retry this task.
726
- minutes_between_retries : int, default 2
727
- Number of minutes between retries.
728
- """
729
- ...
730
-
731
- def app_deploy(*, app_port: int, app_name: str) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
732
- """
733
- Specifies that this step is used to deploy an instance of the app.
734
- Requires that self.app_name, self.app_port, self.entrypoint and self.deployDir is set.
735
-
736
-
737
- Parameters
738
- ----------
739
- app_port : int
740
- Number of GPUs to use.
741
- app_name : str
742
- Name of the app to deploy.
743
- """
744
- ...
745
-
746
- @typing.overload
747
- def secrets(*, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = []) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
748
- """
749
- Specifies secrets to be retrieved and injected as environment variables prior to
750
- the execution of a step.
751
-
752
-
753
- Parameters
754
- ----------
755
- sources : List[Union[str, Dict[str, Any]]], default: []
756
- List of secret specs, defining how the secrets are to be retrieved
757
- """
758
- ...
759
-
760
- @typing.overload
761
- def secrets(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
762
- ...
763
-
764
- @typing.overload
765
- def secrets(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
766
- ...
767
-
768
- def secrets(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = []):
769
- """
770
- Specifies secrets to be retrieved and injected as environment variables prior to
771
- the execution of a step.
772
-
773
-
774
- Parameters
775
- ----------
776
- sources : List[Union[str, Dict[str, Any]]], default: []
777
- List of secret specs, defining how the secrets are to be retrieved
778
- """
779
- ...
780
-
781
- def vllm(*, model: str, backend: str, debug: bool, kwargs: typing.Any) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
782
- """
783
- This decorator is used to run vllm APIs as Metaflow task sidecars.
784
-
785
- User code call
786
- --------------
787
- @vllm(
788
- model="...",
789
- ...
790
- )
791
-
792
- Valid backend options
793
- ---------------------
794
- - 'local': Run as a separate process on the local task machine.
795
-
796
- Valid model options
797
- -------------------
798
- Any HuggingFace model identifier, e.g. 'meta-llama/Llama-3.2-1B'
799
-
800
- NOTE: vLLM's OpenAI-compatible server serves ONE model per server instance.
801
- If you need multiple models, you must create multiple @vllm decorators.
802
-
803
-
804
- Parameters
805
- ----------
806
- model: str
807
- HuggingFace model identifier to be served by vLLM.
808
- backend: str
809
- Determines where and how to run the vLLM process.
810
- debug: bool
811
- Whether to turn on verbose debugging logs.
812
- kwargs : Any
813
- Any other keyword arguments are passed directly to the vLLM engine.
814
- This allows for flexible configuration of vLLM server settings.
815
- For example, `tensor_parallel_size=2`.
789
+ cpu : int, default 1
790
+ Number of CPUs required for this step.
791
+ gpu : int, optional, default None
792
+ Number of GPUs required for this step.
793
+ disk : int, optional, default None
794
+ Disk size (in MB) required for this step. Only applies on Kubernetes.
795
+ memory : int, default 4096
796
+ Memory size (in MB) required for this step.
797
+ shared_memory : int, optional, default None
798
+ The value for the size (in MiB) of the /dev/shm volume for this step.
799
+ This parameter maps to the `--shm-size` option in Docker.
816
800
  """
817
801
  ...
818
802
 
@@ -905,49 +889,6 @@ def kubernetes(*, cpu: int = 1, memory: int = 4096, disk: int = 10240, image: ty
905
889
  """
906
890
  ...
907
891
 
908
- def ollama(*, models: list, backend: str, force_pull: bool, cache_update_policy: str, force_cache_update: bool, debug: bool, circuit_breaker_config: dict, timeout_config: dict) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
909
- """
910
- This decorator is used to run Ollama APIs as Metaflow task sidecars.
911
-
912
- User code call
913
- --------------
914
- @ollama(
915
- models=[...],
916
- ...
917
- )
918
-
919
- Valid backend options
920
- ---------------------
921
- - 'local': Run as a separate process on the local task machine.
922
- - (TODO) 'managed': Outerbounds hosts and selects compute provider.
923
- - (TODO) 'remote': Spin up separate instance to serve Ollama models.
924
-
925
- Valid model options
926
- -------------------
927
- Any model here https://ollama.com/search, e.g. 'llama3.2', 'llama3.3'
928
-
929
-
930
- Parameters
931
- ----------
932
- models: list[str]
933
- List of Ollama containers running models in sidecars.
934
- backend: str
935
- Determines where and how to run the Ollama process.
936
- force_pull: bool
937
- Whether to run `ollama pull` no matter what, or first check the remote cache in Metaflow datastore for this model key.
938
- cache_update_policy: str
939
- Cache update policy: "auto", "force", or "never".
940
- force_cache_update: bool
941
- Simple override for "force" cache update policy.
942
- debug: bool
943
- Whether to turn on verbose debugging logs.
944
- circuit_breaker_config: dict
945
- Configuration for circuit breaker protection. Keys: failure_threshold, recovery_timeout, reset_timeout.
946
- timeout_config: dict
947
- Configuration for various operation timeouts. Keys: pull, stop, health_check, install, server_startup.
948
- """
949
- ...
950
-
951
892
  @typing.overload
952
893
  def card(*, type: str = 'default', id: typing.Optional[str] = None, options: typing.Dict[str, typing.Any] = {}, timeout: int = 45) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
953
894
  """
@@ -1017,57 +958,494 @@ def parallel(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None],
1017
958
  ...
1018
959
 
1019
960
  @typing.overload
1020
- def conda_base(*, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
961
+ def retry(*, times: int = 3, minutes_between_retries: int = 2) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1021
962
  """
1022
- Specifies the Conda environment for all steps of the flow.
963
+ Specifies the number of times the task corresponding
964
+ to a step needs to be retried.
1023
965
 
1024
- Use `@conda_base` to set common libraries required by all
1025
- steps and use `@conda` to specify step-specific additions.
966
+ This decorator is useful for handling transient errors, such as networking issues.
967
+ If your task contains operations that can't be retried safely, e.g. database updates,
968
+ it is advisable to annotate it with `@retry(times=0)`.
969
+
970
+ This can be used in conjunction with the `@catch` decorator. The `@catch`
971
+ decorator will execute a no-op task after all retries have been exhausted,
972
+ ensuring that the flow execution can continue.
1026
973
 
1027
974
 
1028
975
  Parameters
1029
976
  ----------
1030
- packages : Dict[str, str], default {}
1031
- Packages to use for this flow. The key is the name of the package
1032
- and the value is the version to use.
1033
- libraries : Dict[str, str], default {}
1034
- Supported for backward compatibility. When used with packages, packages will take precedence.
1035
- python : str, optional, default None
1036
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
1037
- that the version used will correspond to the version of the Python interpreter used to start the run.
1038
- disabled : bool, default False
1039
- If set to True, disables Conda.
977
+ times : int, default 3
978
+ Number of times to retry this task.
979
+ minutes_between_retries : int, default 2
980
+ Number of minutes between retries.
1040
981
  """
1041
982
  ...
1042
983
 
1043
984
  @typing.overload
1044
- def conda_base(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
985
+ def retry(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1045
986
  ...
1046
987
 
1047
- def conda_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False):
988
+ @typing.overload
989
+ def retry(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
990
+ ...
991
+
992
+ def retry(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, times: int = 3, minutes_between_retries: int = 2):
1048
993
  """
1049
- Specifies the Conda environment for all steps of the flow.
994
+ Specifies the number of times the task corresponding
995
+ to a step needs to be retried.
1050
996
 
1051
- Use `@conda_base` to set common libraries required by all
1052
- steps and use `@conda` to specify step-specific additions.
997
+ This decorator is useful for handling transient errors, such as networking issues.
998
+ If your task contains operations that can't be retried safely, e.g. database updates,
999
+ it is advisable to annotate it with `@retry(times=0)`.
1000
+
1001
+ This can be used in conjunction with the `@catch` decorator. The `@catch`
1002
+ decorator will execute a no-op task after all retries have been exhausted,
1003
+ ensuring that the flow execution can continue.
1053
1004
 
1054
1005
 
1055
1006
  Parameters
1056
1007
  ----------
1057
- packages : Dict[str, str], default {}
1058
- Packages to use for this flow. The key is the name of the package
1059
- and the value is the version to use.
1060
- libraries : Dict[str, str], default {}
1061
- Supported for backward compatibility. When used with packages, packages will take precedence.
1062
- python : str, optional, default None
1063
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
1064
- that the version used will correspond to the version of the Python interpreter used to start the run.
1065
- disabled : bool, default False
1066
- If set to True, disables Conda.
1008
+ times : int, default 3
1009
+ Number of times to retry this task.
1010
+ minutes_between_retries : int, default 2
1011
+ Number of minutes between retries.
1067
1012
  """
1068
1013
  ...
1069
1014
 
1070
- def airflow_external_task_sensor(*, timeout: int, poke_interval: int, mode: str, exponential_backoff: bool, pool: str, soft_fail: bool, name: str, description: str, external_dag_id: str, external_task_ids: typing.List[str], allowed_states: typing.List[str], failed_states: typing.List[str], execution_delta: "datetime.timedelta", check_existence: bool) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1015
+ @typing.overload
1016
+ def conda(*, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1017
+ """
1018
+ Specifies the Conda environment for the step.
1019
+
1020
+ Information in this decorator will augment any
1021
+ attributes set in the `@conda_base` flow-level decorator. Hence,
1022
+ you can use `@conda_base` to set packages required by all
1023
+ steps and use `@conda` to specify step-specific overrides.
1024
+
1025
+
1026
+ Parameters
1027
+ ----------
1028
+ packages : Dict[str, str], default {}
1029
+ Packages to use for this step. The key is the name of the package
1030
+ and the value is the version to use.
1031
+ libraries : Dict[str, str], default {}
1032
+ Supported for backward compatibility. When used with packages, packages will take precedence.
1033
+ python : str, optional, default None
1034
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
1035
+ that the version used will correspond to the version of the Python interpreter used to start the run.
1036
+ disabled : bool, default False
1037
+ If set to True, disables @conda.
1038
+ """
1039
+ ...
1040
+
1041
+ @typing.overload
1042
+ def conda(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1043
+ ...
1044
+
1045
+ @typing.overload
1046
+ def conda(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1047
+ ...
1048
+
1049
+ def conda(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False):
1050
+ """
1051
+ Specifies the Conda environment for the step.
1052
+
1053
+ Information in this decorator will augment any
1054
+ attributes set in the `@conda_base` flow-level decorator. Hence,
1055
+ you can use `@conda_base` to set packages required by all
1056
+ steps and use `@conda` to specify step-specific overrides.
1057
+
1058
+
1059
+ Parameters
1060
+ ----------
1061
+ packages : Dict[str, str], default {}
1062
+ Packages to use for this step. The key is the name of the package
1063
+ and the value is the version to use.
1064
+ libraries : Dict[str, str], default {}
1065
+ Supported for backward compatibility. When used with packages, packages will take precedence.
1066
+ python : str, optional, default None
1067
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
1068
+ that the version used will correspond to the version of the Python interpreter used to start the run.
1069
+ disabled : bool, default False
1070
+ If set to True, disables @conda.
1071
+ """
1072
+ ...
1073
+
1074
+ @typing.overload
1075
+ def model(*, load: typing.Union[typing.List[str], str, typing.List[typing.Tuple[str, typing.Optional[str]]]] = None, temp_dir_root: str = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1076
+ """
1077
+ Enables loading / saving of models within a step.
1078
+
1079
+ > Examples
1080
+ - Saving Models
1081
+ ```python
1082
+ @model
1083
+ @step
1084
+ def train(self):
1085
+ # current.model.save returns a dictionary reference to the model saved
1086
+ self.my_model = current.model.save(
1087
+ path_to_my_model,
1088
+ label="my_model",
1089
+ metadata={
1090
+ "epochs": 10,
1091
+ "batch-size": 32,
1092
+ "learning-rate": 0.001,
1093
+ }
1094
+ )
1095
+ self.next(self.test)
1096
+
1097
+ @model(load="my_model")
1098
+ @step
1099
+ def test(self):
1100
+ # `current.model.loaded` returns a dictionary of the loaded models
1101
+ # where the key is the name of the artifact and the value is the path to the model
1102
+ print(os.listdir(current.model.loaded["my_model"]))
1103
+ self.next(self.end)
1104
+ ```
1105
+
1106
+ - Loading models
1107
+ ```python
1108
+ @step
1109
+ def train(self):
1110
+ # current.model.load returns the path to the model loaded
1111
+ checkpoint_path = current.model.load(
1112
+ self.checkpoint_key,
1113
+ )
1114
+ model_path = current.model.load(
1115
+ self.model,
1116
+ )
1117
+ self.next(self.test)
1118
+ ```
1119
+
1120
+
1121
+ Parameters
1122
+ ----------
1123
+ load : Union[List[str],str,List[Tuple[str,Union[str,None]]]], default: None
1124
+ Artifact name/s referencing the models/checkpoints to load. Artifact names refer to the names of the instance variables set to `self`.
1125
+ These artifact names give to `load` be reference objects or reference `key` string's from objects created by `current.checkpoint` / `current.model` / `current.huggingface_hub`.
1126
+ If a list of tuples is provided, the first element is the artifact name and the second element is the path the artifact needs be unpacked on
1127
+ the local filesystem. If the second element is None, the artifact will be unpacked in the current working directory.
1128
+ If a string is provided, then the artifact corresponding to that name will be loaded in the current working directory.
1129
+
1130
+ temp_dir_root : str, default: None
1131
+ The root directory under which `current.model.loaded` will store loaded models
1132
+ """
1133
+ ...
1134
+
1135
+ @typing.overload
1136
+ def model(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1137
+ ...
1138
+
1139
+ @typing.overload
1140
+ def model(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1141
+ ...
1142
+
1143
+ def model(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, load: typing.Union[typing.List[str], str, typing.List[typing.Tuple[str, typing.Optional[str]]]] = None, temp_dir_root: str = None):
1144
+ """
1145
+ Enables loading / saving of models within a step.
1146
+
1147
+ > Examples
1148
+ - Saving Models
1149
+ ```python
1150
+ @model
1151
+ @step
1152
+ def train(self):
1153
+ # current.model.save returns a dictionary reference to the model saved
1154
+ self.my_model = current.model.save(
1155
+ path_to_my_model,
1156
+ label="my_model",
1157
+ metadata={
1158
+ "epochs": 10,
1159
+ "batch-size": 32,
1160
+ "learning-rate": 0.001,
1161
+ }
1162
+ )
1163
+ self.next(self.test)
1164
+
1165
+ @model(load="my_model")
1166
+ @step
1167
+ def test(self):
1168
+ # `current.model.loaded` returns a dictionary of the loaded models
1169
+ # where the key is the name of the artifact and the value is the path to the model
1170
+ print(os.listdir(current.model.loaded["my_model"]))
1171
+ self.next(self.end)
1172
+ ```
1173
+
1174
+ - Loading models
1175
+ ```python
1176
+ @step
1177
+ def train(self):
1178
+ # current.model.load returns the path to the model loaded
1179
+ checkpoint_path = current.model.load(
1180
+ self.checkpoint_key,
1181
+ )
1182
+ model_path = current.model.load(
1183
+ self.model,
1184
+ )
1185
+ self.next(self.test)
1186
+ ```
1187
+
1188
+
1189
+ Parameters
1190
+ ----------
1191
+ load : Union[List[str],str,List[Tuple[str,Union[str,None]]]], default: None
1192
+ Artifact name/s referencing the models/checkpoints to load. Artifact names refer to the names of the instance variables set to `self`.
1193
+ These artifact names give to `load` be reference objects or reference `key` string's from objects created by `current.checkpoint` / `current.model` / `current.huggingface_hub`.
1194
+ If a list of tuples is provided, the first element is the artifact name and the second element is the path the artifact needs be unpacked on
1195
+ the local filesystem. If the second element is None, the artifact will be unpacked in the current working directory.
1196
+ If a string is provided, then the artifact corresponding to that name will be loaded in the current working directory.
1197
+
1198
+ temp_dir_root : str, default: None
1199
+ The root directory under which `current.model.loaded` will store loaded models
1200
+ """
1201
+ ...
1202
+
1203
+ @typing.overload
1204
+ def environment(*, vars: typing.Dict[str, str] = {}) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1205
+ """
1206
+ Specifies environment variables to be set prior to the execution of a step.
1207
+
1208
+
1209
+ Parameters
1210
+ ----------
1211
+ vars : Dict[str, str], default {}
1212
+ Dictionary of environment variables to set.
1213
+ """
1214
+ ...
1215
+
1216
+ @typing.overload
1217
+ def environment(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1218
+ ...
1219
+
1220
+ @typing.overload
1221
+ def environment(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1222
+ ...
1223
+
1224
+ def environment(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, vars: typing.Dict[str, str] = {}):
1225
+ """
1226
+ Specifies environment variables to be set prior to the execution of a step.
1227
+
1228
+
1229
+ Parameters
1230
+ ----------
1231
+ vars : Dict[str, str], default {}
1232
+ Dictionary of environment variables to set.
1233
+ """
1234
+ ...
1235
+
1236
+ @typing.overload
1237
+ def pypi_base(*, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1238
+ """
1239
+ Specifies the PyPI packages for all steps of the flow.
1240
+
1241
+ Use `@pypi_base` to set common packages required by all
1242
+ steps and use `@pypi` to specify step-specific overrides.
1243
+
1244
+ Parameters
1245
+ ----------
1246
+ packages : Dict[str, str], default: {}
1247
+ Packages to use for this flow. The key is the name of the package
1248
+ and the value is the version to use.
1249
+ python : str, optional, default: None
1250
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
1251
+ that the version used will correspond to the version of the Python interpreter used to start the run.
1252
+ """
1253
+ ...
1254
+
1255
+ @typing.overload
1256
+ def pypi_base(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1257
+ ...
1258
+
1259
+ def pypi_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None):
1260
+ """
1261
+ Specifies the PyPI packages for all steps of the flow.
1262
+
1263
+ Use `@pypi_base` to set common packages required by all
1264
+ steps and use `@pypi` to specify step-specific overrides.
1265
+
1266
+ Parameters
1267
+ ----------
1268
+ packages : Dict[str, str], default: {}
1269
+ Packages to use for this flow. The key is the name of the package
1270
+ and the value is the version to use.
1271
+ python : str, optional, default: None
1272
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
1273
+ that the version used will correspond to the version of the Python interpreter used to start the run.
1274
+ """
1275
+ ...
1276
+
1277
+ def project(*, name: str, branch: typing.Optional[str] = None, production: bool = False) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1278
+ """
1279
+ Specifies what flows belong to the same project.
1280
+
1281
+ A project-specific namespace is created for all flows that
1282
+ use the same `@project(name)`.
1283
+
1284
+
1285
+ Parameters
1286
+ ----------
1287
+ name : str
1288
+ Project name. Make sure that the name is unique amongst all
1289
+ projects that use the same production scheduler. The name may
1290
+ contain only lowercase alphanumeric characters and underscores.
1291
+
1292
+ branch : Optional[str], default None
1293
+ The branch to use. If not specified, the branch is set to
1294
+ `user.<username>` unless `production` is set to `True`. This can
1295
+ also be set on the command line using `--branch` as a top-level option.
1296
+ It is an error to specify `branch` in the decorator and on the command line.
1297
+
1298
+ production : bool, default False
1299
+ Whether or not the branch is the production branch. This can also be set on the
1300
+ command line using `--production` as a top-level option. It is an error to specify
1301
+ `production` in the decorator and on the command line.
1302
+ The project branch name will be:
1303
+ - if `branch` is specified:
1304
+ - if `production` is True: `prod.<branch>`
1305
+ - if `production` is False: `test.<branch>`
1306
+ - if `branch` is not specified:
1307
+ - if `production` is True: `prod`
1308
+ - if `production` is False: `user.<username>`
1309
+ """
1310
+ ...
1311
+
1312
+ def airflow_s3_key_sensor(*, timeout: int, poke_interval: int, mode: str, exponential_backoff: bool, pool: str, soft_fail: bool, name: str, description: str, bucket_key: typing.Union[str, typing.List[str]], bucket_name: str, wildcard_match: bool, aws_conn_id: str, verify: bool) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1313
+ """
1314
+ The `@airflow_s3_key_sensor` decorator attaches a Airflow [S3KeySensor](https://airflow.apache.org/docs/apache-airflow-providers-amazon/stable/_api/airflow/providers/amazon/aws/sensors/s3/index.html#airflow.providers.amazon.aws.sensors.s3.S3KeySensor)
1315
+ before the start step of the flow. This decorator only works when a flow is scheduled on Airflow
1316
+ and is compiled using `airflow create`. More than one `@airflow_s3_key_sensor` can be
1317
+ added as a flow decorators. Adding more than one decorator will ensure that `start` step
1318
+ starts only after all sensors finish.
1319
+
1320
+
1321
+ Parameters
1322
+ ----------
1323
+ timeout : int
1324
+ Time, in seconds before the task times out and fails. (Default: 3600)
1325
+ poke_interval : int
1326
+ Time in seconds that the job should wait in between each try. (Default: 60)
1327
+ mode : str
1328
+ How the sensor operates. Options are: { poke | reschedule }. (Default: "poke")
1329
+ exponential_backoff : bool
1330
+ allow progressive longer waits between pokes by using exponential backoff algorithm. (Default: True)
1331
+ pool : str
1332
+ the slot pool this task should run in,
1333
+ slot pools are a way to limit concurrency for certain tasks. (Default:None)
1334
+ soft_fail : bool
1335
+ Set to true to mark the task as SKIPPED on failure. (Default: False)
1336
+ name : str
1337
+ Name of the sensor on Airflow
1338
+ description : str
1339
+ Description of sensor in the Airflow UI
1340
+ bucket_key : Union[str, List[str]]
1341
+ The key(s) being waited on. Supports full s3:// style url or relative path from root level.
1342
+ When it's specified as a full s3:// url, please leave `bucket_name` as None
1343
+ bucket_name : str
1344
+ Name of the S3 bucket. Only needed when bucket_key is not provided as a full s3:// url.
1345
+ When specified, all the keys passed to bucket_key refers to this bucket. (Default:None)
1346
+ wildcard_match : bool
1347
+ whether the bucket_key should be interpreted as a Unix wildcard pattern. (Default: False)
1348
+ aws_conn_id : str
1349
+ a reference to the s3 connection on Airflow. (Default: None)
1350
+ verify : bool
1351
+ Whether or not to verify SSL certificates for S3 connection. (Default: None)
1352
+ """
1353
+ ...
1354
+
1355
+ @typing.overload
1356
+ def trigger(*, event: typing.Union[str, typing.Dict[str, typing.Any], None] = None, events: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = [], options: typing.Dict[str, typing.Any] = {}) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1357
+ """
1358
+ Specifies the event(s) that this flow depends on.
1359
+
1360
+ ```
1361
+ @trigger(event='foo')
1362
+ ```
1363
+ or
1364
+ ```
1365
+ @trigger(events=['foo', 'bar'])
1366
+ ```
1367
+
1368
+ Additionally, you can specify the parameter mappings
1369
+ to map event payload to Metaflow parameters for the flow.
1370
+ ```
1371
+ @trigger(event={'name':'foo', 'parameters':{'flow_param': 'event_field'}})
1372
+ ```
1373
+ or
1374
+ ```
1375
+ @trigger(events=[{'name':'foo', 'parameters':{'flow_param_1': 'event_field_1'},
1376
+ {'name':'bar', 'parameters':{'flow_param_2': 'event_field_2'}])
1377
+ ```
1378
+
1379
+ 'parameters' can also be a list of strings and tuples like so:
1380
+ ```
1381
+ @trigger(event={'name':'foo', 'parameters':['common_name', ('flow_param', 'event_field')]})
1382
+ ```
1383
+ This is equivalent to:
1384
+ ```
1385
+ @trigger(event={'name':'foo', 'parameters':{'common_name': 'common_name', 'flow_param': 'event_field'}})
1386
+ ```
1387
+
1388
+
1389
+ Parameters
1390
+ ----------
1391
+ event : Union[str, Dict[str, Any]], optional, default None
1392
+ Event dependency for this flow.
1393
+ events : List[Union[str, Dict[str, Any]]], default []
1394
+ Events dependency for this flow.
1395
+ options : Dict[str, Any], default {}
1396
+ Backend-specific configuration for tuning eventing behavior.
1397
+ """
1398
+ ...
1399
+
1400
+ @typing.overload
1401
+ def trigger(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1402
+ ...
1403
+
1404
+ def trigger(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, event: typing.Union[str, typing.Dict[str, typing.Any], None] = None, events: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = [], options: typing.Dict[str, typing.Any] = {}):
1405
+ """
1406
+ Specifies the event(s) that this flow depends on.
1407
+
1408
+ ```
1409
+ @trigger(event='foo')
1410
+ ```
1411
+ or
1412
+ ```
1413
+ @trigger(events=['foo', 'bar'])
1414
+ ```
1415
+
1416
+ Additionally, you can specify the parameter mappings
1417
+ to map event payload to Metaflow parameters for the flow.
1418
+ ```
1419
+ @trigger(event={'name':'foo', 'parameters':{'flow_param': 'event_field'}})
1420
+ ```
1421
+ or
1422
+ ```
1423
+ @trigger(events=[{'name':'foo', 'parameters':{'flow_param_1': 'event_field_1'},
1424
+ {'name':'bar', 'parameters':{'flow_param_2': 'event_field_2'}])
1425
+ ```
1426
+
1427
+ 'parameters' can also be a list of strings and tuples like so:
1428
+ ```
1429
+ @trigger(event={'name':'foo', 'parameters':['common_name', ('flow_param', 'event_field')]})
1430
+ ```
1431
+ This is equivalent to:
1432
+ ```
1433
+ @trigger(event={'name':'foo', 'parameters':{'common_name': 'common_name', 'flow_param': 'event_field'}})
1434
+ ```
1435
+
1436
+
1437
+ Parameters
1438
+ ----------
1439
+ event : Union[str, Dict[str, Any]], optional, default None
1440
+ Event dependency for this flow.
1441
+ events : List[Union[str, Dict[str, Any]]], default []
1442
+ Events dependency for this flow.
1443
+ options : Dict[str, Any], default {}
1444
+ Backend-specific configuration for tuning eventing behavior.
1445
+ """
1446
+ ...
1447
+
1448
+ def airflow_external_task_sensor(*, timeout: int, poke_interval: int, mode: str, exponential_backoff: bool, pool: str, soft_fail: bool, name: str, description: str, external_dag_id: str, external_task_ids: typing.List[str], allowed_states: typing.List[str], failed_states: typing.List[str], execution_delta: "datetime.timedelta", check_existence: bool) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1071
1449
  """
1072
1450
  The `@airflow_external_task_sensor` decorator attaches a Airflow [ExternalTaskSensor](https://airflow.apache.org/docs/apache-airflow/stable/_api/airflow/sensors/external_task/index.html#airflow.sensors.external_task.ExternalTaskSensor) before the start step of the flow.
1073
1451
  This decorator only works when a flow is scheduled on Airflow and is compiled using `airflow create`. More than one `@airflow_external_task_sensor` can be added as a flow decorators. Adding more than one decorator will ensure that `start` step starts only after all sensors finish.
@@ -1110,171 +1488,6 @@ def airflow_external_task_sensor(*, timeout: int, poke_interval: int, mode: str,
1110
1488
  """
1111
1489
  ...
1112
1490
 
1113
- def with_artifact_store(f: typing.Optional[typing.Type[FlowSpecDerived]] = None):
1114
- """
1115
- Allows setting external datastores to save data for the
1116
- `@checkpoint`/`@model`/`@huggingface_hub` decorators.
1117
-
1118
- This decorator is useful when users wish to save data to a different datastore
1119
- than what is configured in Metaflow. This can be for variety of reasons:
1120
-
1121
- 1. Data security: The objects needs to be stored in a bucket (object storage) that is not accessible by other flows.
1122
- 2. Data Locality: The location where the task is executing is not located in the same region as the datastore.
1123
- - Example: Metaflow datastore lives in US East, but the task is executing in Finland datacenters.
1124
- 3. Data Lifecycle Policies: The objects need to be archived / managed separately from the Metaflow managed objects.
1125
- - Example: Flow is training very large models that need to be stored separately and will be deleted more aggressively than the Metaflow managed objects.
1126
-
1127
- Usage:
1128
- ----------
1129
-
1130
- - Using a custom IAM role to access the datastore.
1131
-
1132
- ```python
1133
- @with_artifact_store(
1134
- type="s3",
1135
- config=lambda: {
1136
- "root": "s3://my-bucket-foo/path/to/root",
1137
- "role_arn": ROLE,
1138
- },
1139
- )
1140
- class MyFlow(FlowSpec):
1141
-
1142
- @checkpoint
1143
- @step
1144
- def start(self):
1145
- with open("my_file.txt", "w") as f:
1146
- f.write("Hello, World!")
1147
- self.external_bucket_checkpoint = current.checkpoint.save("my_file.txt")
1148
- self.next(self.end)
1149
-
1150
- ```
1151
-
1152
- - Using credentials to access the s3-compatible datastore.
1153
-
1154
- ```python
1155
- @with_artifact_store(
1156
- type="s3",
1157
- config=lambda: {
1158
- "root": "s3://my-bucket-foo/path/to/root",
1159
- "client_params": {
1160
- "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1161
- "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1162
- },
1163
- },
1164
- )
1165
- class MyFlow(FlowSpec):
1166
-
1167
- @checkpoint
1168
- @step
1169
- def start(self):
1170
- with open("my_file.txt", "w") as f:
1171
- f.write("Hello, World!")
1172
- self.external_bucket_checkpoint = current.checkpoint.save("my_file.txt")
1173
- self.next(self.end)
1174
-
1175
- ```
1176
-
1177
- - Accessing objects stored in external datastores after task execution.
1178
-
1179
- ```python
1180
- run = Run("CheckpointsTestsFlow/8992")
1181
- with artifact_store_from(run=run, config={
1182
- "client_params": {
1183
- "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1184
- "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1185
- },
1186
- }):
1187
- with Checkpoint() as cp:
1188
- latest = cp.list(
1189
- task=run["start"].task
1190
- )[0]
1191
- print(latest)
1192
- cp.load(
1193
- latest,
1194
- "test-checkpoints"
1195
- )
1196
-
1197
- task = Task("TorchTuneFlow/8484/train/53673")
1198
- with artifact_store_from(run=run, config={
1199
- "client_params": {
1200
- "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1201
- "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1202
- },
1203
- }):
1204
- load_model(
1205
- task.data.model_ref,
1206
- "test-models"
1207
- )
1208
- ```
1209
- Parameters:
1210
- ----------
1211
-
1212
- type: str
1213
- The type of the datastore. Can be one of 's3', 'gcs', 'azure' or any other supported metaflow Datastore.
1214
-
1215
- config: dict or Callable
1216
- Dictionary of configuration options for the datastore. The following keys are required:
1217
- - root: The root path in the datastore where the data will be saved. (needs to be in the format expected by the datastore)
1218
- - example: 's3://bucket-name/path/to/root'
1219
- - example: 'gs://bucket-name/path/to/root'
1220
- - example: 'https://myblockacc.blob.core.windows.net/metaflow/'
1221
- - role_arn (optional): AWS IAM role to access s3 bucket (only when `type` is 's3')
1222
- - session_vars (optional): AWS session variables to access s3 bucket (only when `type` is 's3')
1223
- - client_params (optional): AWS client parameters to access s3 bucket (only when `type` is 's3')
1224
- """
1225
- ...
1226
-
1227
- @typing.overload
1228
- def schedule(*, hourly: bool = False, daily: bool = True, weekly: bool = False, cron: typing.Optional[str] = None, timezone: typing.Optional[str] = None) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1229
- """
1230
- Specifies the times when the flow should be run when running on a
1231
- production scheduler.
1232
-
1233
-
1234
- Parameters
1235
- ----------
1236
- hourly : bool, default False
1237
- Run the workflow hourly.
1238
- daily : bool, default True
1239
- Run the workflow daily.
1240
- weekly : bool, default False
1241
- Run the workflow weekly.
1242
- cron : str, optional, default None
1243
- Run the workflow at [a custom Cron schedule](https://docs.aws.amazon.com/eventbridge/latest/userguide/scheduled-events.html#cron-expressions)
1244
- specified by this expression.
1245
- timezone : str, optional, default None
1246
- Timezone on which the schedule runs (default: None). Currently supported only for Argo workflows,
1247
- which accepts timezones in [IANA format](https://nodatime.org/TimeZones).
1248
- """
1249
- ...
1250
-
1251
- @typing.overload
1252
- def schedule(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1253
- ...
1254
-
1255
- def schedule(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, hourly: bool = False, daily: bool = True, weekly: bool = False, cron: typing.Optional[str] = None, timezone: typing.Optional[str] = None):
1256
- """
1257
- Specifies the times when the flow should be run when running on a
1258
- production scheduler.
1259
-
1260
-
1261
- Parameters
1262
- ----------
1263
- hourly : bool, default False
1264
- Run the workflow hourly.
1265
- daily : bool, default True
1266
- Run the workflow daily.
1267
- weekly : bool, default False
1268
- Run the workflow weekly.
1269
- cron : str, optional, default None
1270
- Run the workflow at [a custom Cron schedule](https://docs.aws.amazon.com/eventbridge/latest/userguide/scheduled-events.html#cron-expressions)
1271
- specified by this expression.
1272
- timezone : str, optional, default None
1273
- Timezone on which the schedule runs (default: None). Currently supported only for Argo workflows,
1274
- which accepts timezones in [IANA format](https://nodatime.org/TimeZones).
1275
- """
1276
- ...
1277
-
1278
1491
  @typing.overload
1279
1492
  def trigger_on_finish(*, flow: typing.Union[typing.Dict[str, str], str, None] = None, flows: typing.List[typing.Union[str, typing.Dict[str, str]]] = [], options: typing.Dict[str, typing.Any] = {}) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1280
1493
  """
@@ -1376,215 +1589,219 @@ def trigger_on_finish(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *
1376
1589
  """
1377
1590
  ...
1378
1591
 
1379
- def airflow_s3_key_sensor(*, timeout: int, poke_interval: int, mode: str, exponential_backoff: bool, pool: str, soft_fail: bool, name: str, description: str, bucket_key: typing.Union[str, typing.List[str]], bucket_name: str, wildcard_match: bool, aws_conn_id: str, verify: bool) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1592
+ @typing.overload
1593
+ def schedule(*, hourly: bool = False, daily: bool = True, weekly: bool = False, cron: typing.Optional[str] = None, timezone: typing.Optional[str] = None) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1380
1594
  """
1381
- The `@airflow_s3_key_sensor` decorator attaches a Airflow [S3KeySensor](https://airflow.apache.org/docs/apache-airflow-providers-amazon/stable/_api/airflow/providers/amazon/aws/sensors/s3/index.html#airflow.providers.amazon.aws.sensors.s3.S3KeySensor)
1382
- before the start step of the flow. This decorator only works when a flow is scheduled on Airflow
1383
- and is compiled using `airflow create`. More than one `@airflow_s3_key_sensor` can be
1384
- added as a flow decorators. Adding more than one decorator will ensure that `start` step
1385
- starts only after all sensors finish.
1595
+ Specifies the times when the flow should be run when running on a
1596
+ production scheduler.
1386
1597
 
1387
1598
 
1388
1599
  Parameters
1389
1600
  ----------
1390
- timeout : int
1391
- Time, in seconds before the task times out and fails. (Default: 3600)
1392
- poke_interval : int
1393
- Time in seconds that the job should wait in between each try. (Default: 60)
1394
- mode : str
1395
- How the sensor operates. Options are: { poke | reschedule }. (Default: "poke")
1396
- exponential_backoff : bool
1397
- allow progressive longer waits between pokes by using exponential backoff algorithm. (Default: True)
1398
- pool : str
1399
- the slot pool this task should run in,
1400
- slot pools are a way to limit concurrency for certain tasks. (Default:None)
1401
- soft_fail : bool
1402
- Set to true to mark the task as SKIPPED on failure. (Default: False)
1403
- name : str
1404
- Name of the sensor on Airflow
1405
- description : str
1406
- Description of sensor in the Airflow UI
1407
- bucket_key : Union[str, List[str]]
1408
- The key(s) being waited on. Supports full s3:// style url or relative path from root level.
1409
- When it's specified as a full s3:// url, please leave `bucket_name` as None
1410
- bucket_name : str
1411
- Name of the S3 bucket. Only needed when bucket_key is not provided as a full s3:// url.
1412
- When specified, all the keys passed to bucket_key refers to this bucket. (Default:None)
1413
- wildcard_match : bool
1414
- whether the bucket_key should be interpreted as a Unix wildcard pattern. (Default: False)
1415
- aws_conn_id : str
1416
- a reference to the s3 connection on Airflow. (Default: None)
1417
- verify : bool
1418
- Whether or not to verify SSL certificates for S3 connection. (Default: None)
1601
+ hourly : bool, default False
1602
+ Run the workflow hourly.
1603
+ daily : bool, default True
1604
+ Run the workflow daily.
1605
+ weekly : bool, default False
1606
+ Run the workflow weekly.
1607
+ cron : str, optional, default None
1608
+ Run the workflow at [a custom Cron schedule](https://docs.aws.amazon.com/eventbridge/latest/userguide/scheduled-events.html#cron-expressions)
1609
+ specified by this expression.
1610
+ timezone : str, optional, default None
1611
+ Timezone on which the schedule runs (default: None). Currently supported only for Argo workflows,
1612
+ which accepts timezones in [IANA format](https://nodatime.org/TimeZones).
1419
1613
  """
1420
1614
  ...
1421
1615
 
1422
- def project(*, name: str, branch: typing.Optional[str] = None, production: bool = False) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1616
+ @typing.overload
1617
+ def schedule(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1618
+ ...
1619
+
1620
+ def schedule(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, hourly: bool = False, daily: bool = True, weekly: bool = False, cron: typing.Optional[str] = None, timezone: typing.Optional[str] = None):
1423
1621
  """
1424
- Specifies what flows belong to the same project.
1425
-
1426
- A project-specific namespace is created for all flows that
1427
- use the same `@project(name)`.
1622
+ Specifies the times when the flow should be run when running on a
1623
+ production scheduler.
1428
1624
 
1429
1625
 
1430
1626
  Parameters
1431
1627
  ----------
1432
- name : str
1433
- Project name. Make sure that the name is unique amongst all
1434
- projects that use the same production scheduler. The name may
1435
- contain only lowercase alphanumeric characters and underscores.
1436
-
1437
- branch : Optional[str], default None
1438
- The branch to use. If not specified, the branch is set to
1439
- `user.<username>` unless `production` is set to `True`. This can
1440
- also be set on the command line using `--branch` as a top-level option.
1441
- It is an error to specify `branch` in the decorator and on the command line.
1442
-
1443
- production : bool, default False
1444
- Whether or not the branch is the production branch. This can also be set on the
1445
- command line using `--production` as a top-level option. It is an error to specify
1446
- `production` in the decorator and on the command line.
1447
- The project branch name will be:
1448
- - if `branch` is specified:
1449
- - if `production` is True: `prod.<branch>`
1450
- - if `production` is False: `test.<branch>`
1451
- - if `branch` is not specified:
1452
- - if `production` is True: `prod`
1453
- - if `production` is False: `user.<username>`
1628
+ hourly : bool, default False
1629
+ Run the workflow hourly.
1630
+ daily : bool, default True
1631
+ Run the workflow daily.
1632
+ weekly : bool, default False
1633
+ Run the workflow weekly.
1634
+ cron : str, optional, default None
1635
+ Run the workflow at [a custom Cron schedule](https://docs.aws.amazon.com/eventbridge/latest/userguide/scheduled-events.html#cron-expressions)
1636
+ specified by this expression.
1637
+ timezone : str, optional, default None
1638
+ Timezone on which the schedule runs (default: None). Currently supported only for Argo workflows,
1639
+ which accepts timezones in [IANA format](https://nodatime.org/TimeZones).
1454
1640
  """
1455
1641
  ...
1456
1642
 
1457
1643
  @typing.overload
1458
- def pypi_base(*, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1644
+ def conda_base(*, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1459
1645
  """
1460
- Specifies the PyPI packages for all steps of the flow.
1646
+ Specifies the Conda environment for all steps of the flow.
1647
+
1648
+ Use `@conda_base` to set common libraries required by all
1649
+ steps and use `@conda` to specify step-specific additions.
1461
1650
 
1462
- Use `@pypi_base` to set common packages required by all
1463
- steps and use `@pypi` to specify step-specific overrides.
1464
1651
 
1465
1652
  Parameters
1466
1653
  ----------
1467
- packages : Dict[str, str], default: {}
1654
+ packages : Dict[str, str], default {}
1468
1655
  Packages to use for this flow. The key is the name of the package
1469
1656
  and the value is the version to use.
1470
- python : str, optional, default: None
1657
+ libraries : Dict[str, str], default {}
1658
+ Supported for backward compatibility. When used with packages, packages will take precedence.
1659
+ python : str, optional, default None
1471
1660
  Version of Python to use, e.g. '3.7.4'. A default value of None implies
1472
1661
  that the version used will correspond to the version of the Python interpreter used to start the run.
1662
+ disabled : bool, default False
1663
+ If set to True, disables Conda.
1473
1664
  """
1474
1665
  ...
1475
1666
 
1476
1667
  @typing.overload
1477
- def pypi_base(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1668
+ def conda_base(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1478
1669
  ...
1479
1670
 
1480
- def pypi_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None):
1671
+ def conda_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False):
1481
1672
  """
1482
- Specifies the PyPI packages for all steps of the flow.
1673
+ Specifies the Conda environment for all steps of the flow.
1674
+
1675
+ Use `@conda_base` to set common libraries required by all
1676
+ steps and use `@conda` to specify step-specific additions.
1483
1677
 
1484
- Use `@pypi_base` to set common packages required by all
1485
- steps and use `@pypi` to specify step-specific overrides.
1486
1678
 
1487
1679
  Parameters
1488
1680
  ----------
1489
- packages : Dict[str, str], default: {}
1681
+ packages : Dict[str, str], default {}
1490
1682
  Packages to use for this flow. The key is the name of the package
1491
1683
  and the value is the version to use.
1492
- python : str, optional, default: None
1684
+ libraries : Dict[str, str], default {}
1685
+ Supported for backward compatibility. When used with packages, packages will take precedence.
1686
+ python : str, optional, default None
1493
1687
  Version of Python to use, e.g. '3.7.4'. A default value of None implies
1494
1688
  that the version used will correspond to the version of the Python interpreter used to start the run.
1689
+ disabled : bool, default False
1690
+ If set to True, disables Conda.
1495
1691
  """
1496
1692
  ...
1497
1693
 
1498
- @typing.overload
1499
- def trigger(*, event: typing.Union[str, typing.Dict[str, typing.Any], None] = None, events: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = [], options: typing.Dict[str, typing.Any] = {}) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1694
+ def with_artifact_store(f: typing.Optional[typing.Type[FlowSpecDerived]] = None):
1500
1695
  """
1501
- Specifies the event(s) that this flow depends on.
1696
+ Allows setting external datastores to save data for the
1697
+ `@checkpoint`/`@model`/`@huggingface_hub` decorators.
1502
1698
 
1503
- ```
1504
- @trigger(event='foo')
1505
- ```
1506
- or
1507
- ```
1508
- @trigger(events=['foo', 'bar'])
1509
- ```
1699
+ This decorator is useful when users wish to save data to a different datastore
1700
+ than what is configured in Metaflow. This can be for a variety of reasons:
1510
1701
 
1511
- Additionally, you can specify the parameter mappings
1512
- to map event payload to Metaflow parameters for the flow.
1513
- ```
1514
- @trigger(event={'name':'foo', 'parameters':{'flow_param': 'event_field'}})
1515
- ```
1516
- or
1517
- ```
1518
- @trigger(events=[{'name':'foo', 'parameters':{'flow_param_1': 'event_field_1'},
1519
- {'name':'bar', 'parameters':{'flow_param_2': 'event_field_2'}])
1520
- ```
1702
+ 1. Data security: The objects need to be stored in a bucket (object storage) that is not accessible by other flows.
1703
+ 2. Data Locality: The location where the task is executing is not located in the same region as the datastore.
1704
+ - Example: Metaflow datastore lives in US East, but the task is executing in Finland datacenters.
1705
+ 3. Data Lifecycle Policies: The objects need to be archived / managed separately from the Metaflow managed objects.
1706
+ - Example: Flow is training very large models that need to be stored separately and will be deleted more aggressively than the Metaflow managed objects.
1521
1707
 
1522
- 'parameters' can also be a list of strings and tuples like so:
1523
- ```
1524
- @trigger(event={'name':'foo', 'parameters':['common_name', ('flow_param', 'event_field')]})
1525
- ```
1526
- This is equivalent to:
1527
- ```
1528
- @trigger(event={'name':'foo', 'parameters':{'common_name': 'common_name', 'flow_param': 'event_field'}})
1529
- ```
1708
+ Usage:
1709
+ ----------
1530
1710
 
1711
+ - Using a custom IAM role to access the datastore.
1531
1712
 
1532
- Parameters
1533
- ----------
1534
- event : Union[str, Dict[str, Any]], optional, default None
1535
- Event dependency for this flow.
1536
- events : List[Union[str, Dict[str, Any]]], default []
1537
- Events dependency for this flow.
1538
- options : Dict[str, Any], default {}
1539
- Backend-specific configuration for tuning eventing behavior.
1540
- """
1541
- ...
1542
-
1543
- @typing.overload
1544
- def trigger(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1545
- ...
1546
-
1547
- def trigger(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, event: typing.Union[str, typing.Dict[str, typing.Any], None] = None, events: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = [], options: typing.Dict[str, typing.Any] = {}):
1548
- """
1549
- Specifies the event(s) that this flow depends on.
1713
+ ```python
1714
+ @with_artifact_store(
1715
+ type="s3",
1716
+ config=lambda: {
1717
+ "root": "s3://my-bucket-foo/path/to/root",
1718
+ "role_arn": ROLE,
1719
+ },
1720
+ )
1721
+ class MyFlow(FlowSpec):
1550
1722
 
1551
- ```
1552
- @trigger(event='foo')
1553
- ```
1554
- or
1555
- ```
1556
- @trigger(events=['foo', 'bar'])
1557
- ```
1723
+ @checkpoint
1724
+ @step
1725
+ def start(self):
1726
+ with open("my_file.txt", "w") as f:
1727
+ f.write("Hello, World!")
1728
+ self.external_bucket_checkpoint = current.checkpoint.save("my_file.txt")
1729
+ self.next(self.end)
1558
1730
 
1559
- Additionally, you can specify the parameter mappings
1560
- to map event payload to Metaflow parameters for the flow.
1561
- ```
1562
- @trigger(event={'name':'foo', 'parameters':{'flow_param': 'event_field'}})
1563
- ```
1564
- or
1565
- ```
1566
- @trigger(events=[{'name':'foo', 'parameters':{'flow_param_1': 'event_field_1'},
1567
- {'name':'bar', 'parameters':{'flow_param_2': 'event_field_2'}])
1568
- ```
1731
+ ```
1569
1732
 
1570
- 'parameters' can also be a list of strings and tuples like so:
1571
- ```
1572
- @trigger(event={'name':'foo', 'parameters':['common_name', ('flow_param', 'event_field')]})
1573
- ```
1574
- This is equivalent to:
1575
- ```
1576
- @trigger(event={'name':'foo', 'parameters':{'common_name': 'common_name', 'flow_param': 'event_field'}})
1577
- ```
1733
+ - Using credentials to access the s3-compatible datastore.
1734
+
1735
+ ```python
1736
+ @with_artifact_store(
1737
+ type="s3",
1738
+ config=lambda: {
1739
+ "root": "s3://my-bucket-foo/path/to/root",
1740
+ "client_params": {
1741
+ "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1742
+ "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1743
+ },
1744
+ },
1745
+ )
1746
+ class MyFlow(FlowSpec):
1747
+
1748
+ @checkpoint
1749
+ @step
1750
+ def start(self):
1751
+ with open("my_file.txt", "w") as f:
1752
+ f.write("Hello, World!")
1753
+ self.external_bucket_checkpoint = current.checkpoint.save("my_file.txt")
1754
+ self.next(self.end)
1578
1755
 
1756
+ ```
1579
1757
 
1580
- Parameters
1758
+ - Accessing objects stored in external datastores after task execution.
1759
+
1760
+ ```python
1761
+ run = Run("CheckpointsTestsFlow/8992")
1762
+ with artifact_store_from(run=run, config={
1763
+ "client_params": {
1764
+ "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1765
+ "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1766
+ },
1767
+ }):
1768
+ with Checkpoint() as cp:
1769
+ latest = cp.list(
1770
+ task=run["start"].task
1771
+ )[0]
1772
+ print(latest)
1773
+ cp.load(
1774
+ latest,
1775
+ "test-checkpoints"
1776
+ )
1777
+
1778
+ task = Task("TorchTuneFlow/8484/train/53673")
1779
+ with artifact_store_from(run=run, config={
1780
+ "client_params": {
1781
+ "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1782
+ "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1783
+ },
1784
+ }):
1785
+ load_model(
1786
+ task.data.model_ref,
1787
+ "test-models"
1788
+ )
1789
+ ```
1790
+ Parameters:
1581
1791
  ----------
1582
- event : Union[str, Dict[str, Any]], optional, default None
1583
- Event dependency for this flow.
1584
- events : List[Union[str, Dict[str, Any]]], default []
1585
- Events dependency for this flow.
1586
- options : Dict[str, Any], default {}
1587
- Backend-specific configuration for tuning eventing behavior.
1792
+
1793
+ type: str
1794
+ The type of the datastore. Can be one of 's3', 'gcs', 'azure' or any other supported Metaflow datastore.
1795
+
1796
+ config: dict or Callable
1797
+ Dictionary of configuration options for the datastore. The following keys are required:
1798
+ - root: The root path in the datastore where the data will be saved. (needs to be in the format expected by the datastore)
1799
+ - example: 's3://bucket-name/path/to/root'
1800
+ - example: 'gs://bucket-name/path/to/root'
1801
+ - example: 'https://myblockacc.blob.core.windows.net/metaflow/'
1802
+ - role_arn (optional): AWS IAM role to access s3 bucket (only when `type` is 's3')
1803
+ - session_vars (optional): AWS session variables to access s3 bucket (only when `type` is 's3')
1804
+ - client_params (optional): AWS client parameters to access s3 bucket (only when `type` is 's3')
1588
1805
  """
1589
1806
  ...
1590
1807