ob-metaflow-stubs 6.0.3.186__py2.py3-none-any.whl → 6.0.3.188rc0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (239)
  1. metaflow-stubs/__init__.pyi +1074 -1062
  2. metaflow-stubs/cards.pyi +1 -1
  3. metaflow-stubs/cli.pyi +1 -1
  4. metaflow-stubs/cli_components/__init__.pyi +1 -1
  5. metaflow-stubs/cli_components/utils.pyi +1 -1
  6. metaflow-stubs/client/__init__.pyi +1 -1
  7. metaflow-stubs/client/core.pyi +5 -5
  8. metaflow-stubs/client/filecache.pyi +2 -2
  9. metaflow-stubs/events.pyi +2 -2
  10. metaflow-stubs/exception.pyi +1 -1
  11. metaflow-stubs/flowspec.pyi +3 -3
  12. metaflow-stubs/generated_for.txt +1 -1
  13. metaflow-stubs/includefile.pyi +2 -2
  14. metaflow-stubs/info_file.pyi +1 -1
  15. metaflow-stubs/metadata_provider/__init__.pyi +1 -1
  16. metaflow-stubs/metadata_provider/heartbeat.pyi +1 -1
  17. metaflow-stubs/metadata_provider/metadata.pyi +1 -1
  18. metaflow-stubs/metadata_provider/util.pyi +1 -1
  19. metaflow-stubs/metaflow_config.pyi +1 -1
  20. metaflow-stubs/metaflow_current.pyi +56 -56
  21. metaflow-stubs/metaflow_git.pyi +1 -1
  22. metaflow-stubs/mf_extensions/__init__.pyi +1 -1
  23. metaflow-stubs/mf_extensions/obcheckpoint/__init__.pyi +1 -1
  24. metaflow-stubs/mf_extensions/obcheckpoint/plugins/__init__.pyi +1 -1
  25. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/__init__.pyi +1 -1
  26. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/__init__.pyi +1 -1
  27. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/async_cards.pyi +1 -1
  28. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/deco_injection_mixin.pyi +1 -1
  29. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/extra_components.pyi +1 -1
  30. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/__init__.pyi +1 -1
  31. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/cards/__init__.pyi +1 -1
  32. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/cards/checkpoint_lister.pyi +2 -2
  33. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/cards/lineage_card.pyi +1 -1
  34. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/checkpoint_storage.pyi +4 -4
  35. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/constructors.pyi +1 -1
  36. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/core.pyi +3 -3
  37. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/decorator.pyi +2 -2
  38. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/exceptions.pyi +1 -1
  39. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/final_api.pyi +1 -1
  40. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/lineage.pyi +1 -1
  41. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/__init__.pyi +1 -1
  42. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/context.pyi +2 -2
  43. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/core.pyi +2 -2
  44. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/decorator.pyi +1 -1
  45. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/exceptions.pyi +1 -1
  46. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/task_utils.pyi +1 -1
  47. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/utils.pyi +1 -1
  48. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastructures.pyi +1 -1
  49. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/exceptions.pyi +1 -1
  50. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/hf_hub/__init__.pyi +1 -1
  51. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/hf_hub/decorator.pyi +1 -1
  52. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/__init__.pyi +1 -1
  53. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/core.pyi +3 -3
  54. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/exceptions.pyi +1 -1
  55. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/model_storage.pyi +2 -2
  56. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/__init__.pyi +1 -1
  57. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/flowspec_utils.pyi +1 -1
  58. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/general.pyi +1 -1
  59. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/identity_utils.pyi +1 -1
  60. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/serialization_handler/__init__.pyi +1 -1
  61. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/serialization_handler/base.pyi +1 -1
  62. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/serialization_handler/tar.pyi +1 -1
  63. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/tar_utils.pyi +1 -1
  64. metaflow-stubs/mf_extensions/outerbounds/__init__.pyi +1 -1
  65. metaflow-stubs/mf_extensions/outerbounds/plugins/__init__.pyi +1 -1
  66. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/__init__.pyi +6 -0
  67. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/__init__.pyi +22 -0
  68. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/_state_machine.pyi +119 -0
  69. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/_vendor/__init__.pyi +6 -0
  70. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/_vendor/spinner/__init__.pyi +6 -0
  71. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/_vendor/spinner/spinners.pyi +19 -0
  72. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/app_cli.pyi +126 -0
  73. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/app_config.pyi +98 -0
  74. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/capsule.pyi +233 -0
  75. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/cli_to_config.pyi +17 -0
  76. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/click_importer.pyi +12 -0
  77. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/code_package/__init__.pyi +12 -0
  78. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/code_package/code_packager.pyi +242 -0
  79. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/dependencies.pyi +50 -0
  80. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/experimental/__init__.pyi +27 -0
  81. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/perimeters.pyi +30 -0
  82. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/secrets.pyi +46 -0
  83. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/utils.pyi +91 -0
  84. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/validations.pyi +24 -0
  85. metaflow-stubs/mf_extensions/outerbounds/plugins/aws/__init__.pyi +1 -1
  86. metaflow-stubs/mf_extensions/outerbounds/plugins/aws/assume_role_decorator.pyi +1 -1
  87. metaflow-stubs/mf_extensions/outerbounds/plugins/card_utilities/__init__.pyi +1 -1
  88. metaflow-stubs/mf_extensions/outerbounds/plugins/card_utilities/async_cards.pyi +1 -1
  89. metaflow-stubs/mf_extensions/outerbounds/plugins/card_utilities/injector.pyi +1 -1
  90. metaflow-stubs/mf_extensions/outerbounds/plugins/checkpoint_datastores/__init__.pyi +1 -1
  91. metaflow-stubs/mf_extensions/outerbounds/plugins/checkpoint_datastores/coreweave.pyi +1 -1
  92. metaflow-stubs/mf_extensions/outerbounds/plugins/checkpoint_datastores/nebius.pyi +1 -1
  93. metaflow-stubs/mf_extensions/outerbounds/plugins/fast_bakery/__init__.pyi +6 -0
  94. metaflow-stubs/mf_extensions/outerbounds/plugins/fast_bakery/baker.pyi +51 -0
  95. metaflow-stubs/mf_extensions/outerbounds/plugins/fast_bakery/docker_environment.pyi +65 -0
  96. metaflow-stubs/mf_extensions/outerbounds/plugins/fast_bakery/fast_bakery.pyi +74 -0
  97. metaflow-stubs/mf_extensions/outerbounds/plugins/kubernetes/__init__.pyi +1 -1
  98. metaflow-stubs/mf_extensions/outerbounds/plugins/kubernetes/pod_killer.pyi +1 -1
  99. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/__init__.pyi +1 -1
  100. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/constants.pyi +1 -1
  101. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/exceptions.pyi +1 -1
  102. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/ollama.pyi +1 -1
  103. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/status_card.pyi +1 -1
  104. metaflow-stubs/mf_extensions/outerbounds/plugins/snowflake/__init__.pyi +1 -1
  105. metaflow-stubs/mf_extensions/outerbounds/plugins/snowflake/snowflake.pyi +1 -1
  106. metaflow-stubs/mf_extensions/outerbounds/profilers/__init__.pyi +1 -1
  107. metaflow-stubs/mf_extensions/outerbounds/profilers/gpu.pyi +1 -1
  108. metaflow-stubs/mf_extensions/outerbounds/remote_config.pyi +1 -1
  109. metaflow-stubs/mf_extensions/outerbounds/toplevel/__init__.pyi +1 -1
  110. metaflow-stubs/mf_extensions/outerbounds/toplevel/global_aliases_for_metaflow_package.pyi +1 -1
  111. metaflow-stubs/multicore_utils.pyi +1 -1
  112. metaflow-stubs/ob_internal.pyi +3 -1
  113. metaflow-stubs/parameters.pyi +2 -2
  114. metaflow-stubs/plugins/__init__.pyi +11 -11
  115. metaflow-stubs/plugins/airflow/__init__.pyi +1 -1
  116. metaflow-stubs/plugins/airflow/airflow_utils.pyi +1 -1
  117. metaflow-stubs/plugins/airflow/exception.pyi +1 -1
  118. metaflow-stubs/plugins/airflow/sensors/__init__.pyi +1 -1
  119. metaflow-stubs/plugins/airflow/sensors/base_sensor.pyi +1 -1
  120. metaflow-stubs/plugins/airflow/sensors/external_task_sensor.pyi +1 -1
  121. metaflow-stubs/plugins/airflow/sensors/s3_sensor.pyi +1 -1
  122. metaflow-stubs/plugins/argo/__init__.pyi +1 -1
  123. metaflow-stubs/plugins/argo/argo_client.pyi +1 -1
  124. metaflow-stubs/plugins/argo/argo_events.pyi +1 -1
  125. metaflow-stubs/plugins/argo/argo_workflows.pyi +1 -1
  126. metaflow-stubs/plugins/argo/argo_workflows_decorator.pyi +2 -2
  127. metaflow-stubs/plugins/argo/argo_workflows_deployer.pyi +1 -1
  128. metaflow-stubs/plugins/argo/argo_workflows_deployer_objects.pyi +2 -2
  129. metaflow-stubs/plugins/aws/__init__.pyi +1 -1
  130. metaflow-stubs/plugins/aws/aws_client.pyi +1 -1
  131. metaflow-stubs/plugins/aws/aws_utils.pyi +1 -1
  132. metaflow-stubs/plugins/aws/batch/__init__.pyi +1 -1
  133. metaflow-stubs/plugins/aws/batch/batch.pyi +1 -1
  134. metaflow-stubs/plugins/aws/batch/batch_client.pyi +1 -1
  135. metaflow-stubs/plugins/aws/batch/batch_decorator.pyi +1 -1
  136. metaflow-stubs/plugins/aws/secrets_manager/__init__.pyi +1 -1
  137. metaflow-stubs/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.pyi +2 -2
  138. metaflow-stubs/plugins/aws/step_functions/__init__.pyi +1 -1
  139. metaflow-stubs/plugins/aws/step_functions/event_bridge_client.pyi +1 -1
  140. metaflow-stubs/plugins/aws/step_functions/schedule_decorator.pyi +1 -1
  141. metaflow-stubs/plugins/aws/step_functions/step_functions.pyi +1 -1
  142. metaflow-stubs/plugins/aws/step_functions/step_functions_client.pyi +1 -1
  143. metaflow-stubs/plugins/aws/step_functions/step_functions_deployer.pyi +1 -1
  144. metaflow-stubs/plugins/aws/step_functions/step_functions_deployer_objects.pyi +1 -1
  145. metaflow-stubs/plugins/azure/__init__.pyi +1 -1
  146. metaflow-stubs/plugins/azure/azure_credential.pyi +1 -1
  147. metaflow-stubs/plugins/azure/azure_exceptions.pyi +1 -1
  148. metaflow-stubs/plugins/azure/azure_secret_manager_secrets_provider.pyi +2 -2
  149. metaflow-stubs/plugins/azure/azure_utils.pyi +1 -1
  150. metaflow-stubs/plugins/azure/blob_service_client_factory.pyi +1 -1
  151. metaflow-stubs/plugins/azure/includefile_support.pyi +1 -1
  152. metaflow-stubs/plugins/cards/__init__.pyi +1 -1
  153. metaflow-stubs/plugins/cards/card_client.pyi +2 -2
  154. metaflow-stubs/plugins/cards/card_creator.pyi +1 -1
  155. metaflow-stubs/plugins/cards/card_datastore.pyi +1 -1
  156. metaflow-stubs/plugins/cards/card_decorator.pyi +1 -1
  157. metaflow-stubs/plugins/cards/card_modules/__init__.pyi +1 -1
  158. metaflow-stubs/plugins/cards/card_modules/basic.pyi +2 -2
  159. metaflow-stubs/plugins/cards/card_modules/card.pyi +1 -1
  160. metaflow-stubs/plugins/cards/card_modules/components.pyi +2 -2
  161. metaflow-stubs/plugins/cards/card_modules/convert_to_native_type.pyi +1 -1
  162. metaflow-stubs/plugins/cards/card_modules/renderer_tools.pyi +1 -1
  163. metaflow-stubs/plugins/cards/card_modules/test_cards.pyi +1 -1
  164. metaflow-stubs/plugins/cards/card_resolver.pyi +1 -1
  165. metaflow-stubs/plugins/cards/component_serializer.pyi +1 -1
  166. metaflow-stubs/plugins/cards/exception.pyi +1 -1
  167. metaflow-stubs/plugins/catch_decorator.pyi +2 -2
  168. metaflow-stubs/plugins/datatools/__init__.pyi +1 -1
  169. metaflow-stubs/plugins/datatools/local.pyi +1 -1
  170. metaflow-stubs/plugins/datatools/s3/__init__.pyi +1 -1
  171. metaflow-stubs/plugins/datatools/s3/s3.pyi +2 -2
  172. metaflow-stubs/plugins/datatools/s3/s3tail.pyi +1 -1
  173. metaflow-stubs/plugins/datatools/s3/s3util.pyi +1 -1
  174. metaflow-stubs/plugins/debug_logger.pyi +1 -1
  175. metaflow-stubs/plugins/debug_monitor.pyi +1 -1
  176. metaflow-stubs/plugins/environment_decorator.pyi +1 -1
  177. metaflow-stubs/plugins/events_decorator.pyi +1 -1
  178. metaflow-stubs/plugins/frameworks/__init__.pyi +1 -1
  179. metaflow-stubs/plugins/frameworks/pytorch.pyi +1 -1
  180. metaflow-stubs/plugins/gcp/__init__.pyi +1 -1
  181. metaflow-stubs/plugins/gcp/gcp_secret_manager_secrets_provider.pyi +2 -2
  182. metaflow-stubs/plugins/gcp/gs_exceptions.pyi +1 -1
  183. metaflow-stubs/plugins/gcp/gs_storage_client_factory.pyi +1 -1
  184. metaflow-stubs/plugins/gcp/gs_utils.pyi +1 -1
  185. metaflow-stubs/plugins/gcp/includefile_support.pyi +1 -1
  186. metaflow-stubs/plugins/kubernetes/__init__.pyi +1 -1
  187. metaflow-stubs/plugins/kubernetes/kube_utils.pyi +1 -1
  188. metaflow-stubs/plugins/kubernetes/kubernetes.pyi +1 -1
  189. metaflow-stubs/plugins/kubernetes/kubernetes_client.pyi +1 -1
  190. metaflow-stubs/plugins/kubernetes/kubernetes_decorator.pyi +1 -1
  191. metaflow-stubs/plugins/kubernetes/kubernetes_jobsets.pyi +1 -1
  192. metaflow-stubs/plugins/kubernetes/spot_monitor_sidecar.pyi +1 -1
  193. metaflow-stubs/plugins/ollama/__init__.pyi +2 -2
  194. metaflow-stubs/plugins/parallel_decorator.pyi +1 -1
  195. metaflow-stubs/plugins/perimeters.pyi +1 -1
  196. metaflow-stubs/plugins/project_decorator.pyi +1 -1
  197. metaflow-stubs/plugins/pypi/__init__.pyi +2 -2
  198. metaflow-stubs/plugins/pypi/conda_decorator.pyi +1 -1
  199. metaflow-stubs/plugins/pypi/conda_environment.pyi +3 -3
  200. metaflow-stubs/plugins/pypi/parsers.pyi +1 -1
  201. metaflow-stubs/plugins/pypi/pypi_decorator.pyi +1 -1
  202. metaflow-stubs/plugins/pypi/pypi_environment.pyi +1 -1
  203. metaflow-stubs/plugins/pypi/utils.pyi +1 -1
  204. metaflow-stubs/plugins/resources_decorator.pyi +1 -1
  205. metaflow-stubs/plugins/retry_decorator.pyi +1 -1
  206. metaflow-stubs/plugins/secrets/__init__.pyi +1 -1
  207. metaflow-stubs/plugins/secrets/inline_secrets_provider.pyi +1 -1
  208. metaflow-stubs/plugins/secrets/secrets_decorator.pyi +1 -1
  209. metaflow-stubs/plugins/snowflake/__init__.pyi +1 -1
  210. metaflow-stubs/plugins/storage_executor.pyi +1 -1
  211. metaflow-stubs/plugins/test_unbounded_foreach_decorator.pyi +2 -2
  212. metaflow-stubs/plugins/timeout_decorator.pyi +2 -2
  213. metaflow-stubs/plugins/torchtune/__init__.pyi +1 -1
  214. metaflow-stubs/plugins/uv/__init__.pyi +1 -1
  215. metaflow-stubs/plugins/uv/uv_environment.pyi +2 -2
  216. metaflow-stubs/profilers/__init__.pyi +1 -1
  217. metaflow-stubs/pylint_wrapper.pyi +1 -1
  218. metaflow-stubs/runner/__init__.pyi +1 -1
  219. metaflow-stubs/runner/deployer.pyi +28 -28
  220. metaflow-stubs/runner/deployer_impl.pyi +1 -1
  221. metaflow-stubs/runner/metaflow_runner.pyi +1 -1
  222. metaflow-stubs/runner/nbdeploy.pyi +1 -1
  223. metaflow-stubs/runner/nbrun.pyi +1 -1
  224. metaflow-stubs/runner/subprocess_manager.pyi +1 -1
  225. metaflow-stubs/runner/utils.pyi +1 -1
  226. metaflow-stubs/system/__init__.pyi +1 -1
  227. metaflow-stubs/system/system_logger.pyi +2 -2
  228. metaflow-stubs/system/system_monitor.pyi +1 -1
  229. metaflow-stubs/tagging_util.pyi +1 -1
  230. metaflow-stubs/tuple_util.pyi +1 -1
  231. metaflow-stubs/user_configs/__init__.pyi +1 -1
  232. metaflow-stubs/user_configs/config_decorators.pyi +3 -3
  233. metaflow-stubs/user_configs/config_options.pyi +1 -1
  234. metaflow-stubs/user_configs/config_parameters.pyi +5 -5
  235. {ob_metaflow_stubs-6.0.3.186.dist-info → ob_metaflow_stubs-6.0.3.188rc0.dist-info}/METADATA +1 -1
  236. ob_metaflow_stubs-6.0.3.188rc0.dist-info/RECORD +239 -0
  237. ob_metaflow_stubs-6.0.3.186.dist-info/RECORD +0 -216
  238. {ob_metaflow_stubs-6.0.3.186.dist-info → ob_metaflow_stubs-6.0.3.188rc0.dist-info}/WHEEL +0 -0
  239. {ob_metaflow_stubs-6.0.3.186.dist-info → ob_metaflow_stubs-6.0.3.188rc0.dist-info}/top_level.txt +0 -0
@@ -1,7 +1,7 @@
1
1
  ######################################################################################################
2
2
  # Auto-generated Metaflow stub file #
3
3
  # MF version: 2.15.18.1+obcheckpoint(0.2.4);ob(v1) #
4
- # Generated on 2025-07-01T08:40:33.108391 #
4
+ # Generated on 2025-07-03T01:34:48.431701 #
5
5
  ######################################################################################################
6
6
 
7
7
  from __future__ import annotations
@@ -35,8 +35,8 @@ from .user_configs.config_parameters import ConfigValue as ConfigValue
35
35
  from .user_configs.config_parameters import config_expr as config_expr
36
36
  from .user_configs.config_decorators import CustomFlowDecorator as CustomFlowDecorator
37
37
  from .user_configs.config_decorators import CustomStepDecorator as CustomStepDecorator
38
- from . import cards as cards
39
38
  from . import metaflow_git as metaflow_git
39
+ from . import cards as cards
40
40
  from . import tuple_util as tuple_util
41
41
  from . import events as events
42
42
  from . import runner as runner
@@ -44,9 +44,9 @@ from . import plugins as plugins
44
44
  from .mf_extensions.outerbounds.toplevel.global_aliases_for_metaflow_package import S3 as S3
45
45
  from . import includefile as includefile
46
46
  from .includefile import IncludeFile as IncludeFile
47
+ from .plugins.pypi.parsers import conda_environment_yml_parser as conda_environment_yml_parser
47
48
  from .plugins.pypi.parsers import requirements_txt_parser as requirements_txt_parser
48
49
  from .plugins.pypi.parsers import pyproject_toml_parser as pyproject_toml_parser
49
- from .plugins.pypi.parsers import conda_environment_yml_parser as conda_environment_yml_parser
50
50
  from . import client as client
51
51
  from .client.core import namespace as namespace
52
52
  from .client.core import get_namespace as get_namespace
@@ -156,1095 +156,933 @@ def step(f: typing.Union[typing.Callable[[FlowSpecDerived], None], typing.Callab
156
156
  ...
157
157
 
158
158
  @typing.overload
159
- def pypi(*, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
159
+ def conda(*, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
160
160
  """
161
- Specifies the PyPI packages for the step.
161
+ Specifies the Conda environment for the step.
162
162
 
163
163
  Information in this decorator will augment any
164
- attributes set in the `@pyi_base` flow-level decorator. Hence,
165
- you can use `@pypi_base` to set packages required by all
166
- steps and use `@pypi` to specify step-specific overrides.
164
+ attributes set in the `@conda_base` flow-level decorator. Hence,
165
+ you can use `@conda_base` to set packages required by all
166
+ steps and use `@conda` to specify step-specific overrides.
167
167
 
168
168
 
169
169
  Parameters
170
170
  ----------
171
- packages : Dict[str, str], default: {}
171
+ packages : Dict[str, str], default {}
172
172
  Packages to use for this step. The key is the name of the package
173
173
  and the value is the version to use.
174
- python : str, optional, default: None
174
+ libraries : Dict[str, str], default {}
175
+ Supported for backward compatibility. When used with packages, packages will take precedence.
176
+ python : str, optional, default None
175
177
  Version of Python to use, e.g. '3.7.4'. A default value of None implies
176
178
  that the version used will correspond to the version of the Python interpreter used to start the run.
179
+ disabled : bool, default False
180
+ If set to True, disables @conda.
177
181
  """
178
182
  ...
179
183
 
180
184
  @typing.overload
181
- def pypi(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
185
+ def conda(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
182
186
  ...
183
187
 
184
188
  @typing.overload
185
- def pypi(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
189
+ def conda(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
186
190
  ...
187
191
 
188
- def pypi(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None):
192
+ def conda(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False):
189
193
  """
190
- Specifies the PyPI packages for the step.
194
+ Specifies the Conda environment for the step.
191
195
 
192
196
  Information in this decorator will augment any
193
- attributes set in the `@pyi_base` flow-level decorator. Hence,
194
- you can use `@pypi_base` to set packages required by all
195
- steps and use `@pypi` to specify step-specific overrides.
197
+ attributes set in the `@conda_base` flow-level decorator. Hence,
198
+ you can use `@conda_base` to set packages required by all
199
+ steps and use `@conda` to specify step-specific overrides.
196
200
 
197
201
 
198
202
  Parameters
199
203
  ----------
200
- packages : Dict[str, str], default: {}
204
+ packages : Dict[str, str], default {}
201
205
  Packages to use for this step. The key is the name of the package
202
206
  and the value is the version to use.
203
- python : str, optional, default: None
207
+ libraries : Dict[str, str], default {}
208
+ Supported for backward compatibility. When used with packages, packages will take precedence.
209
+ python : str, optional, default None
204
210
  Version of Python to use, e.g. '3.7.4'. A default value of None implies
205
211
  that the version used will correspond to the version of the Python interpreter used to start the run.
212
+ disabled : bool, default False
213
+ If set to True, disables @conda.
206
214
  """
207
215
  ...
208
216
 
209
217
  @typing.overload
210
- def fast_bakery_internal(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
218
+ def parallel(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
211
219
  """
212
- Internal decorator to support Fast bakery
220
+ Decorator prototype for all step decorators. This function gets specialized
221
+ and imported for all decorators types by _import_plugin_decorators().
213
222
  """
214
223
  ...
215
224
 
216
225
  @typing.overload
217
- def fast_bakery_internal(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
226
+ def parallel(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
218
227
  ...
219
228
 
220
- def fast_bakery_internal(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
229
+ def parallel(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
221
230
  """
222
- Internal decorator to support Fast bakery
231
+ Decorator prototype for all step decorators. This function gets specialized
232
+ and imported for all decorators types by _import_plugin_decorators().
223
233
  """
224
234
  ...
225
235
 
226
- def huggingface_hub(*, temp_dir_root: typing.Optional[str] = None, load: typing.Union[typing.List[str], typing.List[typing.Tuple[typing.Dict, str]], typing.List[typing.Tuple[str, str]], typing.List[typing.Dict], None]) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
236
+ @typing.overload
237
+ def retry(*, times: int = 3, minutes_between_retries: int = 2) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
227
238
  """
228
- Decorator that helps cache, version and store models/datasets from huggingface hub.
229
-
230
- > Examples
231
-
232
- **Usage: creating references of models from huggingface that may be loaded in downstream steps**
233
- ```python
234
- @huggingface_hub
235
- @step
236
- def pull_model_from_huggingface(self):
237
- # `current.huggingface_hub.snapshot_download` downloads the model from the Hugging Face Hub
238
- # and saves it in the backend storage based on the model's `repo_id`. If there exists a model
239
- # with the same `repo_id` in the backend storage, it will not download the model again. The return
240
- # value of the function is a reference to the model in the backend storage.
241
- # This reference can be used to load the model in the subsequent steps via `@model(load=["llama_model"])`
242
-
243
- self.model_id = "mistralai/Mistral-7B-Instruct-v0.1"
244
- self.llama_model = current.huggingface_hub.snapshot_download(
245
- repo_id=self.model_id,
246
- allow_patterns=["*.safetensors", "*.json", "tokenizer.*"],
247
- )
248
- self.next(self.train)
249
- ```
250
-
251
- **Usage: loading models directly from huggingface hub or from cache (from metaflow's datastore)**
252
- ```python
253
- @huggingface_hub(load=["mistralai/Mistral-7B-Instruct-v0.1"])
254
- @step
255
- def pull_model_from_huggingface(self):
256
- path_to_model = current.huggingface_hub.loaded["mistralai/Mistral-7B-Instruct-v0.1"]
257
- ```
258
-
259
- ```python
260
- @huggingface_hub(load=[("mistralai/Mistral-7B-Instruct-v0.1", "/my-directory"), ("myorg/mistral-lora, "/my-lora-directory")])
261
- @step
262
- def finetune_model(self):
263
- path_to_model = current.huggingface_hub.loaded["mistralai/Mistral-7B-Instruct-v0.1"]
264
- # path_to_model will be /my-directory
265
- ```
266
-
267
- ```python
268
- # Takes all the arguments passed to `snapshot_download`
269
- # except for `local_dir`
270
- @huggingface_hub(load=[
271
- {
272
- "repo_id": "mistralai/Mistral-7B-Instruct-v0.1",
273
- },
274
- {
275
- "repo_id": "myorg/mistral-lora",
276
- "repo_type": "model",
277
- },
278
- ])
279
- @step
280
- def finetune_model(self):
281
- path_to_model = current.huggingface_hub.loaded["mistralai/Mistral-7B-Instruct-v0.1"]
282
- # path_to_model will be /my-directory
283
- ```
284
-
285
-
286
- Parameters
287
- ----------
288
- temp_dir_root : str, optional
289
- The root directory that will hold the temporary directory where objects will be downloaded.
290
-
291
- load: Union[List[str], List[Tuple[Dict, str]], List[Tuple[str, str]], List[Dict], None]
292
- The list of repos (models/datasets) to load.
293
-
294
- Loaded repos can be accessed via `current.huggingface_hub.loaded`. If load is set, then the following happens:
239
+ Specifies the number of times the task corresponding
240
+ to a step needs to be retried.
295
241
 
296
- - If repo (model/dataset) is not found in the datastore:
297
- - Downloads the repo from Hugging Face Hub to a temporary directory (or uses specified path) for local access
298
- - Stores it in Metaflow's datastore (s3/gcs/azure etc.) with a unique name based on repo_type/repo_id
299
- - All HF models loaded for a `@step` will be cached separately under flow/step/namespace.
242
+ This decorator is useful for handling transient errors, such as networking issues.
243
+ If your task contains operations that can't be retried safely, e.g. database updates,
244
+ it is advisable to annotate it with `@retry(times=0)`.
300
245
 
301
- - If repo is found in the datastore:
302
- - Loads it directly from datastore to local path (can be temporary directory or specified path)
303
- """
304
- ...
305
-
306
- @typing.overload
307
- def secrets(*, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = []) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
308
- """
309
- Specifies secrets to be retrieved and injected as environment variables prior to
310
- the execution of a step.
246
+ This can be used in conjunction with the `@catch` decorator. The `@catch`
247
+ decorator will execute a no-op task after all retries have been exhausted,
248
+ ensuring that the flow execution can continue.
311
249
 
312
250
 
313
251
  Parameters
314
252
  ----------
315
- sources : List[Union[str, Dict[str, Any]]], default: []
316
- List of secret specs, defining how the secrets are to be retrieved
253
+ times : int, default 3
254
+ Number of times to retry this task.
255
+ minutes_between_retries : int, default 2
256
+ Number of minutes between retries.
317
257
  """
318
258
  ...
319
259
 
320
260
  @typing.overload
321
- def secrets(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
261
+ def retry(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
322
262
  ...
323
263
 
324
264
  @typing.overload
325
- def secrets(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
265
+ def retry(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
326
266
  ...
327
267
 
328
- def secrets(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = []):
268
+ def retry(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, times: int = 3, minutes_between_retries: int = 2):
329
269
  """
330
- Specifies secrets to be retrieved and injected as environment variables prior to
331
- the execution of a step.
270
+ Specifies the number of times the task corresponding
271
+ to a step needs to be retried.
272
+
273
+ This decorator is useful for handling transient errors, such as networking issues.
274
+ If your task contains operations that can't be retried safely, e.g. database updates,
275
+ it is advisable to annotate it with `@retry(times=0)`.
276
+
277
+ This can be used in conjunction with the `@catch` decorator. The `@catch`
278
+ decorator will execute a no-op task after all retries have been exhausted,
279
+ ensuring that the flow execution can continue.
332
280
 
333
281
 
334
282
  Parameters
335
283
  ----------
336
- sources : List[Union[str, Dict[str, Any]]], default: []
337
- List of secret specs, defining how the secrets are to be retrieved
284
+ times : int, default 3
285
+ Number of times to retry this task.
286
+ minutes_between_retries : int, default 2
287
+ Number of minutes between retries.
338
288
  """
339
289
  ...
340
290
 
341
- @typing.overload
342
- def catch(*, var: typing.Optional[str] = None, print_exception: bool = True) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
291
+ def nvct(*, gpu: int, gpu_type: str) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
343
292
  """
344
- Specifies that the step will succeed under all circumstances.
345
-
346
- The decorator will create an optional artifact, specified by `var`, which
347
- contains the exception raised. You can use it to detect the presence
348
- of errors, indicating that all happy-path artifacts produced by the step
349
- are missing.
293
+ Specifies that this step should execute on DGX cloud.
350
294
 
351
295
 
352
296
  Parameters
353
297
  ----------
354
- var : str, optional, default None
355
- Name of the artifact in which to store the caught exception.
356
- If not specified, the exception is not stored.
357
- print_exception : bool, default True
358
- Determines whether or not the exception is printed to
359
- stdout when caught.
298
+ gpu : int
299
+ Number of GPUs to use.
300
+ gpu_type : str
301
+ Type of Nvidia GPU to use.
360
302
  """
361
303
  ...
362
304
 
363
- @typing.overload
364
- def catch(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
365
- ...
366
-
367
- @typing.overload
368
- def catch(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
369
- ...
370
-
371
- def catch(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, var: typing.Optional[str] = None, print_exception: bool = True):
305
+ def ollama(*, models: list, backend: str, force_pull: bool, cache_update_policy: str, force_cache_update: bool, debug: bool, circuit_breaker_config: dict, timeout_config: dict) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
372
306
  """
373
- Specifies that the step will succeed under all circumstances.
307
+ This decorator is used to run Ollama APIs as Metaflow task sidecars.
374
308
 
375
- The decorator will create an optional artifact, specified by `var`, which
376
- contains the exception raised. You can use it to detect the presence
377
- of errors, indicating that all happy-path artifacts produced by the step
378
- are missing.
309
+ User code call
310
+ --------------
311
+ @ollama(
312
+ models=[...],
313
+ ...
314
+ )
315
+
316
+ Valid backend options
317
+ ---------------------
318
+ - 'local': Run as a separate process on the local task machine.
319
+ - (TODO) 'managed': Outerbounds hosts and selects compute provider.
320
+ - (TODO) 'remote': Spin up separate instance to serve Ollama models.
321
+
322
+ Valid model options
323
+ -------------------
324
+ Any model here https://ollama.com/search, e.g. 'llama3.2', 'llama3.3'
379
325
 
380
326
 
381
327
  Parameters
382
328
  ----------
383
- var : str, optional, default None
384
- Name of the artifact in which to store the caught exception.
385
- If not specified, the exception is not stored.
386
- print_exception : bool, default True
387
- Determines whether or not the exception is printed to
388
- stdout when caught.
329
+ models: list[str]
330
+ List of Ollama containers running models in sidecars.
331
+ backend: str
332
+ Determines where and how to run the Ollama process.
333
+ force_pull: bool
334
+ Whether to run `ollama pull` no matter what, or first check the remote cache in Metaflow datastore for this model key.
335
+ cache_update_policy: str
336
+ Cache update policy: "auto", "force", or "never".
337
+ force_cache_update: bool
338
+ Simple override for "force" cache update policy.
339
+ debug: bool
340
+ Whether to turn on verbose debugging logs.
341
+ circuit_breaker_config: dict
342
+ Configuration for circuit breaker protection. Keys: failure_threshold, recovery_timeout, reset_timeout.
343
+ timeout_config: dict
344
+ Configuration for various operation timeouts. Keys: pull, stop, health_check, install, server_startup.
389
345
  """
390
346
  ...
391
347
 
392
348
  @typing.overload
393
- def timeout(*, seconds: int = 0, minutes: int = 0, hours: int = 0) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
349
+ def model(*, load: typing.Union[typing.List[str], str, typing.List[typing.Tuple[str, typing.Optional[str]]]] = None, temp_dir_root: str = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
394
350
  """
395
- Specifies a timeout for your step.
351
+ Enables loading / saving of models within a step.
396
352
 
397
- This decorator is useful if this step may hang indefinitely.
353
+ > Examples
354
+ - Saving Models
355
+ ```python
356
+ @model
357
+ @step
358
+ def train(self):
359
+ # current.model.save returns a dictionary reference to the model saved
360
+ self.my_model = current.model.save(
361
+ path_to_my_model,
362
+ label="my_model",
363
+ metadata={
364
+ "epochs": 10,
365
+ "batch-size": 32,
366
+ "learning-rate": 0.001,
367
+ }
368
+ )
369
+ self.next(self.test)
398
370
 
399
- This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
400
- A timeout is considered to be an exception thrown by the step. It will cause the step to be
401
- retried if needed and the exception will be caught by the `@catch` decorator, if present.
371
+ @model(load="my_model")
372
+ @step
373
+ def test(self):
374
+ # `current.model.loaded` returns a dictionary of the loaded models
375
+ # where the key is the name of the artifact and the value is the path to the model
376
+ print(os.listdir(current.model.loaded["my_model"]))
377
+ self.next(self.end)
378
+ ```
402
379
 
403
- Note that all the values specified in parameters are added together so if you specify
404
- 60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.
380
+ - Loading models
381
+ ```python
382
+ @step
383
+ def train(self):
384
+ # current.model.load returns the path to the model loaded
385
+ checkpoint_path = current.model.load(
386
+ self.checkpoint_key,
387
+ )
388
+ model_path = current.model.load(
389
+ self.model,
390
+ )
391
+ self.next(self.test)
392
+ ```
405
393
 
406
394
 
407
395
  Parameters
408
396
  ----------
409
- seconds : int, default 0
410
- Number of seconds to wait prior to timing out.
411
- minutes : int, default 0
412
- Number of minutes to wait prior to timing out.
413
- hours : int, default 0
414
- Number of hours to wait prior to timing out.
397
+ load : Union[List[str],str,List[Tuple[str,Union[str,None]]]], default: None
398
+ Artifact name/s referencing the models/checkpoints to load. Artifact names refer to the names of the instance variables set to `self`.
399
+ The artifact names given to `load` can be reference objects or reference `key` strings from objects created by `current.checkpoint` / `current.model` / `current.huggingface_hub`.
400
+ If a list of tuples is provided, the first element is the artifact name and the second element is the path the artifact needs to be unpacked on
401
+ the local filesystem. If the second element is None, the artifact will be unpacked in the current working directory.
402
+ If a string is provided, then the artifact corresponding to that name will be loaded in the current working directory.
403
+
404
+ temp_dir_root : str, default: None
405
+ The root directory under which `current.model.loaded` will store loaded models
415
406
  """
416
407
  ...
417
408
 
418
409
  @typing.overload
419
- def timeout(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
410
+ def model(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
420
411
  ...
421
412
 
422
413
  @typing.overload
423
- def timeout(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
424
- ...
425
-
426
- def timeout(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, seconds: int = 0, minutes: int = 0, hours: int = 0):
427
- """
428
- Specifies a timeout for your step.
429
-
430
- This decorator is useful if this step may hang indefinitely.
431
-
432
- This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
433
- A timeout is considered to be an exception thrown by the step. It will cause the step to be
434
- retried if needed and the exception will be caught by the `@catch` decorator, if present.
435
-
436
- Note that all the values specified in parameters are added together so if you specify
437
- 60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.
438
-
439
-
440
- Parameters
441
- ----------
442
- seconds : int, default 0
443
- Number of seconds to wait prior to timing out.
444
- minutes : int, default 0
445
- Number of minutes to wait prior to timing out.
446
- hours : int, default 0
447
- Number of hours to wait prior to timing out.
448
- """
414
+ def model(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
449
415
  ...
450
416
 
451
- def nvct(*, gpu: int, gpu_type: str) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
417
+ def model(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, load: typing.Union[typing.List[str], str, typing.List[typing.Tuple[str, typing.Optional[str]]]] = None, temp_dir_root: str = None):
452
418
  """
453
- Specifies that this step should execute on DGX cloud.
454
-
419
+ Enables loading / saving of models within a step.
455
420
 
456
- Parameters
457
- ----------
458
- gpu : int
459
- Number of GPUs to use.
460
- gpu_type : str
461
- Type of Nvidia GPU to use.
462
- """
463
- ...
464
-
465
- def app_deploy(*, app_port: int, app_name: str) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
466
- """
467
- Specifies that this step is used to deploy an instance of the app.
468
- Requires that self.app_name, self.app_port, self.entrypoint and self.deployDir is set.
421
+ > Examples
422
+ - Saving Models
423
+ ```python
424
+ @model
425
+ @step
426
+ def train(self):
427
+ # current.model.save returns a dictionary reference to the model saved
428
+ self.my_model = current.model.save(
429
+ path_to_my_model,
430
+ label="my_model",
431
+ metadata={
432
+ "epochs": 10,
433
+ "batch-size": 32,
434
+ "learning-rate": 0.001,
435
+ }
436
+ )
437
+ self.next(self.test)
469
438
 
439
+ @model(load="my_model")
440
+ @step
441
+ def test(self):
442
+ # `current.model.loaded` returns a dictionary of the loaded models
443
+ # where the key is the name of the artifact and the value is the path to the model
444
+ print(os.listdir(current.model.loaded["my_model"]))
445
+ self.next(self.end)
446
+ ```
470
447
 
471
- Parameters
472
- ----------
473
- app_port : int
474
- Number of GPUs to use.
475
- app_name : str
476
- Name of the app to deploy.
477
- """
478
- ...
479
-
480
- def nvidia(*, gpu: int, gpu_type: str, queue_timeout: int) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
481
- """
482
- Specifies that this step should execute on DGX cloud.
448
+ - Loading models
449
+ ```python
450
+ @step
451
+ def train(self):
452
+ # current.model.load returns the path to the model loaded
453
+ checkpoint_path = current.model.load(
454
+ self.checkpoint_key,
455
+ )
456
+ model_path = current.model.load(
457
+ self.model,
458
+ )
459
+ self.next(self.test)
460
+ ```
483
461
 
484
462
 
485
463
  Parameters
486
464
  ----------
487
- gpu : int
488
- Number of GPUs to use.
489
- gpu_type : str
490
- Type of Nvidia GPU to use.
491
- queue_timeout : int
492
- Time to keep the job in NVCF's queue.
493
- """
494
- ...
495
-
496
- def ollama(*, models: list, backend: str, force_pull: bool, cache_update_policy: str, force_cache_update: bool, debug: bool, circuit_breaker_config: dict, timeout_config: dict) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
497
- """
498
- This decorator is used to run Ollama APIs as Metaflow task sidecars.
499
-
500
- User code call
501
- --------------
502
- @ollama(
503
- models=[...],
504
- ...
505
- )
506
-
507
- Valid backend options
508
- ---------------------
509
- - 'local': Run as a separate process on the local task machine.
510
- - (TODO) 'managed': Outerbounds hosts and selects compute provider.
511
- - (TODO) 'remote': Spin up separate instance to serve Ollama models.
512
-
513
- Valid model options
514
- -------------------
515
- Any model here https://ollama.com/search, e.g. 'llama3.2', 'llama3.3'
516
-
465
+ load : Union[List[str],str,List[Tuple[str,Union[str,None]]]], default: None
466
+ Artifact name/s referencing the models/checkpoints to load. Artifact names refer to the names of the instance variables set to `self`.
467
+ The artifact names given to `load` can be reference objects or reference `key` strings from objects created by `current.checkpoint` / `current.model` / `current.huggingface_hub`.
468
+ If a list of tuples is provided, the first element is the artifact name and the second element is the path the artifact needs to be unpacked on
469
+ the local filesystem. If the second element is None, the artifact will be unpacked in the current working directory.
470
+ If a string is provided, then the artifact corresponding to that name will be loaded in the current working directory.
517
471
 
518
- Parameters
519
- ----------
520
- models: list[str]
521
- List of Ollama containers running models in sidecars.
522
- backend: str
523
- Determines where and how to run the Ollama process.
524
- force_pull: bool
525
- Whether to run `ollama pull` no matter what, or first check the remote cache in Metaflow datastore for this model key.
526
- cache_update_policy: str
527
- Cache update policy: "auto", "force", or "never".
528
- force_cache_update: bool
529
- Simple override for "force" cache update policy.
530
- debug: bool
531
- Whether to turn on verbose debugging logs.
532
- circuit_breaker_config: dict
533
- Configuration for circuit breaker protection. Keys: failure_threshold, recovery_timeout, reset_timeout.
534
- timeout_config: dict
535
- Configuration for various operation timeouts. Keys: pull, stop, health_check, install, server_startup.
472
+ temp_dir_root : str, default: None
473
+ The root directory under which `current.model.loaded` will store loaded models
536
474
  """
537
475
  ...
538
476
 
539
- @typing.overload
540
- def checkpoint(*, load_policy: str = 'fresh', temp_dir_root: str = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
477
+ def huggingface_hub(*, temp_dir_root: typing.Optional[str] = None, load: typing.Union[typing.List[str], typing.List[typing.Tuple[typing.Dict, str]], typing.List[typing.Tuple[str, str]], typing.List[typing.Dict], None]) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
541
478
  """
542
- Enables checkpointing for a step.
479
+ Decorator that helps cache, version and store models/datasets from huggingface hub.
543
480
 
544
481
  > Examples
545
482
 
546
- - Saving Checkpoints
547
-
483
+ **Usage: creating references of models from huggingface that may be loaded in downstream steps**
548
484
  ```python
549
- @checkpoint
550
- @step
551
- def train(self):
552
- model = create_model(self.parameters, checkpoint_path = None)
553
- for i in range(self.epochs):
554
- # some training logic
555
- loss = model.train(self.dataset)
556
- if i % 10 == 0:
557
- model.save(
558
- current.checkpoint.directory,
559
- )
560
- # saves the contents of the `current.checkpoint.directory` as a checkpoint
561
- # and returns a reference dictionary to the checkpoint saved in the datastore
562
- self.latest_checkpoint = current.checkpoint.save(
563
- name="epoch_checkpoint",
564
- metadata={
565
- "epoch": i,
566
- "loss": loss,
567
- }
568
- )
485
+ @huggingface_hub
486
+ @step
487
+ def pull_model_from_huggingface(self):
488
+ # `current.huggingface_hub.snapshot_download` downloads the model from the Hugging Face Hub
489
+ # and saves it in the backend storage based on the model's `repo_id`. If there exists a model
490
+ # with the same `repo_id` in the backend storage, it will not download the model again. The return
491
+ # value of the function is a reference to the model in the backend storage.
492
+ # This reference can be used to load the model in the subsequent steps via `@model(load=["llama_model"])`
493
+
494
+ self.model_id = "mistralai/Mistral-7B-Instruct-v0.1"
495
+ self.llama_model = current.huggingface_hub.snapshot_download(
496
+ repo_id=self.model_id,
497
+ allow_patterns=["*.safetensors", "*.json", "tokenizer.*"],
498
+ )
499
+ self.next(self.train)
569
500
  ```
570
501
 
571
- - Using Loaded Checkpoints
572
-
573
- ```python
574
- @retry(times=3)
575
- @checkpoint
576
- @step
577
- def train(self):
578
- # Assume that the task has restarted and the previous attempt of the task
579
- # saved a checkpoint
580
- checkpoint_path = None
581
- if current.checkpoint.is_loaded: # Check if a checkpoint is loaded
582
- print("Loaded checkpoint from the previous attempt")
583
- checkpoint_path = current.checkpoint.directory
584
-
585
- model = create_model(self.parameters, checkpoint_path = checkpoint_path)
586
- for i in range(self.epochs):
587
- ...
588
- ```
589
-
590
-
591
- Parameters
592
- ----------
593
- load_policy : str, default: "fresh"
594
- The policy for loading the checkpoint. The following policies are supported:
595
- - "eager": Loads the latest available checkpoint within the namespace.
596
- With this mode, the latest checkpoint written by any previous task (can be even a different run) of the step
597
- will be loaded at the start of the task.
598
- - "none": Do not load any checkpoint
599
- - "fresh": Loads the latest checkpoint created within the running Task.
600
- This mode helps loading checkpoints across various retry attempts of the same task.
601
- With this mode, no checkpoint will be loaded at the start of a task but any checkpoints
602
- created within the task will be loaded when the task retries execution on failure.
603
-
604
- temp_dir_root : str, default: None
605
- The root directory under which `current.checkpoint.directory` will be created.
606
- """
607
- ...
608
-
609
- @typing.overload
610
- def checkpoint(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
611
- ...
612
-
613
- @typing.overload
614
- def checkpoint(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
615
- ...
616
-
617
- def checkpoint(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, load_policy: str = 'fresh', temp_dir_root: str = None):
618
- """
619
- Enables checkpointing for a step.
620
-
621
- > Examples
622
-
623
- - Saving Checkpoints
624
-
625
- ```python
626
- @checkpoint
627
- @step
628
- def train(self):
629
- model = create_model(self.parameters, checkpoint_path = None)
630
- for i in range(self.epochs):
631
- # some training logic
632
- loss = model.train(self.dataset)
633
- if i % 10 == 0:
634
- model.save(
635
- current.checkpoint.directory,
636
- )
637
- # saves the contents of the `current.checkpoint.directory` as a checkpoint
638
- # and returns a reference dictionary to the checkpoint saved in the datastore
639
- self.latest_checkpoint = current.checkpoint.save(
640
- name="epoch_checkpoint",
641
- metadata={
642
- "epoch": i,
643
- "loss": loss,
644
- }
645
- )
646
- ```
647
-
648
- - Using Loaded Checkpoints
649
-
502
+ **Usage: loading models directly from huggingface hub or from cache (from metaflow's datastore)**
650
503
  ```python
651
- @retry(times=3)
652
- @checkpoint
653
- @step
654
- def train(self):
655
- # Assume that the task has restarted and the previous attempt of the task
656
- # saved a checkpoint
657
- checkpoint_path = None
658
- if current.checkpoint.is_loaded: # Check if a checkpoint is loaded
659
- print("Loaded checkpoint from the previous attempt")
660
- checkpoint_path = current.checkpoint.directory
661
-
662
- model = create_model(self.parameters, checkpoint_path = checkpoint_path)
663
- for i in range(self.epochs):
664
- ...
665
- ```
666
-
667
-
668
- Parameters
669
- ----------
670
- load_policy : str, default: "fresh"
671
- The policy for loading the checkpoint. The following policies are supported:
672
- - "eager": Loads the latest available checkpoint within the namespace.
673
- With this mode, the latest checkpoint written by any previous task (can be even a different run) of the step
674
- will be loaded at the start of the task.
675
- - "none": Do not load any checkpoint
676
- - "fresh": Loads the latest checkpoint created within the running Task.
677
- This mode helps loading checkpoints across various retry attempts of the same task.
678
- With this mode, no checkpoint will be loaded at the start of a task but any checkpoints
679
- created within the task will be loaded when the task retries execution on failure.
680
-
681
- temp_dir_root : str, default: None
682
- The root directory under which `current.checkpoint.directory` will be created.
683
- """
684
- ...
685
-
686
- def vllm(*, model: str, backend: str, debug: bool, kwargs: typing.Any) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
687
- """
688
- This decorator is used to run vllm APIs as Metaflow task sidecars.
689
-
690
- User code call
691
- --------------
692
- @vllm(
693
- model="...",
694
- ...
695
- )
696
-
697
- Valid backend options
698
- ---------------------
699
- - 'local': Run as a separate process on the local task machine.
700
-
701
- Valid model options
702
- -------------------
703
- Any HuggingFace model identifier, e.g. 'meta-llama/Llama-3.2-1B'
704
-
705
- NOTE: vLLM's OpenAI-compatible server serves ONE model per server instance.
706
- If you need multiple models, you must create multiple @vllm decorators.
707
-
708
-
709
- Parameters
710
- ----------
711
- model: str
712
- HuggingFace model identifier to be served by vLLM.
713
- backend: str
714
- Determines where and how to run the vLLM process.
715
- debug: bool
716
- Whether to turn on verbose debugging logs.
717
- kwargs : Any
718
- Any other keyword arguments are passed directly to the vLLM engine.
719
- This allows for flexible configuration of vLLM server settings.
720
- For example, `tensor_parallel_size=2`.
721
- """
722
- ...
723
-
724
- @typing.overload
725
- def resources(*, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
726
- """
727
- Specifies the resources needed when executing this step.
728
-
729
- Use `@resources` to specify the resource requirements
730
- independently of the specific compute layer (`@batch`, `@kubernetes`).
731
-
732
- You can choose the compute layer on the command line by executing e.g.
733
- ```
734
- python myflow.py run --with batch
735
- ```
736
- or
737
- ```
738
- python myflow.py run --with kubernetes
739
- ```
740
- which executes the flow on the desired system using the
741
- requirements specified in `@resources`.
742
-
743
-
744
- Parameters
745
- ----------
746
- cpu : int, default 1
747
- Number of CPUs required for this step.
748
- gpu : int, optional, default None
749
- Number of GPUs required for this step.
750
- disk : int, optional, default None
751
- Disk size (in MB) required for this step. Only applies on Kubernetes.
752
- memory : int, default 4096
753
- Memory size (in MB) required for this step.
754
- shared_memory : int, optional, default None
755
- The value for the size (in MiB) of the /dev/shm volume for this step.
756
- This parameter maps to the `--shm-size` option in Docker.
757
- """
758
- ...
759
-
760
- @typing.overload
761
- def resources(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
762
- ...
763
-
764
- @typing.overload
765
- def resources(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
766
- ...
767
-
768
- def resources(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None):
769
- """
770
- Specifies the resources needed when executing this step.
771
-
772
- Use `@resources` to specify the resource requirements
773
- independently of the specific compute layer (`@batch`, `@kubernetes`).
774
-
775
- You can choose the compute layer on the command line by executing e.g.
776
- ```
777
- python myflow.py run --with batch
778
- ```
779
- or
780
- ```
781
- python myflow.py run --with kubernetes
504
+ @huggingface_hub(load=["mistralai/Mistral-7B-Instruct-v0.1"])
505
+ @step
506
+ def pull_model_from_huggingface(self):
507
+ path_to_model = current.huggingface_hub.loaded["mistralai/Mistral-7B-Instruct-v0.1"]
782
508
  ```
783
- which executes the flow on the desired system using the
784
- requirements specified in `@resources`.
785
-
786
-
787
- Parameters
788
- ----------
789
- cpu : int, default 1
790
- Number of CPUs required for this step.
791
- gpu : int, optional, default None
792
- Number of GPUs required for this step.
793
- disk : int, optional, default None
794
- Disk size (in MB) required for this step. Only applies on Kubernetes.
795
- memory : int, default 4096
796
- Memory size (in MB) required for this step.
797
- shared_memory : int, optional, default None
798
- The value for the size (in MiB) of the /dev/shm volume for this step.
799
- This parameter maps to the `--shm-size` option in Docker.
800
- """
801
- ...
802
-
803
- def kubernetes(*, cpu: int = 1, memory: int = 4096, disk: int = 10240, image: typing.Optional[str] = None, image_pull_policy: str = 'KUBERNETES_IMAGE_PULL_POLICY', image_pull_secrets: typing.List[str] = [], service_account: str = 'METAFLOW_KUBERNETES_SERVICE_ACCOUNT', secrets: typing.Optional[typing.List[str]] = None, node_selector: typing.Union[typing.Dict[str, str], str, None] = None, namespace: str = 'METAFLOW_KUBERNETES_NAMESPACE', gpu: typing.Optional[int] = None, gpu_vendor: str = 'KUBERNETES_GPU_VENDOR', tolerations: typing.List[str] = [], labels: typing.Dict[str, str] = 'METAFLOW_KUBERNETES_LABELS', annotations: typing.Dict[str, str] = 'METAFLOW_KUBERNETES_ANNOTATIONS', use_tmpfs: bool = False, tmpfs_tempdir: bool = True, tmpfs_size: typing.Optional[int] = None, tmpfs_path: typing.Optional[str] = '/metaflow_temp', persistent_volume_claims: typing.Optional[typing.Dict[str, str]] = None, shared_memory: typing.Optional[int] = None, port: typing.Optional[int] = None, compute_pool: typing.Optional[str] = None, hostname_resolution_timeout: int = 600, qos: str = 'Burstable', security_context: typing.Optional[typing.Dict[str, typing.Any]] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
804
- """
805
- Specifies that this step should execute on Kubernetes.
806
-
807
509
 
808
- Parameters
809
- ----------
810
- cpu : int, default 1
811
- Number of CPUs required for this step. If `@resources` is
812
- also present, the maximum value from all decorators is used.
813
- memory : int, default 4096
814
- Memory size (in MB) required for this step. If
815
- `@resources` is also present, the maximum value from all decorators is
816
- used.
817
- disk : int, default 10240
818
- Disk size (in MB) required for this step. If
819
- `@resources` is also present, the maximum value from all decorators is
820
- used.
821
- image : str, optional, default None
822
- Docker image to use when launching on Kubernetes. If not specified, and
823
- METAFLOW_KUBERNETES_CONTAINER_IMAGE is specified, that image is used. If
824
- not, a default Docker image mapping to the current version of Python is used.
825
- image_pull_policy: str, default KUBERNETES_IMAGE_PULL_POLICY
826
- If given, the imagePullPolicy to be applied to the Docker image of the step.
827
- image_pull_secrets: List[str], default []
828
- The default is extracted from METAFLOW_KUBERNETES_IMAGE_PULL_SECRETS.
829
- Kubernetes image pull secrets to use when pulling container images
830
- in Kubernetes.
831
- service_account : str, default METAFLOW_KUBERNETES_SERVICE_ACCOUNT
832
- Kubernetes service account to use when launching pod in Kubernetes.
833
- secrets : List[str], optional, default None
834
- Kubernetes secrets to use when launching pod in Kubernetes. These
835
- secrets are in addition to the ones defined in `METAFLOW_KUBERNETES_SECRETS`
836
- in Metaflow configuration.
837
- node_selector: Union[Dict[str,str], str], optional, default None
838
- Kubernetes node selector(s) to apply to the pod running the task.
839
- Can be passed in as a comma separated string of values e.g.
840
- 'kubernetes.io/os=linux,kubernetes.io/arch=amd64' or as a dictionary
841
- {'kubernetes.io/os': 'linux', 'kubernetes.io/arch': 'amd64'}
842
- namespace : str, default METAFLOW_KUBERNETES_NAMESPACE
843
- Kubernetes namespace to use when launching pod in Kubernetes.
844
- gpu : int, optional, default None
845
- Number of GPUs required for this step. A value of zero implies that
846
- the scheduled node should not have GPUs.
847
- gpu_vendor : str, default KUBERNETES_GPU_VENDOR
848
- The vendor of the GPUs to be used for this step.
849
- tolerations : List[str], default []
850
- The default is extracted from METAFLOW_KUBERNETES_TOLERATIONS.
851
- Kubernetes tolerations to use when launching pod in Kubernetes.
852
- labels: Dict[str, str], default: METAFLOW_KUBERNETES_LABELS
853
- Kubernetes labels to use when launching pod in Kubernetes.
854
- annotations: Dict[str, str], default: METAFLOW_KUBERNETES_ANNOTATIONS
855
- Kubernetes annotations to use when launching pod in Kubernetes.
856
- use_tmpfs : bool, default False
857
- This enables an explicit tmpfs mount for this step.
858
- tmpfs_tempdir : bool, default True
859
- sets METAFLOW_TEMPDIR to tmpfs_path if set for this step.
860
- tmpfs_size : int, optional, default: None
861
- The value for the size (in MiB) of the tmpfs mount for this step.
862
- This parameter maps to the `--tmpfs` option in Docker. Defaults to 50% of the
863
- memory allocated for this step.
864
- tmpfs_path : str, optional, default /metaflow_temp
865
- Path to tmpfs mount for this step.
866
- persistent_volume_claims : Dict[str, str], optional, default None
867
- A map (dictionary) of persistent volumes to be mounted to the pod for this step. The map is from persistent
868
- volumes to the path to which the volume is to be mounted, e.g., `{'pvc-name': '/path/to/mount/on'}`.
869
- shared_memory: int, optional
870
- Shared memory size (in MiB) required for this step
871
- port: int, optional
872
- Port number to specify in the Kubernetes job object
873
- compute_pool : str, optional, default None
874
- Compute pool to be used for this step.
875
- If not specified, any accessible compute pool within the perimeter is used.
876
- hostname_resolution_timeout: int, default 10 * 60
877
- Timeout in seconds for the workers tasks in the gang scheduled cluster to resolve the hostname of control task.
878
- Only applicable when @parallel is used.
879
- qos: str, default: Burstable
880
- Quality of Service class to assign to the pod. Supported values are: Guaranteed, Burstable, BestEffort
881
-
882
- security_context: Dict[str, Any], optional, default None
883
- Container security context. Applies to the task container. Allows the following keys:
884
- - privileged: bool, optional, default None
885
- - allow_privilege_escalation: bool, optional, default None
886
- - run_as_user: int, optional, default None
887
- - run_as_group: int, optional, default None
888
- - run_as_non_root: bool, optional, default None
889
- """
890
- ...
891
-
892
- @typing.overload
893
- def card(*, type: str = 'default', id: typing.Optional[str] = None, options: typing.Dict[str, typing.Any] = {}, timeout: int = 45) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
894
- """
895
- Creates a human-readable report, a Metaflow Card, after this step completes.
510
+ ```python
511
+ @huggingface_hub(load=[("mistralai/Mistral-7B-Instruct-v0.1", "/my-directory"), ("myorg/mistral-lora", "/my-lora-directory")])
512
+ @step
513
+ def finetune_model(self):
514
+ path_to_model = current.huggingface_hub.loaded["mistralai/Mistral-7B-Instruct-v0.1"]
515
+ # path_to_model will be /my-directory
516
+ ```
896
517
 
897
- Note that you may add multiple `@card` decorators in a step with different parameters.
518
+ ```python
519
+ # Takes all the arguments passed to `snapshot_download`
520
+ # except for `local_dir`
521
+ @huggingface_hub(load=[
522
+ {
523
+ "repo_id": "mistralai/Mistral-7B-Instruct-v0.1",
524
+ },
525
+ {
526
+ "repo_id": "myorg/mistral-lora",
527
+ "repo_type": "model",
528
+ },
529
+ ])
530
+ @step
531
+ def finetune_model(self):
532
+ path_to_model = current.huggingface_hub.loaded["mistralai/Mistral-7B-Instruct-v0.1"]
533
+ # path_to_model will be a temporary directory, since no local path is given for this repo
534
+ ```
898
535
 
899
536
 
900
537
  Parameters
901
538
  ----------
902
- type : str, default 'default'
903
- Card type.
904
- id : str, optional, default None
905
- If multiple cards are present, use this id to identify this card.
906
- options : Dict[str, Any], default {}
907
- Options passed to the card. The contents depend on the card type.
908
- timeout : int, default 45
909
- Interrupt reporting if it takes more than this many seconds.
539
+ temp_dir_root : str, optional
540
+ The root directory that will hold the temporary directory where objects will be downloaded.
541
+
542
+ load: Union[List[str], List[Tuple[Dict, str]], List[Tuple[str, str]], List[Dict], None]
543
+ The list of repos (models/datasets) to load.
544
+
545
+ Loaded repos can be accessed via `current.huggingface_hub.loaded`. If load is set, then the following happens:
546
+
547
+ - If repo (model/dataset) is not found in the datastore:
548
+ - Downloads the repo from Hugging Face Hub to a temporary directory (or uses specified path) for local access
549
+ - Stores it in Metaflow's datastore (s3/gcs/azure etc.) with a unique name based on repo_type/repo_id
550
+ - All HF models loaded for a `@step` will be cached separately under flow/step/namespace.
551
+
552
+ - If repo is found in the datastore:
553
+ - Loads it directly from datastore to local path (can be temporary directory or specified path)
910
554
  """
911
555
  ...
912
556
 
913
557
  @typing.overload
914
- def card(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
915
- ...
916
-
917
- @typing.overload
918
- def card(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
919
- ...
920
-
921
- def card(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, type: str = 'default', id: typing.Optional[str] = None, options: typing.Dict[str, typing.Any] = {}, timeout: int = 45):
558
+ def checkpoint(*, load_policy: str = 'fresh', temp_dir_root: str = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
922
559
  """
923
- Creates a human-readable report, a Metaflow Card, after this step completes.
560
+ Enables checkpointing for a step.
924
561
 
925
- Note that you may add multiple `@card` decorators in a step with different parameters.
562
+ > Examples
563
+
564
+ - Saving Checkpoints
565
+
566
+ ```python
567
+ @checkpoint
568
+ @step
569
+ def train(self):
570
+ model = create_model(self.parameters, checkpoint_path = None)
571
+ for i in range(self.epochs):
572
+ # some training logic
573
+ loss = model.train(self.dataset)
574
+ if i % 10 == 0:
575
+ model.save(
576
+ current.checkpoint.directory,
577
+ )
578
+ # saves the contents of the `current.checkpoint.directory` as a checkpoint
579
+ # and returns a reference dictionary to the checkpoint saved in the datastore
580
+ self.latest_checkpoint = current.checkpoint.save(
581
+ name="epoch_checkpoint",
582
+ metadata={
583
+ "epoch": i,
584
+ "loss": loss,
585
+ }
586
+ )
587
+ ```
588
+
589
+ - Using Loaded Checkpoints
590
+
591
+ ```python
592
+ @retry(times=3)
593
+ @checkpoint
594
+ @step
595
+ def train(self):
596
+ # Assume that the task has restarted and the previous attempt of the task
597
+ # saved a checkpoint
598
+ checkpoint_path = None
599
+ if current.checkpoint.is_loaded: # Check if a checkpoint is loaded
600
+ print("Loaded checkpoint from the previous attempt")
601
+ checkpoint_path = current.checkpoint.directory
602
+
603
+ model = create_model(self.parameters, checkpoint_path = checkpoint_path)
604
+ for i in range(self.epochs):
605
+ ...
606
+ ```
926
607
 
927
608
 
928
609
  Parameters
929
610
  ----------
930
- type : str, default 'default'
931
- Card type.
932
- id : str, optional, default None
933
- If multiple cards are present, use this id to identify this card.
934
- options : Dict[str, Any], default {}
935
- Options passed to the card. The contents depend on the card type.
936
- timeout : int, default 45
937
- Interrupt reporting if it takes more than this many seconds.
611
+ load_policy : str, default: "fresh"
612
+ The policy for loading the checkpoint. The following policies are supported:
613
+ - "eager": Loads the latest available checkpoint within the namespace.
614
+ With this mode, the latest checkpoint written by any previous task (can be even a different run) of the step
615
+ will be loaded at the start of the task.
616
+ - "none": Do not load any checkpoint
617
+ - "fresh": Loads the latest checkpoint created within the running Task.
618
+ This mode helps loading checkpoints across various retry attempts of the same task.
619
+ With this mode, no checkpoint will be loaded at the start of a task but any checkpoints
620
+ created within the task will be loaded when the task retries execution on failure.
621
+
622
+ temp_dir_root : str, default: None
623
+ The root directory under which `current.checkpoint.directory` will be created.
938
624
  """
939
625
  ...
940
626
 
941
627
  @typing.overload
942
- def parallel(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
943
- """
944
- Decorator prototype for all step decorators. This function gets specialized
945
- and imported for all decorators types by _import_plugin_decorators().
946
- """
628
+ def checkpoint(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
947
629
  ...
948
630
 
949
631
  @typing.overload
950
- def parallel(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
951
- ...
952
-
953
- def parallel(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
954
- """
955
- Decorator prototype for all step decorators. This function gets specialized
956
- and imported for all decorators types by _import_plugin_decorators().
957
- """
632
+ def checkpoint(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
958
633
  ...
959
634
 
960
- @typing.overload
961
- def retry(*, times: int = 3, minutes_between_retries: int = 2) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
635
+ def checkpoint(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, load_policy: str = 'fresh', temp_dir_root: str = None):
962
636
  """
963
- Specifies the number of times the task corresponding
964
- to a step needs to be retried.
637
+ Enables checkpointing for a step.
965
638
 
966
- This decorator is useful for handling transient errors, such as networking issues.
967
- If your task contains operations that can't be retried safely, e.g. database updates,
968
- it is advisable to annotate it with `@retry(times=0)`.
639
+ > Examples
969
640
 
970
- This can be used in conjunction with the `@catch` decorator. The `@catch`
971
- decorator will execute a no-op task after all retries have been exhausted,
972
- ensuring that the flow execution can continue.
641
+ - Saving Checkpoints
642
+
643
+ ```python
644
+ @checkpoint
645
+ @step
646
+ def train(self):
647
+ model = create_model(self.parameters, checkpoint_path = None)
648
+ for i in range(self.epochs):
649
+ # some training logic
650
+ loss = model.train(self.dataset)
651
+ if i % 10 == 0:
652
+ model.save(
653
+ current.checkpoint.directory,
654
+ )
655
+ # saves the contents of the `current.checkpoint.directory` as a checkpoint
656
+ # and returns a reference dictionary to the checkpoint saved in the datastore
657
+ self.latest_checkpoint = current.checkpoint.save(
658
+ name="epoch_checkpoint",
659
+ metadata={
660
+ "epoch": i,
661
+ "loss": loss,
662
+ }
663
+ )
664
+ ```
665
+
666
+ - Using Loaded Checkpoints
667
+
668
+ ```python
669
+ @retry(times=3)
670
+ @checkpoint
671
+ @step
672
+ def train(self):
673
+ # Assume that the task has restarted and the previous attempt of the task
674
+ # saved a checkpoint
675
+ checkpoint_path = None
676
+ if current.checkpoint.is_loaded: # Check if a checkpoint is loaded
677
+ print("Loaded checkpoint from the previous attempt")
678
+ checkpoint_path = current.checkpoint.directory
679
+
680
+ model = create_model(self.parameters, checkpoint_path = checkpoint_path)
681
+ for i in range(self.epochs):
682
+ ...
683
+ ```
973
684
 
974
685
 
975
686
  Parameters
976
687
  ----------
977
- times : int, default 3
978
- Number of times to retry this task.
979
- minutes_between_retries : int, default 2
980
- Number of minutes between retries.
688
+ load_policy : str, default: "fresh"
689
+ The policy for loading the checkpoint. The following policies are supported:
690
+ - "eager": Loads the latest available checkpoint within the namespace.
691
+ With this mode, the latest checkpoint written by any previous task (can be even a different run) of the step
692
+ will be loaded at the start of the task.
693
+ - "none": Do not load any checkpoint
694
+ - "fresh": Loads the latest checkpoint created within the running Task.
695
+ This mode helps loading checkpoints across various retry attempts of the same task.
696
+ With this mode, no checkpoint will be loaded at the start of a task but any checkpoints
697
+ created within the task will be loaded when the task retries execution on failure.
698
+
699
+ temp_dir_root : str, default: None
700
+ The root directory under which `current.checkpoint.directory` will be created.
981
701
  """
982
702
  ...
983
703
 
984
704
  @typing.overload
985
- def retry(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
986
- ...
987
-
988
- @typing.overload
989
- def retry(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
990
- ...
991
-
992
- def retry(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, times: int = 3, minutes_between_retries: int = 2):
705
+ def card(*, type: str = 'default', id: typing.Optional[str] = None, options: typing.Dict[str, typing.Any] = {}, timeout: int = 45) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
993
706
  """
994
- Specifies the number of times the task corresponding
995
- to a step needs to be retried.
996
-
997
- This decorator is useful for handling transient errors, such as networking issues.
998
- If your task contains operations that can't be retried safely, e.g. database updates,
999
- it is advisable to annotate it with `@retry(times=0)`.
707
+ Creates a human-readable report, a Metaflow Card, after this step completes.
1000
708
 
1001
- This can be used in conjunction with the `@catch` decorator. The `@catch`
1002
- decorator will execute a no-op task after all retries have been exhausted,
1003
- ensuring that the flow execution can continue.
709
+ Note that you may add multiple `@card` decorators in a step with different parameters.
1004
710
 
1005
711
 
1006
712
  Parameters
1007
713
  ----------
1008
- times : int, default 3
1009
- Number of times to retry this task.
1010
- minutes_between_retries : int, default 2
1011
- Number of minutes between retries.
714
+ type : str, default 'default'
715
+ Card type.
716
+ id : str, optional, default None
717
+ If multiple cards are present, use this id to identify this card.
718
+ options : Dict[str, Any], default {}
719
+ Options passed to the card. The contents depend on the card type.
720
+ timeout : int, default 45
721
+ Interrupt reporting if it takes more than this many seconds.
1012
722
  """
1013
723
  ...
1014
724
 
1015
725
  @typing.overload
1016
- def conda(*, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
726
+ def card(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
727
+ ...
728
+
729
+ @typing.overload
730
+ def card(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
731
+ ...
732
+
733
+ def card(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, type: str = 'default', id: typing.Optional[str] = None, options: typing.Dict[str, typing.Any] = {}, timeout: int = 45):
1017
734
  """
1018
- Specifies the Conda environment for the step.
735
+ Creates a human-readable report, a Metaflow Card, after this step completes.
1019
736
 
1020
- Information in this decorator will augment any
1021
- attributes set in the `@conda_base` flow-level decorator. Hence,
1022
- you can use `@conda_base` to set packages required by all
1023
- steps and use `@conda` to specify step-specific overrides.
737
+ Note that you may add multiple `@card` decorators in a step with different parameters.
1024
738
 
1025
739
 
1026
740
  Parameters
1027
741
  ----------
1028
- packages : Dict[str, str], default {}
1029
- Packages to use for this step. The key is the name of the package
1030
- and the value is the version to use.
1031
- libraries : Dict[str, str], default {}
1032
- Supported for backward compatibility. When used with packages, packages will take precedence.
1033
- python : str, optional, default None
1034
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
1035
- that the version used will correspond to the version of the Python interpreter used to start the run.
1036
- disabled : bool, default False
1037
- If set to True, disables @conda.
742
+ type : str, default 'default'
743
+ Card type.
744
+ id : str, optional, default None
745
+ If multiple cards are present, use this id to identify this card.
746
+ options : Dict[str, Any], default {}
747
+ Options passed to the card. The contents depend on the card type.
748
+ timeout : int, default 45
749
+ Interrupt reporting if it takes more than this many seconds.
1038
750
  """
1039
751
  ...
1040
752
 
1041
753
  @typing.overload
1042
- def conda(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
754
+ def fast_bakery_internal(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
755
+ """
756
+ Internal decorator to support Fast bakery
757
+ """
1043
758
  ...
1044
759
 
1045
760
  @typing.overload
1046
- def conda(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
761
+ def fast_bakery_internal(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1047
762
  ...
1048
763
 
1049
- def conda(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False):
764
+ def fast_bakery_internal(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
1050
765
  """
1051
- Specifies the Conda environment for the step.
1052
-
1053
- Information in this decorator will augment any
1054
- attributes set in the `@conda_base` flow-level decorator. Hence,
1055
- you can use `@conda_base` to set packages required by all
1056
- steps and use `@conda` to specify step-specific overrides.
766
+ Internal decorator to support Fast bakery
767
+ """
768
+ ...
769
+
770
+ def app_deploy(*, app_port: int, app_name: str) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
771
+ """
772
+ Specifies that this step is used to deploy an instance of the app.
773
+ Requires that self.app_name, self.app_port, self.entrypoint and self.deployDir are set.
1057
774
 
1058
775
 
1059
776
  Parameters
1060
777
  ----------
1061
- packages : Dict[str, str], default {}
1062
- Packages to use for this step. The key is the name of the package
1063
- and the value is the version to use.
1064
- libraries : Dict[str, str], default {}
1065
- Supported for backward compatibility. When used with packages, packages will take precedence.
1066
- python : str, optional, default None
1067
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
1068
- that the version used will correspond to the version of the Python interpreter used to start the run.
1069
- disabled : bool, default False
1070
- If set to True, disables @conda.
778
+ app_port : int
779
+ Port of the app to deploy.
780
+ app_name : str
781
+ Name of the app to deploy.
1071
782
  """
1072
783
  ...
1073
784
 
1074
- @typing.overload
1075
- def model(*, load: typing.Union[typing.List[str], str, typing.List[typing.Tuple[str, typing.Optional[str]]]] = None, temp_dir_root: str = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
785
+ def kubernetes(*, cpu: int = 1, memory: int = 4096, disk: int = 10240, image: typing.Optional[str] = None, image_pull_policy: str = 'KUBERNETES_IMAGE_PULL_POLICY', image_pull_secrets: typing.List[str] = [], service_account: str = 'METAFLOW_KUBERNETES_SERVICE_ACCOUNT', secrets: typing.Optional[typing.List[str]] = None, node_selector: typing.Union[typing.Dict[str, str], str, None] = None, namespace: str = 'METAFLOW_KUBERNETES_NAMESPACE', gpu: typing.Optional[int] = None, gpu_vendor: str = 'KUBERNETES_GPU_VENDOR', tolerations: typing.List[str] = [], labels: typing.Dict[str, str] = 'METAFLOW_KUBERNETES_LABELS', annotations: typing.Dict[str, str] = 'METAFLOW_KUBERNETES_ANNOTATIONS', use_tmpfs: bool = False, tmpfs_tempdir: bool = True, tmpfs_size: typing.Optional[int] = None, tmpfs_path: typing.Optional[str] = '/metaflow_temp', persistent_volume_claims: typing.Optional[typing.Dict[str, str]] = None, shared_memory: typing.Optional[int] = None, port: typing.Optional[int] = None, compute_pool: typing.Optional[str] = None, hostname_resolution_timeout: int = 600, qos: str = 'Burstable', security_context: typing.Optional[typing.Dict[str, typing.Any]] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1076
786
  """
1077
- Enables loading / saving of models within a step.
787
+ Specifies that this step should execute on Kubernetes.
1078
788
 
1079
- > Examples
1080
- - Saving Models
1081
- ```python
1082
- @model
1083
- @step
1084
- def train(self):
1085
- # current.model.save returns a dictionary reference to the model saved
1086
- self.my_model = current.model.save(
1087
- path_to_my_model,
1088
- label="my_model",
1089
- metadata={
1090
- "epochs": 10,
1091
- "batch-size": 32,
1092
- "learning-rate": 0.001,
1093
- }
1094
- )
1095
- self.next(self.test)
1096
789
 
1097
- @model(load="my_model")
1098
- @step
1099
- def test(self):
1100
- # `current.model.loaded` returns a dictionary of the loaded models
1101
- # where the key is the name of the artifact and the value is the path to the model
1102
- print(os.listdir(current.model.loaded["my_model"]))
1103
- self.next(self.end)
1104
- ```
790
+ Parameters
791
+ ----------
792
+ cpu : int, default 1
793
+ Number of CPUs required for this step. If `@resources` is
794
+ also present, the maximum value from all decorators is used.
795
+ memory : int, default 4096
796
+ Memory size (in MB) required for this step. If
797
+ `@resources` is also present, the maximum value from all decorators is
798
+ used.
799
+ disk : int, default 10240
800
+ Disk size (in MB) required for this step. If
801
+ `@resources` is also present, the maximum value from all decorators is
802
+ used.
803
+ image : str, optional, default None
804
+ Docker image to use when launching on Kubernetes. If not specified, and
805
+ METAFLOW_KUBERNETES_CONTAINER_IMAGE is specified, that image is used. If
806
+ not, a default Docker image mapping to the current version of Python is used.
807
+ image_pull_policy: str, default KUBERNETES_IMAGE_PULL_POLICY
808
+ If given, the imagePullPolicy to be applied to the Docker image of the step.
809
+ image_pull_secrets: List[str], default []
810
+ The default is extracted from METAFLOW_KUBERNETES_IMAGE_PULL_SECRETS.
811
+ Kubernetes image pull secrets to use when pulling container images
812
+ in Kubernetes.
813
+ service_account : str, default METAFLOW_KUBERNETES_SERVICE_ACCOUNT
814
+ Kubernetes service account to use when launching pod in Kubernetes.
815
+ secrets : List[str], optional, default None
816
+ Kubernetes secrets to use when launching pod in Kubernetes. These
817
+ secrets are in addition to the ones defined in `METAFLOW_KUBERNETES_SECRETS`
818
+ in Metaflow configuration.
819
+ node_selector: Union[Dict[str,str], str], optional, default None
820
+ Kubernetes node selector(s) to apply to the pod running the task.
821
+ Can be passed in as a comma separated string of values e.g.
822
+ 'kubernetes.io/os=linux,kubernetes.io/arch=amd64' or as a dictionary
823
+ {'kubernetes.io/os': 'linux', 'kubernetes.io/arch': 'amd64'}
824
+ namespace : str, default METAFLOW_KUBERNETES_NAMESPACE
825
+ Kubernetes namespace to use when launching pod in Kubernetes.
826
+ gpu : int, optional, default None
827
+ Number of GPUs required for this step. A value of zero implies that
828
+ the scheduled node should not have GPUs.
829
+ gpu_vendor : str, default KUBERNETES_GPU_VENDOR
830
+ The vendor of the GPUs to be used for this step.
831
+ tolerations : List[str], default []
832
+ The default is extracted from METAFLOW_KUBERNETES_TOLERATIONS.
833
+ Kubernetes tolerations to use when launching pod in Kubernetes.
834
+ labels: Dict[str, str], default: METAFLOW_KUBERNETES_LABELS
835
+ Kubernetes labels to use when launching pod in Kubernetes.
836
+ annotations: Dict[str, str], default: METAFLOW_KUBERNETES_ANNOTATIONS
837
+ Kubernetes annotations to use when launching pod in Kubernetes.
838
+ use_tmpfs : bool, default False
839
+ This enables an explicit tmpfs mount for this step.
840
+ tmpfs_tempdir : bool, default True
841
+ sets METAFLOW_TEMPDIR to tmpfs_path if set for this step.
842
+ tmpfs_size : int, optional, default: None
843
+ The value for the size (in MiB) of the tmpfs mount for this step.
844
+ This parameter maps to the `--tmpfs` option in Docker. Defaults to 50% of the
845
+ memory allocated for this step.
846
+ tmpfs_path : str, optional, default /metaflow_temp
847
+ Path to tmpfs mount for this step.
848
+ persistent_volume_claims : Dict[str, str], optional, default None
849
+ A map (dictionary) of persistent volumes to be mounted to the pod for this step. The map is from persistent
850
+ volumes to the path to which the volume is to be mounted, e.g., `{'pvc-name': '/path/to/mount/on'}`.
851
+ shared_memory: int, optional
852
+ Shared memory size (in MiB) required for this step
853
+ port: int, optional
854
+ Port number to specify in the Kubernetes job object
855
+ compute_pool : str, optional, default None
856
+ Compute pool to be used for this step.
857
+ If not specified, any accessible compute pool within the perimeter is used.
858
+ hostname_resolution_timeout: int, default 10 * 60
859
+ Timeout in seconds for the worker tasks in the gang scheduled cluster to resolve the hostname of the control task.
860
+ Only applicable when @parallel is used.
861
+ qos: str, default: Burstable
862
+ Quality of Service class to assign to the pod. Supported values are: Guaranteed, Burstable, BestEffort
1105
863
 
1106
- - Loading models
1107
- ```python
1108
- @step
1109
- def train(self):
1110
- # current.model.load returns the path to the model loaded
1111
- checkpoint_path = current.model.load(
1112
- self.checkpoint_key,
1113
- )
1114
- model_path = current.model.load(
1115
- self.model,
1116
- )
1117
- self.next(self.test)
1118
- ```
864
+ security_context: Dict[str, Any], optional, default None
865
+ Container security context. Applies to the task container. Allows the following keys:
866
+ - privileged: bool, optional, default None
867
+ - allow_privilege_escalation: bool, optional, default None
868
+ - run_as_user: int, optional, default None
869
+ - run_as_group: int, optional, default None
870
+ - run_as_non_root: bool, optional, default None
871
+ """
872
+ ...
873
+
874
+ @typing.overload
875
+ def environment(*, vars: typing.Dict[str, str] = {}) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
876
+ """
877
+ Specifies environment variables to be set prior to the execution of a step.
1119
878
 
1120
879
 
1121
880
  Parameters
1122
881
  ----------
1123
- load : Union[List[str],str,List[Tuple[str,Union[str,None]]]], default: None
1124
- Artifact name/s referencing the models/checkpoints to load. Artifact names refer to the names of the instance variables set to `self`.
1125
- These artifact names give to `load` be reference objects or reference `key` string's from objects created by `current.checkpoint` / `current.model` / `current.huggingface_hub`.
1126
- If a list of tuples is provided, the first element is the artifact name and the second element is the path the artifact needs be unpacked on
1127
- the local filesystem. If the second element is None, the artifact will be unpacked in the current working directory.
1128
- If a string is provided, then the artifact corresponding to that name will be loaded in the current working directory.
1129
-
1130
- temp_dir_root : str, default: None
1131
- The root directory under which `current.model.loaded` will store loaded models
882
+ vars : Dict[str, str], default {}
883
+ Dictionary of environment variables to set.
1132
884
  """
1133
885
  ...
1134
886
 
1135
887
  @typing.overload
1136
- def model(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
888
+ def environment(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1137
889
  ...
1138
890
 
1139
891
  @typing.overload
1140
- def model(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
892
+ def environment(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1141
893
  ...
1142
894
 
1143
- def model(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, load: typing.Union[typing.List[str], str, typing.List[typing.Tuple[str, typing.Optional[str]]]] = None, temp_dir_root: str = None):
895
+ def environment(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, vars: typing.Dict[str, str] = {}):
1144
896
  """
1145
- Enables loading / saving of models within a step.
1146
-
1147
- > Examples
1148
- - Saving Models
1149
- ```python
1150
- @model
1151
- @step
1152
- def train(self):
1153
- # current.model.save returns a dictionary reference to the model saved
1154
- self.my_model = current.model.save(
1155
- path_to_my_model,
1156
- label="my_model",
1157
- metadata={
1158
- "epochs": 10,
1159
- "batch-size": 32,
1160
- "learning-rate": 0.001,
1161
- }
1162
- )
1163
- self.next(self.test)
897
+ Specifies environment variables to be set prior to the execution of a step.
1164
898
 
1165
- @model(load="my_model")
1166
- @step
1167
- def test(self):
1168
- # `current.model.loaded` returns a dictionary of the loaded models
1169
- # where the key is the name of the artifact and the value is the path to the model
1170
- print(os.listdir(current.model.loaded["my_model"]))
1171
- self.next(self.end)
1172
- ```
1173
899
 
1174
- - Loading models
1175
- ```python
1176
- @step
1177
- def train(self):
1178
- # current.model.load returns the path to the model loaded
1179
- checkpoint_path = current.model.load(
1180
- self.checkpoint_key,
1181
- )
1182
- model_path = current.model.load(
1183
- self.model,
1184
- )
1185
- self.next(self.test)
1186
- ```
900
+ Parameters
901
+ ----------
902
+ vars : Dict[str, str], default {}
903
+ Dictionary of environment variables to set.
904
+ """
905
+ ...
906
+
907
+ @typing.overload
908
+ def secrets(*, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = []) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
909
+ """
910
+ Specifies secrets to be retrieved and injected as environment variables prior to
911
+ the execution of a step.
1187
912
 
1188
913
 
1189
914
  Parameters
1190
915
  ----------
1191
- load : Union[List[str],str,List[Tuple[str,Union[str,None]]]], default: None
1192
- Artifact name/s referencing the models/checkpoints to load. Artifact names refer to the names of the instance variables set to `self`.
1193
- These artifact names give to `load` be reference objects or reference `key` string's from objects created by `current.checkpoint` / `current.model` / `current.huggingface_hub`.
1194
- If a list of tuples is provided, the first element is the artifact name and the second element is the path the artifact needs be unpacked on
1195
- the local filesystem. If the second element is None, the artifact will be unpacked in the current working directory.
1196
- If a string is provided, then the artifact corresponding to that name will be loaded in the current working directory.
916
+ sources : List[Union[str, Dict[str, Any]]], default: []
917
+ List of secret specs, defining how the secrets are to be retrieved
918
+ """
919
+ ...
920
+
921
+ @typing.overload
922
+ def secrets(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
923
+ ...
924
+
925
+ @typing.overload
926
+ def secrets(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
927
+ ...
928
+
929
+ def secrets(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = []):
930
+ """
931
+ Specifies secrets to be retrieved and injected as environment variables prior to
932
+ the execution of a step.
1197
933
 
1198
- temp_dir_root : str, default: None
1199
- The root directory under which `current.model.loaded` will store loaded models
934
+
935
+ Parameters
936
+ ----------
937
+ sources : List[Union[str, Dict[str, Any]]], default: []
938
+ List of secret specs, defining how the secrets are to be retrieved
1200
939
  """
1201
940
  ...
1202
941
 
1203
942
  @typing.overload
1204
- def environment(*, vars: typing.Dict[str, str] = {}) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
943
+ def resources(*, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1205
944
  """
1206
- Specifies environment variables to be set prior to the execution of a step.
945
+ Specifies the resources needed when executing this step.
946
+
947
+ Use `@resources` to specify the resource requirements
948
+ independently of the specific compute layer (`@batch`, `@kubernetes`).
949
+
950
+ You can choose the compute layer on the command line by executing e.g.
951
+ ```
952
+ python myflow.py run --with batch
953
+ ```
954
+ or
955
+ ```
956
+ python myflow.py run --with kubernetes
957
+ ```
958
+ which executes the flow on the desired system using the
959
+ requirements specified in `@resources`.
1207
960
 
1208
961
 
1209
962
  Parameters
1210
963
  ----------
1211
- vars : Dict[str, str], default {}
1212
- Dictionary of environment variables to set.
964
+ cpu : int, default 1
965
+ Number of CPUs required for this step.
966
+ gpu : int, optional, default None
967
+ Number of GPUs required for this step.
968
+ disk : int, optional, default None
969
+ Disk size (in MB) required for this step. Only applies on Kubernetes.
970
+ memory : int, default 4096
971
+ Memory size (in MB) required for this step.
972
+ shared_memory : int, optional, default None
973
+ The value for the size (in MiB) of the /dev/shm volume for this step.
974
+ This parameter maps to the `--shm-size` option in Docker.
1213
975
  """
1214
976
  ...
1215
977
 
1216
978
  @typing.overload
1217
- def environment(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
979
+ def resources(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1218
980
  ...
1219
981
 
1220
982
  @typing.overload
1221
- def environment(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
983
+ def resources(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1222
984
  ...
1223
985
 
1224
- def environment(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, vars: typing.Dict[str, str] = {}):
986
+ def resources(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None):
1225
987
  """
1226
- Specifies environment variables to be set prior to the execution of a step.
988
+ Specifies the resources needed when executing this step.
989
+
990
+ Use `@resources` to specify the resource requirements
991
+ independently of the specific compute layer (`@batch`, `@kubernetes`).
992
+
993
+ You can choose the compute layer on the command line by executing e.g.
994
+ ```
995
+ python myflow.py run --with batch
996
+ ```
997
+ or
998
+ ```
999
+ python myflow.py run --with kubernetes
1000
+ ```
1001
+ which executes the flow on the desired system using the
1002
+ requirements specified in `@resources`.
1227
1003
 
1228
1004
 
1229
1005
  Parameters
1230
1006
  ----------
1231
- vars : Dict[str, str], default {}
1232
- Dictionary of environment variables to set.
1007
+ cpu : int, default 1
1008
+ Number of CPUs required for this step.
1009
+ gpu : int, optional, default None
1010
+ Number of GPUs required for this step.
1011
+ disk : int, optional, default None
1012
+ Disk size (in MB) required for this step. Only applies on Kubernetes.
1013
+ memory : int, default 4096
1014
+ Memory size (in MB) required for this step.
1015
+ shared_memory : int, optional, default None
1016
+ The value for the size (in MiB) of the /dev/shm volume for this step.
1017
+ This parameter maps to the `--shm-size` option in Docker.
1018
+ """
1019
+ ...
1020
+
1021
+ def vllm(*, model: str, backend: str, openai_api_server: bool, debug: bool, card_refresh_interval: int, max_retries: int, retry_alert_frequency: int, engine_args: dict) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1022
+ """
1023
+ This decorator is used to run vllm APIs as Metaflow task sidecars.
1024
+
1025
+ User code call
1026
+ --------------
1027
+ @vllm(
1028
+ model="...",
1029
+ ...
1030
+ )
1031
+
1032
+ Valid backend options
1033
+ ---------------------
1034
+ - 'local': Run as a separate process on the local task machine.
1035
+
1036
+ Valid model options
1037
+ -------------------
1038
+ Any HuggingFace model identifier, e.g. 'meta-llama/Llama-3.2-1B'
1039
+
1040
+ NOTE: vLLM's OpenAI-compatible server serves ONE model per server instance.
1041
+ If you need multiple models, you must create multiple @vllm decorators.
1042
+
1043
+
1044
+ Parameters
1045
+ ----------
1046
+ model: str
1047
+ HuggingFace model identifier to be served by vLLM.
1048
+ backend: str
1049
+ Determines where and how to run the vLLM process.
1050
+ openai_api_server: bool
1051
+ Whether to use OpenAI-compatible API server mode (subprocess) instead of native engine.
1052
+ Default is False (uses native engine).
1053
+ Set to True for backward compatibility with existing code.
1054
+ debug: bool
1055
+ Whether to turn on verbose debugging logs.
1056
+ card_refresh_interval: int
1057
+ Interval in seconds for refreshing the vLLM status card.
1058
+ Only used when openai_api_server=True.
1059
+ max_retries: int
1060
+ Maximum number of retries checking for vLLM server startup.
1061
+ Only used when openai_api_server=True.
1062
+ retry_alert_frequency: int
1063
+ Frequency of alert logs for vLLM server startup retries.
1064
+ Only used when openai_api_server=True.
1065
+ engine_args : dict
1066
+ Additional keyword arguments to pass to the vLLM engine.
1067
+ For example, `tensor_parallel_size=2`.
1233
1068
  """
1234
1069
  ...
1235
1070
 
1236
1071
  @typing.overload
1237
- def pypi_base(*, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1072
+ def pypi(*, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1238
1073
  """
1239
- Specifies the PyPI packages for all steps of the flow.
1074
+ Specifies the PyPI packages for the step.
1240
1075
 
1241
- Use `@pypi_base` to set common packages required by all
1076
+ Information in this decorator will augment any
1077
+ attributes set in the `@pypi_base` flow-level decorator. Hence,
1078
+ you can use `@pypi_base` to set packages required by all
1242
1079
  steps and use `@pypi` to specify step-specific overrides.
1243
1080
 
1081
+
1244
1082
  Parameters
1245
1083
  ----------
1246
1084
  packages : Dict[str, str], default: {}
1247
- Packages to use for this flow. The key is the name of the package
1085
+ Packages to use for this step. The key is the name of the package
1248
1086
  and the value is the version to use.
1249
1087
  python : str, optional, default: None
1250
1088
  Version of Python to use, e.g. '3.7.4'. A default value of None implies
@@ -1253,20 +1091,27 @@ def pypi_base(*, packages: typing.Dict[str, str] = {}, python: typing.Optional[s
1253
1091
  ...
1254
1092
 
1255
1093
  @typing.overload
1256
- def pypi_base(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1094
+ def pypi(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1257
1095
  ...
1258
1096
 
1259
- def pypi_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None):
1097
+ @typing.overload
1098
+ def pypi(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1099
+ ...
1100
+
1101
+ def pypi(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None):
1260
1102
  """
1261
- Specifies the PyPI packages for all steps of the flow.
1103
+ Specifies the PyPI packages for the step.
1262
1104
 
1263
- Use `@pypi_base` to set common packages required by all
1105
+ Information in this decorator will augment any
1106
+ attributes set in the `@pypi_base` flow-level decorator. Hence,
1107
+ you can use `@pypi_base` to set packages required by all
1264
1108
  steps and use `@pypi` to specify step-specific overrides.
1265
1109
 
1110
+
1266
1111
  Parameters
1267
1112
  ----------
1268
1113
  packages : Dict[str, str], default: {}
1269
- Packages to use for this flow. The key is the name of the package
1114
+ Packages to use for this step. The key is the name of the package
1270
1115
  and the value is the version to use.
1271
1116
  python : str, optional, default: None
1272
1117
  Version of Python to use, e.g. '3.7.4'. A default value of None implies
@@ -1274,217 +1119,129 @@ def pypi_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packag
1274
1119
  """
1275
1120
  ...
1276
1121
 
1277
- def project(*, name: str, branch: typing.Optional[str] = None, production: bool = False) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1122
+ @typing.overload
1123
+ def catch(*, var: typing.Optional[str] = None, print_exception: bool = True) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1278
1124
  """
1279
- Specifies what flows belong to the same project.
1125
+ Specifies that the step will succeed under all circumstances.
1280
1126
 
1281
- A project-specific namespace is created for all flows that
1282
- use the same `@project(name)`.
1127
+ The decorator will create an optional artifact, specified by `var`, which
1128
+ contains the exception raised. You can use it to detect the presence
1129
+ of errors, indicating that all happy-path artifacts produced by the step
1130
+ are missing.
1283
1131
 
1284
1132
 
1285
1133
  Parameters
1286
1134
  ----------
1287
- name : str
1288
- Project name. Make sure that the name is unique amongst all
1289
- projects that use the same production scheduler. The name may
1290
- contain only lowercase alphanumeric characters and underscores.
1291
-
1292
- branch : Optional[str], default None
1293
- The branch to use. If not specified, the branch is set to
1294
- `user.<username>` unless `production` is set to `True`. This can
1295
- also be set on the command line using `--branch` as a top-level option.
1296
- It is an error to specify `branch` in the decorator and on the command line.
1297
-
1298
- production : bool, default False
1299
- Whether or not the branch is the production branch. This can also be set on the
1300
- command line using `--production` as a top-level option. It is an error to specify
1301
- `production` in the decorator and on the command line.
1302
- The project branch name will be:
1303
- - if `branch` is specified:
1304
- - if `production` is True: `prod.<branch>`
1305
- - if `production` is False: `test.<branch>`
1306
- - if `branch` is not specified:
1307
- - if `production` is True: `prod`
1308
- - if `production` is False: `user.<username>`
1135
+ var : str, optional, default None
1136
+ Name of the artifact in which to store the caught exception.
1137
+ If not specified, the exception is not stored.
1138
+ print_exception : bool, default True
1139
+ Determines whether or not the exception is printed to
1140
+ stdout when caught.
1309
1141
  """
1310
1142
  ...
1311
1143
 
1312
- def airflow_s3_key_sensor(*, timeout: int, poke_interval: int, mode: str, exponential_backoff: bool, pool: str, soft_fail: bool, name: str, description: str, bucket_key: typing.Union[str, typing.List[str]], bucket_name: str, wildcard_match: bool, aws_conn_id: str, verify: bool) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1144
+ @typing.overload
1145
+ def catch(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1146
+ ...
1147
+
1148
+ @typing.overload
1149
+ def catch(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1150
+ ...
1151
+
1152
+ def catch(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, var: typing.Optional[str] = None, print_exception: bool = True):
1313
1153
  """
1314
- The `@airflow_s3_key_sensor` decorator attaches a Airflow [S3KeySensor](https://airflow.apache.org/docs/apache-airflow-providers-amazon/stable/_api/airflow/providers/amazon/aws/sensors/s3/index.html#airflow.providers.amazon.aws.sensors.s3.S3KeySensor)
1315
- before the start step of the flow. This decorator only works when a flow is scheduled on Airflow
1316
- and is compiled using `airflow create`. More than one `@airflow_s3_key_sensor` can be
1317
- added as a flow decorators. Adding more than one decorator will ensure that `start` step
1318
- starts only after all sensors finish.
1154
+ Specifies that the step will succeed under all circumstances.
1155
+
1156
+ The decorator will create an optional artifact, specified by `var`, which
1157
+ contains the exception raised. You can use it to detect the presence
1158
+ of errors, indicating that all happy-path artifacts produced by the step
1159
+ are missing.
1319
1160
 
1320
1161
 
1321
1162
  Parameters
1322
1163
  ----------
1323
- timeout : int
1324
- Time, in seconds before the task times out and fails. (Default: 3600)
1325
- poke_interval : int
1326
- Time in seconds that the job should wait in between each try. (Default: 60)
1327
- mode : str
1328
- How the sensor operates. Options are: { poke | reschedule }. (Default: "poke")
1329
- exponential_backoff : bool
1330
- allow progressive longer waits between pokes by using exponential backoff algorithm. (Default: True)
1331
- pool : str
1332
- the slot pool this task should run in,
1333
- slot pools are a way to limit concurrency for certain tasks. (Default:None)
1334
- soft_fail : bool
1335
- Set to true to mark the task as SKIPPED on failure. (Default: False)
1336
- name : str
1337
- Name of the sensor on Airflow
1338
- description : str
1339
- Description of sensor in the Airflow UI
1340
- bucket_key : Union[str, List[str]]
1341
- The key(s) being waited on. Supports full s3:// style url or relative path from root level.
1342
- When it's specified as a full s3:// url, please leave `bucket_name` as None
1343
- bucket_name : str
1344
- Name of the S3 bucket. Only needed when bucket_key is not provided as a full s3:// url.
1345
- When specified, all the keys passed to bucket_key refers to this bucket. (Default:None)
1346
- wildcard_match : bool
1347
- whether the bucket_key should be interpreted as a Unix wildcard pattern. (Default: False)
1348
- aws_conn_id : str
1349
- a reference to the s3 connection on Airflow. (Default: None)
1350
- verify : bool
1351
- Whether or not to verify SSL certificates for S3 connection. (Default: None)
1164
+ var : str, optional, default None
1165
+ Name of the artifact in which to store the caught exception.
1166
+ If not specified, the exception is not stored.
1167
+ print_exception : bool, default True
1168
+ Determines whether or not the exception is printed to
1169
+ stdout when caught.
1352
1170
  """
1353
1171
  ...
1354
1172
 
1355
1173
  @typing.overload
1356
- def trigger(*, event: typing.Union[str, typing.Dict[str, typing.Any], None] = None, events: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = [], options: typing.Dict[str, typing.Any] = {}) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1174
+ def timeout(*, seconds: int = 0, minutes: int = 0, hours: int = 0) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1357
1175
  """
1358
- Specifies the event(s) that this flow depends on.
1176
+ Specifies a timeout for your step.
1359
1177
 
1360
- ```
1361
- @trigger(event='foo')
1362
- ```
1363
- or
1364
- ```
1365
- @trigger(events=['foo', 'bar'])
1366
- ```
1178
+ This decorator is useful if this step may hang indefinitely.
1367
1179
 
1368
- Additionally, you can specify the parameter mappings
1369
- to map event payload to Metaflow parameters for the flow.
1370
- ```
1371
- @trigger(event={'name':'foo', 'parameters':{'flow_param': 'event_field'}})
1372
- ```
1373
- or
1374
- ```
1375
- @trigger(events=[{'name':'foo', 'parameters':{'flow_param_1': 'event_field_1'},
1376
- {'name':'bar', 'parameters':{'flow_param_2': 'event_field_2'}])
1377
- ```
1180
+ This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
1181
+ A timeout is considered to be an exception thrown by the step. It will cause the step to be
1182
+ retried if needed and the exception will be caught by the `@catch` decorator, if present.
1378
1183
 
1379
- 'parameters' can also be a list of strings and tuples like so:
1380
- ```
1381
- @trigger(event={'name':'foo', 'parameters':['common_name', ('flow_param', 'event_field')]})
1382
- ```
1383
- This is equivalent to:
1384
- ```
1385
- @trigger(event={'name':'foo', 'parameters':{'common_name': 'common_name', 'flow_param': 'event_field'}})
1386
- ```
1184
+ Note that all the values specified in parameters are added together so if you specify
1185
+ 60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.
1387
1186
 
1388
1187
 
1389
1188
  Parameters
1390
1189
  ----------
1391
- event : Union[str, Dict[str, Any]], optional, default None
1392
- Event dependency for this flow.
1393
- events : List[Union[str, Dict[str, Any]]], default []
1394
- Events dependency for this flow.
1395
- options : Dict[str, Any], default {}
1396
- Backend-specific configuration for tuning eventing behavior.
1190
+ seconds : int, default 0
1191
+ Number of seconds to wait prior to timing out.
1192
+ minutes : int, default 0
1193
+ Number of minutes to wait prior to timing out.
1194
+ hours : int, default 0
1195
+ Number of hours to wait prior to timing out.
1397
1196
  """
1398
1197
  ...
1399
1198
 
1400
1199
  @typing.overload
1401
- def trigger(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1200
+ def timeout(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1402
1201
  ...
1403
1202
 
1404
- def trigger(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, event: typing.Union[str, typing.Dict[str, typing.Any], None] = None, events: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = [], options: typing.Dict[str, typing.Any] = {}):
1203
+ @typing.overload
1204
+ def timeout(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1205
+ ...
1206
+
1207
+ def timeout(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, seconds: int = 0, minutes: int = 0, hours: int = 0):
1405
1208
  """
1406
- Specifies the event(s) that this flow depends on.
1209
+ Specifies a timeout for your step.
1407
1210
 
1408
- ```
1409
- @trigger(event='foo')
1410
- ```
1411
- or
1412
- ```
1413
- @trigger(events=['foo', 'bar'])
1414
- ```
1211
+ This decorator is useful if this step may hang indefinitely.
1415
1212
 
1416
- Additionally, you can specify the parameter mappings
1417
- to map event payload to Metaflow parameters for the flow.
1418
- ```
1419
- @trigger(event={'name':'foo', 'parameters':{'flow_param': 'event_field'}})
1420
- ```
1421
- or
1422
- ```
1423
- @trigger(events=[{'name':'foo', 'parameters':{'flow_param_1': 'event_field_1'},
1424
- {'name':'bar', 'parameters':{'flow_param_2': 'event_field_2'}])
1425
- ```
1213
+ This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
1214
+ A timeout is considered to be an exception thrown by the step. It will cause the step to be
1215
+ retried if needed and the exception will be caught by the `@catch` decorator, if present.
1426
1216
 
1427
- 'parameters' can also be a list of strings and tuples like so:
1428
- ```
1429
- @trigger(event={'name':'foo', 'parameters':['common_name', ('flow_param', 'event_field')]})
1430
- ```
1431
- This is equivalent to:
1432
- ```
1433
- @trigger(event={'name':'foo', 'parameters':{'common_name': 'common_name', 'flow_param': 'event_field'}})
1434
- ```
1217
+ Note that all the values specified in parameters are added together so if you specify
1218
+ 60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.
1435
1219
 
1436
1220
 
1437
1221
  Parameters
1438
1222
  ----------
1439
- event : Union[str, Dict[str, Any]], optional, default None
1440
- Event dependency for this flow.
1441
- events : List[Union[str, Dict[str, Any]]], default []
1442
- Events dependency for this flow.
1443
- options : Dict[str, Any], default {}
1444
- Backend-specific configuration for tuning eventing behavior.
1223
+ seconds : int, default 0
1224
+ Number of seconds to wait prior to timing out.
1225
+ minutes : int, default 0
1226
+ Number of minutes to wait prior to timing out.
1227
+ hours : int, default 0
1228
+ Number of hours to wait prior to timing out.
1445
1229
  """
1446
1230
  ...
1447
1231
 
1448
- def airflow_external_task_sensor(*, timeout: int, poke_interval: int, mode: str, exponential_backoff: bool, pool: str, soft_fail: bool, name: str, description: str, external_dag_id: str, external_task_ids: typing.List[str], allowed_states: typing.List[str], failed_states: typing.List[str], execution_delta: "datetime.timedelta", check_existence: bool) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1232
+ def nvidia(*, gpu: int, gpu_type: str, queue_timeout: int) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1449
1233
  """
1450
- The `@airflow_external_task_sensor` decorator attaches a Airflow [ExternalTaskSensor](https://airflow.apache.org/docs/apache-airflow/stable/_api/airflow/sensors/external_task/index.html#airflow.sensors.external_task.ExternalTaskSensor) before the start step of the flow.
1451
- This decorator only works when a flow is scheduled on Airflow and is compiled using `airflow create`. More than one `@airflow_external_task_sensor` can be added as a flow decorators. Adding more than one decorator will ensure that `start` step starts only after all sensors finish.
1234
+ Specifies that this step should execute on DGX cloud.
1452
1235
 
1453
1236
 
1454
1237
  Parameters
1455
1238
  ----------
1456
- timeout : int
1457
- Time, in seconds before the task times out and fails. (Default: 3600)
1458
- poke_interval : int
1459
- Time in seconds that the job should wait in between each try. (Default: 60)
1460
- mode : str
1461
- How the sensor operates. Options are: { poke | reschedule }. (Default: "poke")
1462
- exponential_backoff : bool
1463
- allow progressive longer waits between pokes by using exponential backoff algorithm. (Default: True)
1464
- pool : str
1465
- the slot pool this task should run in,
1466
- slot pools are a way to limit concurrency for certain tasks. (Default:None)
1467
- soft_fail : bool
1468
- Set to true to mark the task as SKIPPED on failure. (Default: False)
1469
- name : str
1470
- Name of the sensor on Airflow
1471
- description : str
1472
- Description of sensor in the Airflow UI
1473
- external_dag_id : str
1474
- The dag_id that contains the task you want to wait for.
1475
- external_task_ids : List[str]
1476
- The list of task_ids that you want to wait for.
1477
- If None (default value) the sensor waits for the DAG. (Default: None)
1478
- allowed_states : List[str]
1479
- Iterable of allowed states, (Default: ['success'])
1480
- failed_states : List[str]
1481
- Iterable of failed or dis-allowed states. (Default: None)
1482
- execution_delta : datetime.timedelta
1483
- time difference with the previous execution to look at,
1484
- the default is the same logical date as the current task or DAG. (Default: None)
1485
- check_existence: bool
1486
- Set to True to check if the external task exists or check if
1487
- the DAG to wait for exists. (Default: True)
1239
+ gpu : int
1240
+ Number of GPUs to use.
1241
+ gpu_type : str
1242
+ Type of Nvidia GPU to use.
1243
+ queue_timeout : int
1244
+ Time to keep the job in NVCF's queue.
1488
1245
  """
1489
1246
  ...
1490
1247
 
@@ -1590,104 +1347,136 @@ def trigger_on_finish(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *
1590
1347
  ...
1591
1348
 
1592
1349
  @typing.overload
1593
- def schedule(*, hourly: bool = False, daily: bool = True, weekly: bool = False, cron: typing.Optional[str] = None, timezone: typing.Optional[str] = None) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1350
+ def trigger(*, event: typing.Union[str, typing.Dict[str, typing.Any], None] = None, events: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = [], options: typing.Dict[str, typing.Any] = {}) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1594
1351
  """
1595
- Specifies the times when the flow should be run when running on a
1596
- production scheduler.
1352
+ Specifies the event(s) that this flow depends on.
1353
+
1354
+ ```
1355
+ @trigger(event='foo')
1356
+ ```
1357
+ or
1358
+ ```
1359
+ @trigger(events=['foo', 'bar'])
1360
+ ```
1361
+
1362
+ Additionally, you can specify the parameter mappings
1363
+ to map event payload to Metaflow parameters for the flow.
1364
+ ```
1365
+ @trigger(event={'name':'foo', 'parameters':{'flow_param': 'event_field'}})
1366
+ ```
1367
+ or
1368
+ ```
1369
+ @trigger(events=[{'name':'foo', 'parameters':{'flow_param_1': 'event_field_1'}},
1370
+ {'name':'bar', 'parameters':{'flow_param_2': 'event_field_2'}}])
1371
+ ```
1372
+
1373
+ 'parameters' can also be a list of strings and tuples like so:
1374
+ ```
1375
+ @trigger(event={'name':'foo', 'parameters':['common_name', ('flow_param', 'event_field')]})
1376
+ ```
1377
+ This is equivalent to:
1378
+ ```
1379
+ @trigger(event={'name':'foo', 'parameters':{'common_name': 'common_name', 'flow_param': 'event_field'}})
1380
+ ```
1597
1381
 
1598
1382
 
1599
1383
  Parameters
1600
1384
  ----------
1601
- hourly : bool, default False
1602
- Run the workflow hourly.
1603
- daily : bool, default True
1604
- Run the workflow daily.
1605
- weekly : bool, default False
1606
- Run the workflow weekly.
1607
- cron : str, optional, default None
1608
- Run the workflow at [a custom Cron schedule](https://docs.aws.amazon.com/eventbridge/latest/userguide/scheduled-events.html#cron-expressions)
1609
- specified by this expression.
1610
- timezone : str, optional, default None
1611
- Timezone on which the schedule runs (default: None). Currently supported only for Argo workflows,
1612
- which accepts timezones in [IANA format](https://nodatime.org/TimeZones).
1385
+ event : Union[str, Dict[str, Any]], optional, default None
1386
+ Event dependency for this flow.
1387
+ events : List[Union[str, Dict[str, Any]]], default []
1388
+ Events dependency for this flow.
1389
+ options : Dict[str, Any], default {}
1390
+ Backend-specific configuration for tuning eventing behavior.
1613
1391
  """
1614
1392
  ...
1615
1393
 
1616
1394
  @typing.overload
1617
- def schedule(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1395
+ def trigger(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1618
1396
  ...
1619
1397
 
1620
- def schedule(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, hourly: bool = False, daily: bool = True, weekly: bool = False, cron: typing.Optional[str] = None, timezone: typing.Optional[str] = None):
1398
+ def trigger(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, event: typing.Union[str, typing.Dict[str, typing.Any], None] = None, events: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = [], options: typing.Dict[str, typing.Any] = {}):
1621
1399
  """
1622
- Specifies the times when the flow should be run when running on a
1623
- production scheduler.
1400
+ Specifies the event(s) that this flow depends on.
1401
+
1402
+ ```
1403
+ @trigger(event='foo')
1404
+ ```
1405
+ or
1406
+ ```
1407
+ @trigger(events=['foo', 'bar'])
1408
+ ```
1409
+
1410
+ Additionally, you can specify the parameter mappings
1411
+ to map event payload to Metaflow parameters for the flow.
1412
+ ```
1413
+ @trigger(event={'name':'foo', 'parameters':{'flow_param': 'event_field'}})
1414
+ ```
1415
+ or
1416
+ ```
1417
+ @trigger(events=[{'name':'foo', 'parameters':{'flow_param_1': 'event_field_1'}},
1418
+ {'name':'bar', 'parameters':{'flow_param_2': 'event_field_2'}}])
1419
+ ```
1420
+
1421
+ 'parameters' can also be a list of strings and tuples like so:
1422
+ ```
1423
+ @trigger(event={'name':'foo', 'parameters':['common_name', ('flow_param', 'event_field')]})
1424
+ ```
1425
+ This is equivalent to:
1426
+ ```
1427
+ @trigger(event={'name':'foo', 'parameters':{'common_name': 'common_name', 'flow_param': 'event_field'}})
1428
+ ```
1624
1429
 
1625
1430
 
1626
1431
  Parameters
1627
1432
  ----------
1628
- hourly : bool, default False
1629
- Run the workflow hourly.
1630
- daily : bool, default True
1631
- Run the workflow daily.
1632
- weekly : bool, default False
1633
- Run the workflow weekly.
1634
- cron : str, optional, default None
1635
- Run the workflow at [a custom Cron schedule](https://docs.aws.amazon.com/eventbridge/latest/userguide/scheduled-events.html#cron-expressions)
1636
- specified by this expression.
1637
- timezone : str, optional, default None
1638
- Timezone on which the schedule runs (default: None). Currently supported only for Argo workflows,
1639
- which accepts timezones in [IANA format](https://nodatime.org/TimeZones).
1433
+ event : Union[str, Dict[str, Any]], optional, default None
1434
+ Event dependency for this flow.
1435
+ events : List[Union[str, Dict[str, Any]]], default []
1436
+ Events dependency for this flow.
1437
+ options : Dict[str, Any], default {}
1438
+ Backend-specific configuration for tuning eventing behavior.
1640
1439
  """
1641
1440
  ...
1642
1441
 
1643
- @typing.overload
1644
- def conda_base(*, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1645
- """
1646
- Specifies the Conda environment for all steps of the flow.
1647
-
1648
- Use `@conda_base` to set common libraries required by all
1649
- steps and use `@conda` to specify step-specific additions.
1442
+ @typing.overload
1443
+ def pypi_base(*, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1444
+ """
1445
+ Specifies the PyPI packages for all steps of the flow.
1650
1446
 
1447
+ Use `@pypi_base` to set common packages required by all
1448
+ steps and use `@pypi` to specify step-specific overrides.
1651
1449
 
1652
1450
  Parameters
1653
1451
  ----------
1654
- packages : Dict[str, str], default {}
1452
+ packages : Dict[str, str], default: {}
1655
1453
  Packages to use for this flow. The key is the name of the package
1656
1454
  and the value is the version to use.
1657
- libraries : Dict[str, str], default {}
1658
- Supported for backward compatibility. When used with packages, packages will take precedence.
1659
- python : str, optional, default None
1455
+ python : str, optional, default: None
1660
1456
  Version of Python to use, e.g. '3.7.4'. A default value of None implies
1661
1457
  that the version used will correspond to the version of the Python interpreter used to start the run.
1662
- disabled : bool, default False
1663
- If set to True, disables Conda.
1664
1458
  """
1665
1459
  ...
1666
1460
 
1667
1461
  @typing.overload
1668
- def conda_base(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1462
+ def pypi_base(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1669
1463
  ...
1670
1464
 
1671
- def conda_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False):
1465
+ def pypi_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None):
1672
1466
  """
1673
- Specifies the Conda environment for all steps of the flow.
1674
-
1675
- Use `@conda_base` to set common libraries required by all
1676
- steps and use `@conda` to specify step-specific additions.
1467
+ Specifies the PyPI packages for all steps of the flow.
1677
1468
 
1469
+ Use `@pypi_base` to set common packages required by all
1470
+ steps and use `@pypi` to specify step-specific overrides.
1678
1471
 
1679
1472
  Parameters
1680
1473
  ----------
1681
- packages : Dict[str, str], default {}
1474
+ packages : Dict[str, str], default: {}
1682
1475
  Packages to use for this flow. The key is the name of the package
1683
1476
  and the value is the version to use.
1684
- libraries : Dict[str, str], default {}
1685
- Supported for backward compatibility. When used with packages, packages will take precedence.
1686
- python : str, optional, default None
1477
+ python : str, optional, default: None
1687
1478
  Version of Python to use, e.g. '3.7.4'. A default value of None implies
1688
1479
  that the version used will correspond to the version of the Python interpreter used to start the run.
1689
- disabled : bool, default False
1690
- If set to True, disables Conda.
1691
1480
  """
1692
1481
  ...
1693
1482
 
@@ -1805,5 +1594,228 @@ def with_artifact_store(f: typing.Optional[typing.Type[FlowSpecDerived]] = None)
1805
1594
  """
1806
1595
  ...
1807
1596
 
1597
+ @typing.overload
1598
+ def schedule(*, hourly: bool = False, daily: bool = True, weekly: bool = False, cron: typing.Optional[str] = None, timezone: typing.Optional[str] = None) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1599
+ """
1600
+ Specifies the times when the flow should be run when running on a
1601
+ production scheduler.
1602
+
1603
+
1604
+ Parameters
1605
+ ----------
1606
+ hourly : bool, default False
1607
+ Run the workflow hourly.
1608
+ daily : bool, default True
1609
+ Run the workflow daily.
1610
+ weekly : bool, default False
1611
+ Run the workflow weekly.
1612
+ cron : str, optional, default None
1613
+ Run the workflow at [a custom Cron schedule](https://docs.aws.amazon.com/eventbridge/latest/userguide/scheduled-events.html#cron-expressions)
1614
+ specified by this expression.
1615
+ timezone : str, optional, default None
1616
+ Timezone on which the schedule runs (default: None). Currently supported only for Argo workflows,
1617
+ which accepts timezones in [IANA format](https://nodatime.org/TimeZones).
1618
+ """
1619
+ ...
1620
+
1621
+ @typing.overload
1622
+ def schedule(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1623
+ ...
1624
+
1625
+ def schedule(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, hourly: bool = False, daily: bool = True, weekly: bool = False, cron: typing.Optional[str] = None, timezone: typing.Optional[str] = None):
1626
+ """
1627
+ Specifies the times when the flow should be run when running on a
1628
+ production scheduler.
1629
+
1630
+
1631
+ Parameters
1632
+ ----------
1633
+ hourly : bool, default False
1634
+ Run the workflow hourly.
1635
+ daily : bool, default True
1636
+ Run the workflow daily.
1637
+ weekly : bool, default False
1638
+ Run the workflow weekly.
1639
+ cron : str, optional, default None
1640
+ Run the workflow at [a custom Cron schedule](https://docs.aws.amazon.com/eventbridge/latest/userguide/scheduled-events.html#cron-expressions)
1641
+ specified by this expression.
1642
+ timezone : str, optional, default None
1643
+ Timezone on which the schedule runs (default: None). Currently supported only for Argo workflows,
1644
+ which accepts timezones in [IANA format](https://nodatime.org/TimeZones).
1645
+ """
1646
+ ...
1647
+
1648
+ def project(*, name: str, branch: typing.Optional[str] = None, production: bool = False) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1649
+ """
1650
+ Specifies what flows belong to the same project.
1651
+
1652
+ A project-specific namespace is created for all flows that
1653
+ use the same `@project(name)`.
1654
+
1655
+
1656
+ Parameters
1657
+ ----------
1658
+ name : str
1659
+ Project name. Make sure that the name is unique amongst all
1660
+ projects that use the same production scheduler. The name may
1661
+ contain only lowercase alphanumeric characters and underscores.
1662
+
1663
+ branch : Optional[str], default None
1664
+ The branch to use. If not specified, the branch is set to
1665
+ `user.<username>` unless `production` is set to `True`. This can
1666
+ also be set on the command line using `--branch` as a top-level option.
1667
+ It is an error to specify `branch` in the decorator and on the command line.
1668
+
1669
+ production : bool, default False
1670
+ Whether or not the branch is the production branch. This can also be set on the
1671
+ command line using `--production` as a top-level option. It is an error to specify
1672
+ `production` in the decorator and on the command line.
1673
+ The project branch name will be:
1674
+ - if `branch` is specified:
1675
+ - if `production` is True: `prod.<branch>`
1676
+ - if `production` is False: `test.<branch>`
1677
+ - if `branch` is not specified:
1678
+ - if `production` is True: `prod`
1679
+ - if `production` is False: `user.<username>`
1680
+ """
1681
+ ...
1682
+
1683
+ def airflow_external_task_sensor(*, timeout: int, poke_interval: int, mode: str, exponential_backoff: bool, pool: str, soft_fail: bool, name: str, description: str, external_dag_id: str, external_task_ids: typing.List[str], allowed_states: typing.List[str], failed_states: typing.List[str], execution_delta: "datetime.timedelta", check_existence: bool) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1684
+ """
1685
+ The `@airflow_external_task_sensor` decorator attaches an Airflow [ExternalTaskSensor](https://airflow.apache.org/docs/apache-airflow/stable/_api/airflow/sensors/external_task/index.html#airflow.sensors.external_task.ExternalTaskSensor) before the start step of the flow.
1686
+ This decorator only works when a flow is scheduled on Airflow and is compiled using `airflow create`. More than one `@airflow_external_task_sensor` can be added as flow decorators. Adding more than one decorator will ensure that the `start` step starts only after all sensors finish.
1687
+
1688
+
1689
+ Parameters
1690
+ ----------
1691
+ timeout : int
1692
+ Time, in seconds before the task times out and fails. (Default: 3600)
1693
+ poke_interval : int
1694
+ Time in seconds that the job should wait in between each try. (Default: 60)
1695
+ mode : str
1696
+ How the sensor operates. Options are: { poke | reschedule }. (Default: "poke")
1697
+ exponential_backoff : bool
1698
+ Allow progressively longer waits between pokes by using an exponential backoff algorithm. (Default: True)
1699
+ pool : str
1700
+ the slot pool this task should run in,
1701
+ slot pools are a way to limit concurrency for certain tasks. (Default:None)
1702
+ soft_fail : bool
1703
+ Set to true to mark the task as SKIPPED on failure. (Default: False)
1704
+ name : str
1705
+ Name of the sensor on Airflow
1706
+ description : str
1707
+ Description of sensor in the Airflow UI
1708
+ external_dag_id : str
1709
+ The dag_id that contains the task you want to wait for.
1710
+ external_task_ids : List[str]
1711
+ The list of task_ids that you want to wait for.
1712
+ If None (default value) the sensor waits for the DAG. (Default: None)
1713
+ allowed_states : List[str]
1714
+ Iterable of allowed states. (Default: ['success'])
1715
+ failed_states : List[str]
1716
+ Iterable of failed or dis-allowed states. (Default: None)
1717
+ execution_delta : datetime.timedelta
1718
+ time difference with the previous execution to look at,
1719
+ the default is the same logical date as the current task or DAG. (Default: None)
1720
+ check_existence : bool
1721
+ Set to True to check if the external task exists or check if
1722
+ the DAG to wait for exists. (Default: True)
1723
+ """
1724
+ ...
1725
+
1726
+ def airflow_s3_key_sensor(*, timeout: int, poke_interval: int, mode: str, exponential_backoff: bool, pool: str, soft_fail: bool, name: str, description: str, bucket_key: typing.Union[str, typing.List[str]], bucket_name: str, wildcard_match: bool, aws_conn_id: str, verify: bool) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1727
+ """
1728
+ The `@airflow_s3_key_sensor` decorator attaches an Airflow [S3KeySensor](https://airflow.apache.org/docs/apache-airflow-providers-amazon/stable/_api/airflow/providers/amazon/aws/sensors/s3/index.html#airflow.providers.amazon.aws.sensors.s3.S3KeySensor)
1729
+ before the start step of the flow. This decorator only works when a flow is scheduled on Airflow
1730
+ and is compiled using `airflow create`. More than one `@airflow_s3_key_sensor` can be
1731
+ added as flow decorators. Adding more than one decorator will ensure that the `start` step
1732
+ starts only after all sensors finish.
1733
+
1734
+
1735
+ Parameters
1736
+ ----------
1737
+ timeout : int
1738
+ Time, in seconds before the task times out and fails. (Default: 3600)
1739
+ poke_interval : int
1740
+ Time in seconds that the job should wait in between each try. (Default: 60)
1741
+ mode : str
1742
+ How the sensor operates. Options are: { poke | reschedule }. (Default: "poke")
1743
+ exponential_backoff : bool
1744
+ Allow progressively longer waits between pokes by using an exponential backoff algorithm. (Default: True)
1745
+ pool : str
1746
+ the slot pool this task should run in,
1747
+ slot pools are a way to limit concurrency for certain tasks. (Default:None)
1748
+ soft_fail : bool
1749
+ Set to true to mark the task as SKIPPED on failure. (Default: False)
1750
+ name : str
1751
+ Name of the sensor on Airflow
1752
+ description : str
1753
+ Description of sensor in the Airflow UI
1754
+ bucket_key : Union[str, List[str]]
1755
+ The key(s) being waited on. Supports full s3:// style url or relative path from root level.
1756
+ When it's specified as a full s3:// url, please leave `bucket_name` as None
1757
+ bucket_name : str
1758
+ Name of the S3 bucket. Only needed when bucket_key is not provided as a full s3:// url.
1759
+ When specified, all the keys passed to bucket_key refer to this bucket. (Default: None)
1760
+ wildcard_match : bool
1761
+ whether the bucket_key should be interpreted as a Unix wildcard pattern. (Default: False)
1762
+ aws_conn_id : str
1763
+ a reference to the s3 connection on Airflow. (Default: None)
1764
+ verify : bool
1765
+ Whether or not to verify SSL certificates for S3 connection. (Default: None)
1766
+ """
1767
+ ...
1768
+
1769
+ @typing.overload
1770
+ def conda_base(*, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1771
+ """
1772
+ Specifies the Conda environment for all steps of the flow.
1773
+
1774
+ Use `@conda_base` to set common libraries required by all
1775
+ steps and use `@conda` to specify step-specific additions.
1776
+
1777
+
1778
+ Parameters
1779
+ ----------
1780
+ packages : Dict[str, str], default {}
1781
+ Packages to use for this flow. The key is the name of the package
1782
+ and the value is the version to use.
1783
+ libraries : Dict[str, str], default {}
1784
+ Supported for backward compatibility. When used with packages, packages will take precedence.
1785
+ python : str, optional, default None
1786
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
1787
+ that the version used will correspond to the version of the Python interpreter used to start the run.
1788
+ disabled : bool, default False
1789
+ If set to True, disables Conda.
1790
+ """
1791
+ ...
1792
+
1793
+ @typing.overload
1794
+ def conda_base(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1795
+ ...
1796
+
1797
+ def conda_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False):
1798
+ """
1799
+ Specifies the Conda environment for all steps of the flow.
1800
+
1801
+ Use `@conda_base` to set common libraries required by all
1802
+ steps and use `@conda` to specify step-specific additions.
1803
+
1804
+
1805
+ Parameters
1806
+ ----------
1807
+ packages : Dict[str, str], default {}
1808
+ Packages to use for this flow. The key is the name of the package
1809
+ and the value is the version to use.
1810
+ libraries : Dict[str, str], default {}
1811
+ Supported for backward compatibility. When used with packages, packages will take precedence.
1812
+ python : str, optional, default None
1813
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
1814
+ that the version used will correspond to the version of the Python interpreter used to start the run.
1815
+ disabled : bool, default False
1816
+ If set to True, disables Conda.
1817
+ """
1818
+ ...
1819
+
1808
1820
  pkg_name: str
1809
1821