ob-metaflow-stubs 6.0.4.6rc0__py2.py3-none-any.whl → 6.0.4.7__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (249)
  1. metaflow-stubs/__init__.pyi +944 -925
  2. metaflow-stubs/cards.pyi +2 -2
  3. metaflow-stubs/cli.pyi +2 -2
  4. metaflow-stubs/cli_components/__init__.pyi +2 -2
  5. metaflow-stubs/cli_components/utils.pyi +2 -2
  6. metaflow-stubs/client/__init__.pyi +2 -2
  7. metaflow-stubs/client/core.pyi +6 -6
  8. metaflow-stubs/client/filecache.pyi +3 -3
  9. metaflow-stubs/events.pyi +2 -2
  10. metaflow-stubs/exception.pyi +2 -2
  11. metaflow-stubs/flowspec.pyi +4 -4
  12. metaflow-stubs/generated_for.txt +1 -1
  13. metaflow-stubs/includefile.pyi +3 -3
  14. metaflow-stubs/info_file.pyi +2 -2
  15. metaflow-stubs/metadata_provider/__init__.pyi +2 -2
  16. metaflow-stubs/metadata_provider/heartbeat.pyi +2 -2
  17. metaflow-stubs/metadata_provider/metadata.pyi +2 -2
  18. metaflow-stubs/metadata_provider/util.pyi +2 -2
  19. metaflow-stubs/metaflow_config.pyi +2 -2
  20. metaflow-stubs/metaflow_current.pyi +53 -53
  21. metaflow-stubs/metaflow_git.pyi +2 -2
  22. metaflow-stubs/mf_extensions/__init__.pyi +2 -2
  23. metaflow-stubs/mf_extensions/obcheckpoint/__init__.pyi +2 -2
  24. metaflow-stubs/mf_extensions/obcheckpoint/plugins/__init__.pyi +2 -2
  25. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/__init__.pyi +2 -2
  26. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/__init__.pyi +2 -2
  27. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/async_cards.pyi +2 -2
  28. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/deco_injection_mixin.pyi +2 -2
  29. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/extra_components.pyi +3 -3
  30. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/__init__.pyi +2 -2
  31. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/cards/__init__.pyi +2 -2
  32. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/cards/checkpoint_lister.pyi +4 -4
  33. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/cards/lineage_card.pyi +2 -2
  34. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/checkpoint_storage.pyi +4 -4
  35. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/constructors.pyi +2 -2
  36. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/core.pyi +3 -3
  37. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/decorator.pyi +4 -4
  38. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/exceptions.pyi +2 -2
  39. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/final_api.pyi +3 -3
  40. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/lineage.pyi +2 -2
  41. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/__init__.pyi +2 -2
  42. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/context.pyi +4 -4
  43. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/core.pyi +2 -2
  44. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/decorator.pyi +2 -2
  45. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/exceptions.pyi +2 -2
  46. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/task_utils.pyi +3 -3
  47. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/utils.pyi +2 -2
  48. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastructures.pyi +2 -2
  49. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/exceptions.pyi +2 -2
  50. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/hf_hub/__init__.pyi +2 -2
  51. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/hf_hub/decorator.pyi +3 -3
  52. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/__init__.pyi +2 -2
  53. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/core.pyi +3 -3
  54. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/exceptions.pyi +2 -2
  55. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/model_storage.pyi +3 -3
  56. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/__init__.pyi +2 -2
  57. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/flowspec_utils.pyi +2 -2
  58. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/general.pyi +2 -2
  59. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/identity_utils.pyi +3 -3
  60. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/serialization_handler/__init__.pyi +2 -2
  61. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/serialization_handler/base.pyi +2 -2
  62. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/serialization_handler/tar.pyi +2 -2
  63. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/tar_utils.pyi +2 -2
  64. metaflow-stubs/mf_extensions/outerbounds/__init__.pyi +2 -2
  65. metaflow-stubs/mf_extensions/outerbounds/plugins/__init__.pyi +2 -2
  66. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/__init__.pyi +2 -2
  67. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/__init__.pyi +2 -2
  68. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/_state_machine.pyi +2 -2
  69. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/_vendor/__init__.pyi +2 -2
  70. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/_vendor/spinner/__init__.pyi +2 -2
  71. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/_vendor/spinner/spinners.pyi +2 -2
  72. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/app_cli.pyi +2 -2
  73. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/app_config.pyi +2 -2
  74. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/capsule.pyi +3 -3
  75. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/click_importer.pyi +2 -2
  76. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/code_package/__init__.pyi +2 -2
  77. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/code_package/code_packager.pyi +2 -2
  78. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/__init__.pyi +2 -2
  79. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/cli_generator.pyi +2 -2
  80. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/config_utils.pyi +2 -2
  81. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/schema_export.pyi +2 -2
  82. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/typed_configs.pyi +3 -3
  83. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/unified_config.pyi +2 -2
  84. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/dependencies.pyi +3 -3
  85. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/deployer.pyi +4 -4
  86. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/experimental/__init__.pyi +2 -2
  87. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/perimeters.pyi +2 -2
  88. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/utils.pyi +4 -4
  89. metaflow-stubs/mf_extensions/outerbounds/plugins/aws/__init__.pyi +2 -2
  90. metaflow-stubs/mf_extensions/outerbounds/plugins/aws/assume_role_decorator.pyi +2 -2
  91. metaflow-stubs/mf_extensions/outerbounds/plugins/card_utilities/__init__.pyi +2 -2
  92. metaflow-stubs/mf_extensions/outerbounds/plugins/card_utilities/async_cards.pyi +3 -3
  93. metaflow-stubs/mf_extensions/outerbounds/plugins/card_utilities/injector.pyi +2 -2
  94. metaflow-stubs/mf_extensions/outerbounds/plugins/checkpoint_datastores/__init__.pyi +2 -2
  95. metaflow-stubs/mf_extensions/outerbounds/plugins/checkpoint_datastores/coreweave.pyi +2 -2
  96. metaflow-stubs/mf_extensions/outerbounds/plugins/checkpoint_datastores/nebius.pyi +2 -2
  97. metaflow-stubs/mf_extensions/outerbounds/plugins/fast_bakery/__init__.pyi +2 -2
  98. metaflow-stubs/mf_extensions/outerbounds/plugins/fast_bakery/baker.pyi +3 -3
  99. metaflow-stubs/mf_extensions/outerbounds/plugins/fast_bakery/docker_environment.pyi +3 -3
  100. metaflow-stubs/mf_extensions/outerbounds/plugins/fast_bakery/fast_bakery.pyi +2 -2
  101. metaflow-stubs/mf_extensions/outerbounds/plugins/kubernetes/__init__.pyi +2 -2
  102. metaflow-stubs/mf_extensions/outerbounds/plugins/kubernetes/pod_killer.pyi +2 -2
  103. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/__init__.pyi +2 -2
  104. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/constants.pyi +2 -2
  105. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/exceptions.pyi +2 -2
  106. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/ollama.pyi +2 -2
  107. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/status_card.pyi +2 -2
  108. metaflow-stubs/mf_extensions/outerbounds/plugins/snowflake/__init__.pyi +2 -2
  109. metaflow-stubs/mf_extensions/outerbounds/plugins/snowflake/snowflake.pyi +2 -2
  110. metaflow-stubs/mf_extensions/outerbounds/profilers/__init__.pyi +2 -2
  111. metaflow-stubs/mf_extensions/outerbounds/profilers/gpu.pyi +2 -2
  112. metaflow-stubs/mf_extensions/outerbounds/remote_config.pyi +2 -2
  113. metaflow-stubs/mf_extensions/outerbounds/toplevel/__init__.pyi +2 -2
  114. metaflow-stubs/mf_extensions/outerbounds/toplevel/global_aliases_for_metaflow_package.pyi +2 -2
  115. metaflow-stubs/multicore_utils.pyi +2 -2
  116. metaflow-stubs/ob_internal.pyi +2 -2
  117. metaflow-stubs/parameters.pyi +3 -3
  118. metaflow-stubs/plugins/__init__.pyi +10 -10
  119. metaflow-stubs/plugins/airflow/__init__.pyi +2 -2
  120. metaflow-stubs/plugins/airflow/airflow_utils.pyi +2 -2
  121. metaflow-stubs/plugins/airflow/exception.pyi +2 -2
  122. metaflow-stubs/plugins/airflow/sensors/__init__.pyi +2 -2
  123. metaflow-stubs/plugins/airflow/sensors/base_sensor.pyi +2 -2
  124. metaflow-stubs/plugins/airflow/sensors/external_task_sensor.pyi +2 -2
  125. metaflow-stubs/plugins/airflow/sensors/s3_sensor.pyi +2 -2
  126. metaflow-stubs/plugins/argo/__init__.pyi +2 -2
  127. metaflow-stubs/plugins/argo/argo_client.pyi +3 -3
  128. metaflow-stubs/plugins/argo/argo_events.pyi +2 -2
  129. metaflow-stubs/plugins/argo/argo_workflows.pyi +3 -3
  130. metaflow-stubs/plugins/argo/argo_workflows_decorator.pyi +3 -3
  131. metaflow-stubs/plugins/argo/argo_workflows_deployer.pyi +3 -3
  132. metaflow-stubs/plugins/argo/argo_workflows_deployer_objects.pyi +20 -2
  133. metaflow-stubs/plugins/argo/exit_hooks.pyi +2 -2
  134. metaflow-stubs/plugins/aws/__init__.pyi +2 -2
  135. metaflow-stubs/plugins/aws/aws_client.pyi +2 -2
  136. metaflow-stubs/plugins/aws/aws_utils.pyi +2 -2
  137. metaflow-stubs/plugins/aws/batch/__init__.pyi +2 -2
  138. metaflow-stubs/plugins/aws/batch/batch.pyi +2 -2
  139. metaflow-stubs/plugins/aws/batch/batch_client.pyi +2 -2
  140. metaflow-stubs/plugins/aws/batch/batch_decorator.pyi +2 -2
  141. metaflow-stubs/plugins/aws/secrets_manager/__init__.pyi +2 -2
  142. metaflow-stubs/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.pyi +4 -4
  143. metaflow-stubs/plugins/aws/step_functions/__init__.pyi +2 -2
  144. metaflow-stubs/plugins/aws/step_functions/event_bridge_client.pyi +2 -2
  145. metaflow-stubs/plugins/aws/step_functions/schedule_decorator.pyi +2 -2
  146. metaflow-stubs/plugins/aws/step_functions/step_functions.pyi +2 -2
  147. metaflow-stubs/plugins/aws/step_functions/step_functions_client.pyi +2 -2
  148. metaflow-stubs/plugins/aws/step_functions/step_functions_deployer.pyi +3 -3
  149. metaflow-stubs/plugins/aws/step_functions/step_functions_deployer_objects.pyi +13 -2
  150. metaflow-stubs/plugins/azure/__init__.pyi +2 -2
  151. metaflow-stubs/plugins/azure/azure_credential.pyi +2 -2
  152. metaflow-stubs/plugins/azure/azure_exceptions.pyi +2 -2
  153. metaflow-stubs/plugins/azure/azure_secret_manager_secrets_provider.pyi +4 -4
  154. metaflow-stubs/plugins/azure/azure_utils.pyi +2 -2
  155. metaflow-stubs/plugins/azure/blob_service_client_factory.pyi +2 -2
  156. metaflow-stubs/plugins/azure/includefile_support.pyi +2 -2
  157. metaflow-stubs/plugins/cards/__init__.pyi +2 -2
  158. metaflow-stubs/plugins/cards/card_client.pyi +3 -3
  159. metaflow-stubs/plugins/cards/card_creator.pyi +2 -2
  160. metaflow-stubs/plugins/cards/card_datastore.pyi +2 -2
  161. metaflow-stubs/plugins/cards/card_decorator.pyi +2 -2
  162. metaflow-stubs/plugins/cards/card_modules/__init__.pyi +2 -2
  163. metaflow-stubs/plugins/cards/card_modules/basic.pyi +3 -3
  164. metaflow-stubs/plugins/cards/card_modules/card.pyi +2 -2
  165. metaflow-stubs/plugins/cards/card_modules/components.pyi +3 -3
  166. metaflow-stubs/plugins/cards/card_modules/convert_to_native_type.pyi +2 -2
  167. metaflow-stubs/plugins/cards/card_modules/renderer_tools.pyi +2 -2
  168. metaflow-stubs/plugins/cards/card_modules/test_cards.pyi +2 -2
  169. metaflow-stubs/plugins/cards/card_resolver.pyi +2 -2
  170. metaflow-stubs/plugins/cards/component_serializer.pyi +2 -2
  171. metaflow-stubs/plugins/cards/exception.pyi +2 -2
  172. metaflow-stubs/plugins/catch_decorator.pyi +3 -3
  173. metaflow-stubs/plugins/datatools/__init__.pyi +2 -2
  174. metaflow-stubs/plugins/datatools/local.pyi +2 -2
  175. metaflow-stubs/plugins/datatools/s3/__init__.pyi +2 -2
  176. metaflow-stubs/plugins/datatools/s3/s3.pyi +3 -3
  177. metaflow-stubs/plugins/datatools/s3/s3tail.pyi +2 -2
  178. metaflow-stubs/plugins/datatools/s3/s3util.pyi +2 -2
  179. metaflow-stubs/plugins/debug_logger.pyi +2 -2
  180. metaflow-stubs/plugins/debug_monitor.pyi +2 -2
  181. metaflow-stubs/plugins/environment_decorator.pyi +2 -2
  182. metaflow-stubs/plugins/events_decorator.pyi +2 -2
  183. metaflow-stubs/plugins/exit_hook/__init__.pyi +2 -2
  184. metaflow-stubs/plugins/exit_hook/exit_hook_decorator.pyi +2 -2
  185. metaflow-stubs/plugins/frameworks/__init__.pyi +2 -2
  186. metaflow-stubs/plugins/frameworks/pytorch.pyi +2 -2
  187. metaflow-stubs/plugins/gcp/__init__.pyi +2 -2
  188. metaflow-stubs/plugins/gcp/gcp_secret_manager_secrets_provider.pyi +4 -4
  189. metaflow-stubs/plugins/gcp/gs_exceptions.pyi +2 -2
  190. metaflow-stubs/plugins/gcp/gs_storage_client_factory.pyi +2 -2
  191. metaflow-stubs/plugins/gcp/gs_utils.pyi +2 -2
  192. metaflow-stubs/plugins/gcp/includefile_support.pyi +2 -2
  193. metaflow-stubs/plugins/kubernetes/__init__.pyi +2 -2
  194. metaflow-stubs/plugins/kubernetes/kube_utils.pyi +2 -2
  195. metaflow-stubs/plugins/kubernetes/kubernetes.pyi +2 -2
  196. metaflow-stubs/plugins/kubernetes/kubernetes_client.pyi +2 -2
  197. metaflow-stubs/plugins/kubernetes/kubernetes_decorator.pyi +2 -2
  198. metaflow-stubs/plugins/kubernetes/kubernetes_jobsets.pyi +2 -2
  199. metaflow-stubs/plugins/kubernetes/spot_monitor_sidecar.pyi +2 -2
  200. metaflow-stubs/plugins/ollama/__init__.pyi +3 -3
  201. metaflow-stubs/plugins/parallel_decorator.pyi +2 -2
  202. metaflow-stubs/plugins/perimeters.pyi +2 -2
  203. metaflow-stubs/plugins/project_decorator.pyi +2 -2
  204. metaflow-stubs/plugins/pypi/__init__.pyi +3 -3
  205. metaflow-stubs/plugins/pypi/conda_decorator.pyi +2 -2
  206. metaflow-stubs/plugins/pypi/conda_environment.pyi +5 -5
  207. metaflow-stubs/plugins/pypi/parsers.pyi +2 -2
  208. metaflow-stubs/plugins/pypi/pypi_decorator.pyi +2 -2
  209. metaflow-stubs/plugins/pypi/pypi_environment.pyi +2 -2
  210. metaflow-stubs/plugins/pypi/utils.pyi +2 -2
  211. metaflow-stubs/plugins/resources_decorator.pyi +2 -2
  212. metaflow-stubs/plugins/retry_decorator.pyi +2 -2
  213. metaflow-stubs/plugins/secrets/__init__.pyi +3 -3
  214. metaflow-stubs/plugins/secrets/inline_secrets_provider.pyi +3 -3
  215. metaflow-stubs/plugins/secrets/secrets_decorator.pyi +2 -2
  216. metaflow-stubs/plugins/secrets/secrets_func.pyi +2 -2
  217. metaflow-stubs/plugins/secrets/secrets_spec.pyi +2 -2
  218. metaflow-stubs/plugins/secrets/utils.pyi +2 -2
  219. metaflow-stubs/plugins/snowflake/__init__.pyi +2 -2
  220. metaflow-stubs/plugins/storage_executor.pyi +2 -2
  221. metaflow-stubs/plugins/test_unbounded_foreach_decorator.pyi +3 -3
  222. metaflow-stubs/plugins/timeout_decorator.pyi +3 -3
  223. metaflow-stubs/plugins/torchtune/__init__.pyi +2 -2
  224. metaflow-stubs/plugins/uv/__init__.pyi +2 -2
  225. metaflow-stubs/plugins/uv/uv_environment.pyi +3 -3
  226. metaflow-stubs/profilers/__init__.pyi +2 -2
  227. metaflow-stubs/pylint_wrapper.pyi +2 -2
  228. metaflow-stubs/runner/__init__.pyi +2 -2
  229. metaflow-stubs/runner/deployer.pyi +30 -5
  230. metaflow-stubs/runner/deployer_impl.pyi +3 -3
  231. metaflow-stubs/runner/metaflow_runner.pyi +3 -3
  232. metaflow-stubs/runner/nbdeploy.pyi +2 -2
  233. metaflow-stubs/runner/nbrun.pyi +2 -2
  234. metaflow-stubs/runner/subprocess_manager.pyi +2 -2
  235. metaflow-stubs/runner/utils.pyi +3 -3
  236. metaflow-stubs/system/__init__.pyi +2 -2
  237. metaflow-stubs/system/system_logger.pyi +2 -2
  238. metaflow-stubs/system/system_monitor.pyi +2 -2
  239. metaflow-stubs/tagging_util.pyi +2 -2
  240. metaflow-stubs/tuple_util.pyi +2 -2
  241. metaflow-stubs/user_configs/__init__.pyi +2 -2
  242. metaflow-stubs/user_configs/config_decorators.pyi +7 -7
  243. metaflow-stubs/user_configs/config_options.pyi +2 -2
  244. metaflow-stubs/user_configs/config_parameters.pyi +6 -6
  245. {ob_metaflow_stubs-6.0.4.6rc0.dist-info → ob_metaflow_stubs-6.0.4.7.dist-info}/METADATA +1 -1
  246. ob_metaflow_stubs-6.0.4.7.dist-info/RECORD +249 -0
  247. ob_metaflow_stubs-6.0.4.6rc0.dist-info/RECORD +0 -249
  248. {ob_metaflow_stubs-6.0.4.6rc0.dist-info → ob_metaflow_stubs-6.0.4.7.dist-info}/WHEEL +0 -0
  249. {ob_metaflow_stubs-6.0.4.6rc0.dist-info → ob_metaflow_stubs-6.0.4.7.dist-info}/top_level.txt +0 -0
@@ -1,15 +1,15 @@
  ######################################################################################################
  # Auto-generated Metaflow stub file #
- # MF version: 2.15.21.2+obcheckpoint(0.2.4);ob(v1) #
- # Generated on 2025-07-16T22:08:50.283182 #
+ # MF version: 2.15.21.4+obcheckpoint(0.2.4);ob(v1) #
+ # Generated on 2025-07-25T18:05:15.016391 #
  ######################################################################################################

  from __future__ import annotations

  import typing
  if typing.TYPE_CHECKING:
- import datetime
  import typing
+ import datetime
  FlowSpecDerived = typing.TypeVar("FlowSpecDerived", bound="FlowSpec", contravariant=False, covariant=False)
  StepFlag = typing.NewType("StepFlag", bool)

@@ -36,17 +36,17 @@ from .user_configs.config_parameters import config_expr as config_expr
  from .user_configs.config_decorators import CustomFlowDecorator as CustomFlowDecorator
  from .user_configs.config_decorators import CustomStepDecorator as CustomStepDecorator
  from . import events as events
- from . import metaflow_git as metaflow_git
- from . import cards as cards
  from . import tuple_util as tuple_util
+ from . import cards as cards
+ from . import metaflow_git as metaflow_git
  from . import runner as runner
  from . import plugins as plugins
  from .mf_extensions.outerbounds.toplevel.global_aliases_for_metaflow_package import S3 as S3
  from . import includefile as includefile
  from .includefile import IncludeFile as IncludeFile
+ from .plugins.pypi.parsers import pyproject_toml_parser as pyproject_toml_parser
  from .plugins.pypi.parsers import conda_environment_yml_parser as conda_environment_yml_parser
  from .plugins.pypi.parsers import requirements_txt_parser as requirements_txt_parser
- from .plugins.pypi.parsers import pyproject_toml_parser as pyproject_toml_parser
  from . import client as client
  from .client.core import namespace as namespace
  from .client.core import get_namespace as get_namespace
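Note: the hunk above only reorders the top-level re-exports; the PyPI dependency parsers (`requirements_txt_parser`, `conda_environment_yml_parser`, `pyproject_toml_parser`) remain importable from `metaflow` itself. A minimal sketch of how such a parser is typically attached to a `Config` value — the flow name, config key, and `requirements.txt` path are illustrative, and the exact shape of the parsed value should be verified against the installed Metaflow version:

```python
from metaflow import Config, FlowSpec, step, requirements_txt_parser


class DepsFlow(FlowSpec):
    # Hypothetical config: parse a local requirements.txt into the structured
    # form that the @pypi/@pypi_base decorators can consume.
    deps = Config("deps", default="requirements.txt", parser=requirements_txt_parser)

    @step
    def start(self):
        print(self.deps)  # inspect the parsed dependency specification
        self.next(self.end)

    @step
    def end(self):
        pass


if __name__ == "__main__":
    DepsFlow()
```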
@@ -157,75 +157,41 @@ def step(f: typing.Union[typing.Callable[[FlowSpecDerived], None], typing.Callab
  ...

  @typing.overload
- def timeout(*, seconds: int = 0, minutes: int = 0, hours: int = 0) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
+ def secrets(*, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = [], role: typing.Optional[str] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
  """
- Specifies a timeout for your step.
-
- This decorator is useful if this step may hang indefinitely.
-
- This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
- A timeout is considered to be an exception thrown by the step. It will cause the step to be
- retried if needed and the exception will be caught by the `@catch` decorator, if present.
-
- Note that all the values specified in parameters are added together so if you specify
- 60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.
+ Specifies secrets to be retrieved and injected as environment variables prior to
+ the execution of a step.


  Parameters
  ----------
- seconds : int, default 0
- Number of seconds to wait prior to timing out.
- minutes : int, default 0
- Number of minutes to wait prior to timing out.
- hours : int, default 0
- Number of hours to wait prior to timing out.
+ sources : List[Union[str, Dict[str, Any]]], default: []
+ List of secret specs, defining how the secrets are to be retrieved
+ role : str, optional, default: None
+ Role to use for fetching secrets
  """
  ...

  @typing.overload
- def timeout(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
+ def secrets(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
  ...

  @typing.overload
- def timeout(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
- ...
-
- def timeout(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, seconds: int = 0, minutes: int = 0, hours: int = 0):
- """
- Specifies a timeout for your step.
-
- This decorator is useful if this step may hang indefinitely.
-
- This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
- A timeout is considered to be an exception thrown by the step. It will cause the step to be
- retried if needed and the exception will be caught by the `@catch` decorator, if present.
-
- Note that all the values specified in parameters are added together so if you specify
- 60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.
-
-
- Parameters
- ----------
- seconds : int, default 0
- Number of seconds to wait prior to timing out.
- minutes : int, default 0
- Number of minutes to wait prior to timing out.
- hours : int, default 0
- Number of hours to wait prior to timing out.
- """
+ def secrets(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
  ...

- def nvct(*, gpu: int, gpu_type: str) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
+ def secrets(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = [], role: typing.Optional[str] = None):
  """
- Specifies that this step should execute on DGX cloud.
+ Specifies secrets to be retrieved and injected as environment variables prior to
+ the execution of a step.


  Parameters
  ----------
- gpu : int
- Number of GPUs to use.
- gpu_type : str
- Type of Nvidia GPU to use.
+ sources : List[Union[str, Dict[str, Any]]], default: []
+ List of secret specs, defining how the secrets are to be retrieved
+ role : str, optional, default: None
+ Role to use for fetching secrets
  """
  ...

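For reference, the `@secrets` overloads documented in the hunk above (which now occupy the stub position previously held by `@timeout`) are typically used as in the following sketch; the secret source name and the environment variable it populates are placeholders, not values taken from this package:

```python
import os

from metaflow import FlowSpec, secrets, step


class SecretsFlow(FlowSpec):
    # "my-secret-source" is a hypothetical secret spec; the decorator resolves it
    # before the step runs and injects its keys as environment variables.
    @secrets(sources=["my-secret-source"])
    @step
    def start(self):
        print("DB_PASSWORD present:", "DB_PASSWORD" in os.environ)
        self.next(self.end)

    @step
    def end(self):
        pass


if __name__ == "__main__":
    SecretsFlow()
```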
@@ -249,317 +215,172 @@ def app_deploy(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None
  ...

  @typing.overload
- def secrets(*, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = [], role: typing.Optional[str] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
+ def checkpoint(*, load_policy: str = 'fresh', temp_dir_root: str = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
  """
- Specifies secrets to be retrieved and injected as environment variables prior to
- the execution of a step.
+ Enables checkpointing for a step.
+
+ > Examples
+
+ - Saving Checkpoints
+
+ ```python
+ @checkpoint
+ @step
+ def train(self):
+ model = create_model(self.parameters, checkpoint_path = None)
+ for i in range(self.epochs):
+ # some training logic
+ loss = model.train(self.dataset)
+ if i % 10 == 0:
+ model.save(
+ current.checkpoint.directory,
+ )
+ # saves the contents of the `current.checkpoint.directory` as a checkpoint
+ # and returns a reference dictionary to the checkpoint saved in the datastore
+ self.latest_checkpoint = current.checkpoint.save(
+ name="epoch_checkpoint",
+ metadata={
+ "epoch": i,
+ "loss": loss,
+ }
+ )
+ ```
+
+ - Using Loaded Checkpoints
+
+ ```python
+ @retry(times=3)
+ @checkpoint
+ @step
+ def train(self):
+ # Assume that the task has restarted and the previous attempt of the task
+ # saved a checkpoint
+ checkpoint_path = None
+ if current.checkpoint.is_loaded: # Check if a checkpoint is loaded
+ print("Loaded checkpoint from the previous attempt")
+ checkpoint_path = current.checkpoint.directory
+
+ model = create_model(self.parameters, checkpoint_path = checkpoint_path)
+ for i in range(self.epochs):
+ ...
+ ```


  Parameters
  ----------
- sources : List[Union[str, Dict[str, Any]]], default: []
- List of secret specs, defining how the secrets are to be retrieved
- role : str, optional, default: None
- Role to use for fetching secrets
+ load_policy : str, default: "fresh"
+ The policy for loading the checkpoint. The following policies are supported:
+ - "eager": Loads the the latest available checkpoint within the namespace.
+ With this mode, the latest checkpoint written by any previous task (can be even a different run) of the step
+ will be loaded at the start of the task.
+ - "none": Do not load any checkpoint
+ - "fresh": Loads the lastest checkpoint created within the running Task.
+ This mode helps loading checkpoints across various retry attempts of the same task.
+ With this mode, no checkpoint will be loaded at the start of a task but any checkpoints
+ created within the task will be loaded when the task is retries execution on failure.
+
+ temp_dir_root : str, default: None
+ The root directory under which `current.checkpoint.directory` will be created.
  """
  ...

  @typing.overload
- def secrets(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
+ def checkpoint(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
  ...

  @typing.overload
- def secrets(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
+ def checkpoint(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
  ...

- def secrets(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = [], role: typing.Optional[str] = None):
+ def checkpoint(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, load_policy: str = 'fresh', temp_dir_root: str = None):
  """
- Specifies secrets to be retrieved and injected as environment variables prior to
- the execution of a step.
+ Enables checkpointing for a step.
+
+ > Examples
+
+ - Saving Checkpoints
+
+ ```python
+ @checkpoint
+ @step
+ def train(self):
+ model = create_model(self.parameters, checkpoint_path = None)
+ for i in range(self.epochs):
+ # some training logic
+ loss = model.train(self.dataset)
+ if i % 10 == 0:
+ model.save(
+ current.checkpoint.directory,
+ )
+ # saves the contents of the `current.checkpoint.directory` as a checkpoint
+ # and returns a reference dictionary to the checkpoint saved in the datastore
+ self.latest_checkpoint = current.checkpoint.save(
+ name="epoch_checkpoint",
+ metadata={
+ "epoch": i,
+ "loss": loss,
+ }
+ )
+ ```
+
+ - Using Loaded Checkpoints
+
+ ```python
+ @retry(times=3)
+ @checkpoint
+ @step
+ def train(self):
+ # Assume that the task has restarted and the previous attempt of the task
+ # saved a checkpoint
+ checkpoint_path = None
+ if current.checkpoint.is_loaded: # Check if a checkpoint is loaded
+ print("Loaded checkpoint from the previous attempt")
+ checkpoint_path = current.checkpoint.directory
+
+ model = create_model(self.parameters, checkpoint_path = checkpoint_path)
+ for i in range(self.epochs):
+ ...
+ ```


  Parameters
  ----------
- sources : List[Union[str, Dict[str, Any]]], default: []
- List of secret specs, defining how the secrets are to be retrieved
- role : str, optional, default: None
- Role to use for fetching secrets
+ load_policy : str, default: "fresh"
+ The policy for loading the checkpoint. The following policies are supported:
+ - "eager": Loads the the latest available checkpoint within the namespace.
+ With this mode, the latest checkpoint written by any previous task (can be even a different run) of the step
+ will be loaded at the start of the task.
+ - "none": Do not load any checkpoint
+ - "fresh": Loads the lastest checkpoint created within the running Task.
+ This mode helps loading checkpoints across various retry attempts of the same task.
+ With this mode, no checkpoint will be loaded at the start of a task but any checkpoints
+ created within the task will be loaded when the task is retries execution on failure.
+
+ temp_dir_root : str, default: None
+ The root directory under which `current.checkpoint.directory` will be created.
  """
  ...

- @typing.overload
- def pypi(*, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
+ def nvidia(*, gpu: int, gpu_type: str, queue_timeout: int) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
  """
- Specifies the PyPI packages for the step.
-
- Information in this decorator will augment any
- attributes set in the `@pyi_base` flow-level decorator. Hence,
- you can use `@pypi_base` to set packages required by all
- steps and use `@pypi` to specify step-specific overrides.
+ Specifies that this step should execute on DGX cloud.


  Parameters
  ----------
- packages : Dict[str, str], default: {}
- Packages to use for this step. The key is the name of the package
- and the value is the version to use.
- python : str, optional, default: None
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
- that the version used will correspond to the version of the Python interpreter used to start the run.
+ gpu : int
+ Number of GPUs to use.
+ gpu_type : str
+ Type of Nvidia GPU to use.
+ queue_timeout : int
+ Time to keep the job in NVCF's queue.
  """
  ...

  @typing.overload
- def pypi(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
- ...
-
- @typing.overload
- def pypi(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
- ...
-
- def pypi(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None):
+ def model(*, load: typing.Union[typing.List[str], str, typing.List[typing.Tuple[str, typing.Optional[str]]]] = None, temp_dir_root: str = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
  """
- Specifies the PyPI packages for the step.
-
- Information in this decorator will augment any
- attributes set in the `@pyi_base` flow-level decorator. Hence,
- you can use `@pypi_base` to set packages required by all
- steps and use `@pypi` to specify step-specific overrides.
-
-
- Parameters
- ----------
- packages : Dict[str, str], default: {}
- Packages to use for this step. The key is the name of the package
- and the value is the version to use.
- python : str, optional, default: None
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
- that the version used will correspond to the version of the Python interpreter used to start the run.
- """
- ...
-
- def ollama(*, models: list, backend: str, force_pull: bool, cache_update_policy: str, force_cache_update: bool, debug: bool, circuit_breaker_config: dict, timeout_config: dict) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
- """
- This decorator is used to run Ollama APIs as Metaflow task sidecars.
-
- User code call
- --------------
- @ollama(
- models=[...],
- ...
- )
-
- Valid backend options
- ---------------------
- - 'local': Run as a separate process on the local task machine.
- - (TODO) 'managed': Outerbounds hosts and selects compute provider.
- - (TODO) 'remote': Spin up separate instance to serve Ollama models.
-
- Valid model options
- -------------------
- Any model here https://ollama.com/search, e.g. 'llama3.2', 'llama3.3'
-
-
- Parameters
- ----------
- models: list[str]
- List of Ollama containers running models in sidecars.
- backend: str
- Determines where and how to run the Ollama process.
- force_pull: bool
- Whether to run `ollama pull` no matter what, or first check the remote cache in Metaflow datastore for this model key.
- cache_update_policy: str
- Cache update policy: "auto", "force", or "never".
- force_cache_update: bool
- Simple override for "force" cache update policy.
- debug: bool
- Whether to turn on verbose debugging logs.
- circuit_breaker_config: dict
- Configuration for circuit breaker protection. Keys: failure_threshold, recovery_timeout, reset_timeout.
- timeout_config: dict
- Configuration for various operation timeouts. Keys: pull, stop, health_check, install, server_startup.
- """
- ...
-
- @typing.overload
- def fast_bakery_internal(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
- """
- Internal decorator to support Fast bakery
- """
- ...
-
- @typing.overload
- def fast_bakery_internal(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
- ...
-
- def fast_bakery_internal(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
- """
- Internal decorator to support Fast bakery
- """
- ...
-
- def kubernetes(*, cpu: int = 1, memory: int = 4096, disk: int = 10240, image: typing.Optional[str] = None, image_pull_policy: str = 'KUBERNETES_IMAGE_PULL_POLICY', image_pull_secrets: typing.List[str] = [], service_account: str = 'METAFLOW_KUBERNETES_SERVICE_ACCOUNT', secrets: typing.Optional[typing.List[str]] = None, node_selector: typing.Union[typing.Dict[str, str], str, None] = None, namespace: str = 'METAFLOW_KUBERNETES_NAMESPACE', gpu: typing.Optional[int] = None, gpu_vendor: str = 'KUBERNETES_GPU_VENDOR', tolerations: typing.List[str] = [], labels: typing.Dict[str, str] = 'METAFLOW_KUBERNETES_LABELS', annotations: typing.Dict[str, str] = 'METAFLOW_KUBERNETES_ANNOTATIONS', use_tmpfs: bool = False, tmpfs_tempdir: bool = True, tmpfs_size: typing.Optional[int] = None, tmpfs_path: typing.Optional[str] = '/metaflow_temp', persistent_volume_claims: typing.Optional[typing.Dict[str, str]] = None, shared_memory: typing.Optional[int] = None, port: typing.Optional[int] = None, compute_pool: typing.Optional[str] = None, hostname_resolution_timeout: int = 600, qos: str = 'Burstable', security_context: typing.Optional[typing.Dict[str, typing.Any]] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
- """
- Specifies that this step should execute on Kubernetes.
-
-
- Parameters
- ----------
- cpu : int, default 1
- Number of CPUs required for this step. If `@resources` is
- also present, the maximum value from all decorators is used.
- memory : int, default 4096
- Memory size (in MB) required for this step. If
- `@resources` is also present, the maximum value from all decorators is
- used.
- disk : int, default 10240
- Disk size (in MB) required for this step. If
- `@resources` is also present, the maximum value from all decorators is
- used.
- image : str, optional, default None
- Docker image to use when launching on Kubernetes. If not specified, and
- METAFLOW_KUBERNETES_CONTAINER_IMAGE is specified, that image is used. If
- not, a default Docker image mapping to the current version of Python is used.
- image_pull_policy: str, default KUBERNETES_IMAGE_PULL_POLICY
- If given, the imagePullPolicy to be applied to the Docker image of the step.
- image_pull_secrets: List[str], default []
- The default is extracted from METAFLOW_KUBERNETES_IMAGE_PULL_SECRETS.
- Kubernetes image pull secrets to use when pulling container images
- in Kubernetes.
- service_account : str, default METAFLOW_KUBERNETES_SERVICE_ACCOUNT
- Kubernetes service account to use when launching pod in Kubernetes.
- secrets : List[str], optional, default None
- Kubernetes secrets to use when launching pod in Kubernetes. These
- secrets are in addition to the ones defined in `METAFLOW_KUBERNETES_SECRETS`
- in Metaflow configuration.
- node_selector: Union[Dict[str,str], str], optional, default None
- Kubernetes node selector(s) to apply to the pod running the task.
- Can be passed in as a comma separated string of values e.g.
- 'kubernetes.io/os=linux,kubernetes.io/arch=amd64' or as a dictionary
- {'kubernetes.io/os': 'linux', 'kubernetes.io/arch': 'amd64'}
- namespace : str, default METAFLOW_KUBERNETES_NAMESPACE
- Kubernetes namespace to use when launching pod in Kubernetes.
- gpu : int, optional, default None
- Number of GPUs required for this step. A value of zero implies that
- the scheduled node should not have GPUs.
- gpu_vendor : str, default KUBERNETES_GPU_VENDOR
- The vendor of the GPUs to be used for this step.
- tolerations : List[str], default []
- The default is extracted from METAFLOW_KUBERNETES_TOLERATIONS.
- Kubernetes tolerations to use when launching pod in Kubernetes.
- labels: Dict[str, str], default: METAFLOW_KUBERNETES_LABELS
- Kubernetes labels to use when launching pod in Kubernetes.
- annotations: Dict[str, str], default: METAFLOW_KUBERNETES_ANNOTATIONS
- Kubernetes annotations to use when launching pod in Kubernetes.
- use_tmpfs : bool, default False
- This enables an explicit tmpfs mount for this step.
- tmpfs_tempdir : bool, default True
- sets METAFLOW_TEMPDIR to tmpfs_path if set for this step.
- tmpfs_size : int, optional, default: None
- The value for the size (in MiB) of the tmpfs mount for this step.
- This parameter maps to the `--tmpfs` option in Docker. Defaults to 50% of the
- memory allocated for this step.
- tmpfs_path : str, optional, default /metaflow_temp
- Path to tmpfs mount for this step.
- persistent_volume_claims : Dict[str, str], optional, default None
- A map (dictionary) of persistent volumes to be mounted to the pod for this step. The map is from persistent
- volumes to the path to which the volume is to be mounted, e.g., `{'pvc-name': '/path/to/mount/on'}`.
- shared_memory: int, optional
- Shared memory size (in MiB) required for this step
- port: int, optional
- Port number to specify in the Kubernetes job object
- compute_pool : str, optional, default None
- Compute pool to be used for for this step.
- If not specified, any accessible compute pool within the perimeter is used.
- hostname_resolution_timeout: int, default 10 * 60
- Timeout in seconds for the workers tasks in the gang scheduled cluster to resolve the hostname of control task.
- Only applicable when @parallel is used.
- qos: str, default: Burstable
- Quality of Service class to assign to the pod. Supported values are: Guaranteed, Burstable, BestEffort
-
- security_context: Dict[str, Any], optional, default None
- Container security context. Applies to the task container. Allows the following keys:
- - privileged: bool, optional, default None
- - allow_privilege_escalation: bool, optional, default None
- - run_as_user: int, optional, default None
- - run_as_group: int, optional, default None
- - run_as_non_root: bool, optional, default None
- """
- ...
-
- def vllm(*, model: str, backend: str, openai_api_server: bool, debug: bool, card_refresh_interval: int, max_retries: int, retry_alert_frequency: int, engine_args: dict) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
- """
- This decorator is used to run vllm APIs as Metaflow task sidecars.
-
- User code call
- --------------
- @vllm(
- model="...",
- ...
- )
-
- Valid backend options
- ---------------------
- - 'local': Run as a separate process on the local task machine.
-
- Valid model options
- -------------------
- Any HuggingFace model identifier, e.g. 'meta-llama/Llama-3.2-1B'
-
- NOTE: vLLM's OpenAI-compatible server serves ONE model per server instance.
- If you need multiple models, you must create multiple @vllm decorators.
-
-
- Parameters
- ----------
- model: str
- HuggingFace model identifier to be served by vLLM.
- backend: str
- Determines where and how to run the vLLM process.
- openai_api_server: bool
- Whether to use OpenAI-compatible API server mode (subprocess) instead of native engine.
- Default is False (uses native engine).
- Set to True for backward compatibility with existing code.
- debug: bool
- Whether to turn on verbose debugging logs.
- card_refresh_interval: int
- Interval in seconds for refreshing the vLLM status card.
- Only used when openai_api_server=True.
- max_retries: int
- Maximum number of retries checking for vLLM server startup.
- Only used when openai_api_server=True.
- retry_alert_frequency: int
- Frequency of alert logs for vLLM server startup retries.
- Only used when openai_api_server=True.
- engine_args : dict
- Additional keyword arguments to pass to the vLLM engine.
- For example, `tensor_parallel_size=2`.
- """
- ...
-
- @typing.overload
- def parallel(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
- """
- Decorator prototype for all step decorators. This function gets specialized
- and imported for all decorators types by _import_plugin_decorators().
- """
- ...
-
- @typing.overload
- def parallel(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
- ...
-
- def parallel(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
- """
- Decorator prototype for all step decorators. This function gets specialized
- and imported for all decorators types by _import_plugin_decorators().
- """
- ...
-
- @typing.overload
- def model(*, load: typing.Union[typing.List[str], str, typing.List[typing.Tuple[str, typing.Optional[str]]]] = None, temp_dir_root: str = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
- """
- Enables loading / saving of models within a step.
+ Enables loading / saving of models within a step.

  > Examples
@@ -686,366 +507,160 @@ def model(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], ty
686
507
  ...
687
508
 
688
509
  @typing.overload
689
- def checkpoint(*, load_policy: str = 'fresh', temp_dir_root: str = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
510
+ def conda(*, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
690
511
  """
691
- Enables checkpointing for a step.
512
+ Specifies the Conda environment for the step.
692
513
 
693
- > Examples
694
-
695
- - Saving Checkpoints
696
-
697
- ```python
698
- @checkpoint
699
- @step
700
- def train(self):
701
- model = create_model(self.parameters, checkpoint_path = None)
702
- for i in range(self.epochs):
703
- # some training logic
704
- loss = model.train(self.dataset)
705
- if i % 10 == 0:
706
- model.save(
707
- current.checkpoint.directory,
708
- )
709
- # saves the contents of the `current.checkpoint.directory` as a checkpoint
710
- # and returns a reference dictionary to the checkpoint saved in the datastore
711
- self.latest_checkpoint = current.checkpoint.save(
712
- name="epoch_checkpoint",
713
- metadata={
714
- "epoch": i,
715
- "loss": loss,
716
- }
717
- )
718
- ```
719
-
720
- - Using Loaded Checkpoints
721
-
722
- ```python
723
- @retry(times=3)
724
- @checkpoint
725
- @step
726
- def train(self):
727
- # Assume that the task has restarted and the previous attempt of the task
728
- # saved a checkpoint
729
- checkpoint_path = None
730
- if current.checkpoint.is_loaded: # Check if a checkpoint is loaded
731
- print("Loaded checkpoint from the previous attempt")
732
- checkpoint_path = current.checkpoint.directory
733
-
734
- model = create_model(self.parameters, checkpoint_path = checkpoint_path)
735
- for i in range(self.epochs):
736
- ...
737
- ```
514
+ Information in this decorator will augment any
515
+ attributes set in the `@conda_base` flow-level decorator. Hence,
516
+ you can use `@conda_base` to set packages required by all
517
+ steps and use `@conda` to specify step-specific overrides.
738
518
 
739
519
 
740
520
  Parameters
741
521
  ----------
742
- load_policy : str, default: "fresh"
743
- The policy for loading the checkpoint. The following policies are supported:
744
- - "eager": Loads the the latest available checkpoint within the namespace.
745
- With this mode, the latest checkpoint written by any previous task (can be even a different run) of the step
746
- will be loaded at the start of the task.
747
- - "none": Do not load any checkpoint
748
- - "fresh": Loads the lastest checkpoint created within the running Task.
749
- This mode helps loading checkpoints across various retry attempts of the same task.
750
- With this mode, no checkpoint will be loaded at the start of a task but any checkpoints
751
- created within the task will be loaded when the task is retries execution on failure.
752
-
753
- temp_dir_root : str, default: None
754
- The root directory under which `current.checkpoint.directory` will be created.
522
+ packages : Dict[str, str], default {}
523
+ Packages to use for this step. The key is the name of the package
524
+ and the value is the version to use.
525
+ libraries : Dict[str, str], default {}
526
+ Supported for backward compatibility. When used with packages, packages will take precedence.
527
+ python : str, optional, default None
528
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
529
+ that the version used will correspond to the version of the Python interpreter used to start the run.
530
+ disabled : bool, default False
531
+ If set to True, disables @conda.
755
532
  """
756
533
  ...
757
534
 
758
535
  @typing.overload
759
- def checkpoint(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
536
+ def conda(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
760
537
  ...
761
538
 
762
539
  @typing.overload
763
- def checkpoint(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
540
+ def conda(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
764
541
  ...
765
542
 
766
- def checkpoint(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, load_policy: str = 'fresh', temp_dir_root: str = None):
543
+ def conda(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False):
767
544
  """
768
- Enables checkpointing for a step.
769
-
770
- > Examples
771
-
772
- - Saving Checkpoints
773
-
774
- ```python
775
- @checkpoint
776
- @step
777
- def train(self):
778
- model = create_model(self.parameters, checkpoint_path = None)
779
- for i in range(self.epochs):
780
- # some training logic
781
- loss = model.train(self.dataset)
782
- if i % 10 == 0:
783
- model.save(
784
- current.checkpoint.directory,
785
- )
786
- # saves the contents of the `current.checkpoint.directory` as a checkpoint
787
- # and returns a reference dictionary to the checkpoint saved in the datastore
788
- self.latest_checkpoint = current.checkpoint.save(
789
- name="epoch_checkpoint",
790
- metadata={
791
- "epoch": i,
792
- "loss": loss,
793
- }
794
- )
795
- ```
796
-
797
- - Using Loaded Checkpoints
798
-
799
- ```python
800
- @retry(times=3)
801
- @checkpoint
802
- @step
803
- def train(self):
804
- # Assume that the task has restarted and the previous attempt of the task
805
- # saved a checkpoint
806
- checkpoint_path = None
807
- if current.checkpoint.is_loaded: # Check if a checkpoint is loaded
808
- print("Loaded checkpoint from the previous attempt")
809
- checkpoint_path = current.checkpoint.directory
810
-
811
- model = create_model(self.parameters, checkpoint_path = checkpoint_path)
812
- for i in range(self.epochs):
813
- ...
814
- ```
815
-
816
-
817
- Parameters
818
- ----------
819
- load_policy : str, default: "fresh"
820
- The policy for loading the checkpoint. The following policies are supported:
821
- - "eager": Loads the latest available checkpoint within the namespace.
822
- With this mode, the latest checkpoint written by any previous task (can be even a different run) of the step
823
- will be loaded at the start of the task.
824
- - "none": Do not load any checkpoint
825
- - "fresh": Loads the latest checkpoint created within the running Task.
826
- This mode helps loading checkpoints across various retry attempts of the same task.
827
- With this mode, no checkpoint will be loaded at the start of a task but any checkpoints
828
- created within the task will be loaded when the task retries execution on failure.
545
+ Specifies the Conda environment for the step.
829
546
 
830
- temp_dir_root : str, default: None
831
- The root directory under which `current.checkpoint.directory` will be created.
832
- """
833
- ...
834
-
835
- def nvidia(*, gpu: int, gpu_type: str, queue_timeout: int) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
836
- """
837
- Specifies that this step should execute on DGX cloud.
547
+ Information in this decorator will augment any
548
+ attributes set in the `@conda_base` flow-level decorator. Hence,
549
+ you can use `@conda_base` to set packages required by all
550
+ steps and use `@conda` to specify step-specific overrides.
838
551
 
839
552
 
840
553
  Parameters
841
554
  ----------
842
- gpu : int
843
- Number of GPUs to use.
844
- gpu_type : str
845
- Type of Nvidia GPU to use.
846
- queue_timeout : int
847
- Time to keep the job in NVCF's queue.
555
+ packages : Dict[str, str], default {}
556
+ Packages to use for this step. The key is the name of the package
557
+ and the value is the version to use.
558
+ libraries : Dict[str, str], default {}
559
+ Supported for backward compatibility. When used with packages, packages will take precedence.
560
+ python : str, optional, default None
561
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
562
+ that the version used will correspond to the version of the Python interpreter used to start the run.
563
+ disabled : bool, default False
564
+ If set to True, disables @conda.
848
565
  """
849
566
  ...
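For illustration, a minimal sketch of layering the step-level `@conda` decorator on top of `@conda_base`; the flow name, package names, and versions below are placeholders, not taken from this diff:

```python
from metaflow import FlowSpec, conda, conda_base, step


@conda_base(python="3.10.4", packages={"numpy": "1.26.4"})  # shared by every step
class CondaExampleFlow(FlowSpec):

    @conda(packages={"pandas": "2.2.2"})  # step-specific addition/override
    @step
    def start(self):
        import pandas as pd  # resolved inside this step's Conda environment
        self.n_rows = len(pd.DataFrame({"x": [1, 2, 3]}))
        self.next(self.end)

    @step
    def end(self):
        print("rows:", self.n_rows)


if __name__ == "__main__":
    CondaExampleFlow()
```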
850
567
 
851
568
  @typing.overload
852
- def retry(*, times: int = 3, minutes_between_retries: int = 2) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
569
+ def card(*, type: str = 'default', id: typing.Optional[str] = None, options: typing.Dict[str, typing.Any] = {}, timeout: int = 45) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
853
570
  """
854
- Specifies the number of times the task corresponding
855
- to a step needs to be retried.
856
-
857
- This decorator is useful for handling transient errors, such as networking issues.
858
- If your task contains operations that can't be retried safely, e.g. database updates,
859
- it is advisable to annotate it with `@retry(times=0)`.
571
+ Creates a human-readable report, a Metaflow Card, after this step completes.
860
572
 
861
- This can be used in conjunction with the `@catch` decorator. The `@catch`
862
- decorator will execute a no-op task after all retries have been exhausted,
863
- ensuring that the flow execution can continue.
573
+ Note that you may add multiple `@card` decorators in a step with different parameters.
864
574
 
865
575
 
866
576
  Parameters
867
577
  ----------
868
- times : int, default 3
869
- Number of times to retry this task.
870
- minutes_between_retries : int, default 2
871
- Number of minutes between retries.
578
+ type : str, default 'default'
579
+ Card type.
580
+ id : str, optional, default None
581
+ If multiple cards are present, use this id to identify this card.
582
+ options : Dict[str, Any], default {}
583
+ Options passed to the card. The contents depend on the card type.
584
+ timeout : int, default 45
585
+ Interrupt reporting if it takes more than this many seconds.
872
586
  """
873
587
  ...
874
588
 
875
589
  @typing.overload
876
- def retry(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
590
+ def card(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
877
591
  ...
878
592
 
879
593
  @typing.overload
880
- def retry(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
594
+ def card(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
881
595
  ...
882
596
 
883
- def retry(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, times: int = 3, minutes_between_retries: int = 2):
597
+ def card(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, type: str = 'default', id: typing.Optional[str] = None, options: typing.Dict[str, typing.Any] = {}, timeout: int = 45):
884
598
  """
885
- Specifies the number of times the task corresponding
886
- to a step needs to be retried.
887
-
888
- This decorator is useful for handling transient errors, such as networking issues.
889
- If your task contains operations that can't be retried safely, e.g. database updates,
890
- it is advisable to annotate it with `@retry(times=0)`.
599
+ Creates a human-readable report, a Metaflow Card, after this step completes.
891
600
 
892
- This can be used in conjunction with the `@catch` decorator. The `@catch`
893
- decorator will execute a no-op task after all retries have been exhausted,
894
- ensuring that the flow execution can continue.
601
+ Note that you may add multiple `@card` decorators in a step with different parameters.
895
602
 
896
603
 
897
604
  Parameters
898
605
  ----------
899
- times : int, default 3
900
- Number of times to retry this task.
901
- minutes_between_retries : int, default 2
902
- Number of minutes between retries.
606
+ type : str, default 'default'
607
+ Card type.
608
+ id : str, optional, default None
609
+ If multiple cards are present, use this id to identify this card.
610
+ options : Dict[str, Any], default {}
611
+ Options passed to the card. The contents depend on the card type.
612
+ timeout : int, default 45
613
+ Interrupt reporting if it takes more than this many seconds.
903
614
  """
904
615
  ...
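For illustration, a minimal sketch of a step that appends content to its card via `metaflow.cards`; the flow name and markdown text are placeholders:

```python
from metaflow import FlowSpec, card, current, step
from metaflow.cards import Markdown


class CardExampleFlow(FlowSpec):

    @card(type="default", timeout=45)
    @step
    def start(self):
        # Components appended here are rendered into the card once the step completes.
        current.card.append(Markdown("# Run summary"))
        current.card.append(Markdown("*All inputs validated.*"))
        self.next(self.end)

    @step
    def end(self):
        pass


if __name__ == "__main__":
    CardExampleFlow()
```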
905
616
 
906
617
  @typing.overload
907
- def conda(*, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
618
+ def resources(*, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
908
619
  """
909
- Specifies the Conda environment for the step.
620
+ Specifies the resources needed when executing this step.
910
621
 
911
- Information in this decorator will augment any
912
- attributes set in the `@conda_base` flow-level decorator. Hence,
913
- you can use `@conda_base` to set packages required by all
914
- steps and use `@conda` to specify step-specific overrides.
622
+ Use `@resources` to specify the resource requirements
623
+ independently of the specific compute layer (`@batch`, `@kubernetes`).
624
+
625
+ You can choose the compute layer on the command line by executing e.g.
626
+ ```
627
+ python myflow.py run --with batch
628
+ ```
629
+ or
630
+ ```
631
+ python myflow.py run --with kubernetes
632
+ ```
633
+ which executes the flow on the desired system using the
634
+ requirements specified in `@resources`.
915
635
 
916
636
 
917
637
  Parameters
918
638
  ----------
919
- packages : Dict[str, str], default {}
920
- Packages to use for this step. The key is the name of the package
921
- and the value is the version to use.
922
- libraries : Dict[str, str], default {}
923
- Supported for backward compatibility. When used with packages, packages will take precedence.
924
- python : str, optional, default None
925
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
926
- that the version used will correspond to the version of the Python interpreter used to start the run.
927
- disabled : bool, default False
928
- If set to True, disables @conda.
639
+ cpu : int, default 1
640
+ Number of CPUs required for this step.
641
+ gpu : int, optional, default None
642
+ Number of GPUs required for this step.
643
+ disk : int, optional, default None
644
+ Disk size (in MB) required for this step. Only applies on Kubernetes.
645
+ memory : int, default 4096
646
+ Memory size (in MB) required for this step.
647
+ shared_memory : int, optional, default None
648
+ The value for the size (in MiB) of the /dev/shm volume for this step.
649
+ This parameter maps to the `--shm-size` option in Docker.
929
650
  """
930
651
  ...
931
652
 
932
653
  @typing.overload
933
- def conda(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
654
+ def resources(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
934
655
  ...
935
656
 
936
657
  @typing.overload
937
- def conda(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
658
+ def resources(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
938
659
  ...
939
660
 
940
- def conda(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False):
661
+ def resources(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None):
941
662
  """
942
- Specifies the Conda environment for the step.
943
-
944
- Information in this decorator will augment any
945
- attributes set in the `@conda_base` flow-level decorator. Hence,
946
- you can use `@conda_base` to set packages required by all
947
- steps and use `@conda` to specify step-specific overrides.
948
-
949
-
950
- Parameters
951
- ----------
952
- packages : Dict[str, str], default {}
953
- Packages to use for this step. The key is the name of the package
954
- and the value is the version to use.
955
- libraries : Dict[str, str], default {}
956
- Supported for backward compatibility. When used with packages, packages will take precedence.
957
- python : str, optional, default None
958
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
959
- that the version used will correspond to the version of the Python interpreter used to start the run.
960
- disabled : bool, default False
961
- If set to True, disables @conda.
962
- """
963
- ...
964
-
965
- def huggingface_hub(*, temp_dir_root: typing.Optional[str] = None, load: typing.Union[typing.List[str], typing.List[typing.Tuple[typing.Dict, str]], typing.List[typing.Tuple[str, str]], typing.List[typing.Dict], None]) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
966
- """
967
- Decorator that helps cache, version and store models/datasets from huggingface hub.
968
-
969
- > Examples
970
-
971
- **Usage: creating references of models from huggingface that may be loaded in downstream steps**
972
- ```python
973
- @huggingface_hub
974
- @step
975
- def pull_model_from_huggingface(self):
976
- # `current.huggingface_hub.snapshot_download` downloads the model from the Hugging Face Hub
977
- # and saves it in the backend storage based on the model's `repo_id`. If there exists a model
978
- # with the same `repo_id` in the backend storage, it will not download the model again. The return
979
- # value of the function is a reference to the model in the backend storage.
980
- # This reference can be used to load the model in the subsequent steps via `@model(load=["llama_model"])`
981
-
982
- self.model_id = "mistralai/Mistral-7B-Instruct-v0.1"
983
- self.llama_model = current.huggingface_hub.snapshot_download(
984
- repo_id=self.model_id,
985
- allow_patterns=["*.safetensors", "*.json", "tokenizer.*"],
986
- )
987
- self.next(self.train)
988
- ```
989
-
990
- **Usage: loading models directly from huggingface hub or from cache (from metaflow's datastore)**
991
- ```python
992
- @huggingface_hub(load=["mistralai/Mistral-7B-Instruct-v0.1"])
993
- @step
994
- def pull_model_from_huggingface(self):
995
- path_to_model = current.huggingface_hub.loaded["mistralai/Mistral-7B-Instruct-v0.1"]
996
- ```
997
-
998
- ```python
999
- @huggingface_hub(load=[("mistralai/Mistral-7B-Instruct-v0.1", "/my-directory"), ("myorg/mistral-lora", "/my-lora-directory")])
1000
- @step
1001
- def finetune_model(self):
1002
- path_to_model = current.huggingface_hub.loaded["mistralai/Mistral-7B-Instruct-v0.1"]
1003
- # path_to_model will be /my-directory
1004
- ```
1005
-
1006
- ```python
1007
- # Takes all the arguments passed to `snapshot_download`
1008
- # except for `local_dir`
1009
- @huggingface_hub(load=[
1010
- {
1011
- "repo_id": "mistralai/Mistral-7B-Instruct-v0.1",
1012
- },
1013
- {
1014
- "repo_id": "myorg/mistral-lora",
1015
- "repo_type": "model",
1016
- },
1017
- ])
1018
- @step
1019
- def finetune_model(self):
1020
- path_to_model = current.huggingface_hub.loaded["mistralai/Mistral-7B-Instruct-v0.1"]
1021
- # path_to_model will be /my-directory
1022
- ```
1023
-
1024
-
1025
- Parameters
1026
- ----------
1027
- temp_dir_root : str, optional
1028
- The root directory that will hold the temporary directory where objects will be downloaded.
1029
-
1030
- load: Union[List[str], List[Tuple[Dict, str]], List[Tuple[str, str]], List[Dict], None]
1031
- The list of repos (models/datasets) to load.
1032
-
1033
- Loaded repos can be accessed via `current.huggingface_hub.loaded`. If load is set, then the following happens:
1034
-
1035
- - If repo (model/dataset) is not found in the datastore:
1036
- - Downloads the repo from Hugging Face Hub to a temporary directory (or uses specified path) for local access
1037
- - Stores it in Metaflow's datastore (s3/gcs/azure etc.) with a unique name based on repo_type/repo_id
1038
- - All HF models loaded for a `@step` will be cached separately under flow/step/namespace.
1039
-
1040
- - If repo is found in the datastore:
1041
- - Loads it directly from datastore to local path (can be temporary directory or specified path)
1042
- """
1043
- ...
1044
-
1045
- @typing.overload
1046
- def resources(*, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1047
- """
1048
- Specifies the resources needed when executing this step.
663
+ Specifies the resources needed when executing this step.
1049
664
 
1050
665
  Use `@resources` to specify the resource requirements
1051
666
  independently of the specific compute layer (`@batch`, `@kubernetes`).
@@ -1079,421 +694,582 @@ def resources(*, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Op
1079
694
  ...
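For illustration, a minimal sketch of `@resources` with the compute layer chosen on the command line; the sizes are placeholders:

```python
from metaflow import FlowSpec, resources, step


class ResourcesExampleFlow(FlowSpec):

    @resources(cpu=4, memory=16384, gpu=1)  # requirements only, not a compute layer
    @step
    def start(self):
        # Takes effect when the flow is run with e.g.
        #   python resources_example.py run --with kubernetes
        self.next(self.end)

    @step
    def end(self):
        pass


if __name__ == "__main__":
    ResourcesExampleFlow()
```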
1080
695
 
1081
696
  @typing.overload
1082
- def resources(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1083
- ...
1084
-
1085
- @typing.overload
1086
- def resources(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1087
- ...
1088
-
1089
- def resources(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None):
697
+ def timeout(*, seconds: int = 0, minutes: int = 0, hours: int = 0) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1090
698
  """
1091
- Specifies the resources needed when executing this step.
1092
-
1093
- Use `@resources` to specify the resource requirements
1094
- independently of the specific compute layer (`@batch`, `@kubernetes`).
1095
-
1096
- You can choose the compute layer on the command line by executing e.g.
1097
- ```
1098
- python myflow.py run --with batch
1099
- ```
1100
- or
1101
- ```
1102
- python myflow.py run --with kubernetes
1103
- ```
1104
- which executes the flow on the desired system using the
1105
- requirements specified in `@resources`.
699
+ Specifies a timeout for your step.
1106
700
 
701
+ This decorator is useful if this step may hang indefinitely.
1107
702
 
1108
- Parameters
1109
- ----------
1110
- cpu : int, default 1
1111
- Number of CPUs required for this step.
1112
- gpu : int, optional, default None
1113
- Number of GPUs required for this step.
1114
- disk : int, optional, default None
1115
- Disk size (in MB) required for this step. Only applies on Kubernetes.
1116
- memory : int, default 4096
1117
- Memory size (in MB) required for this step.
1118
- shared_memory : int, optional, default None
1119
- The value for the size (in MiB) of the /dev/shm volume for this step.
1120
- This parameter maps to the `--shm-size` option in Docker.
1121
- """
1122
- ...
1123
-
1124
- @typing.overload
1125
- def catch(*, var: typing.Optional[str] = None, print_exception: bool = True) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1126
- """
1127
- Specifies that the step will succeed under all circumstances.
703
+ This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
704
+ A timeout is considered to be an exception thrown by the step. It will cause the step to be
705
+ retried if needed and the exception will be caught by the `@catch` decorator, if present.
1128
706
 
1129
- The decorator will create an optional artifact, specified by `var`, which
1130
- contains the exception raised. You can use it to detect the presence
1131
- of errors, indicating that all happy-path artifacts produced by the step
1132
- are missing.
707
+ Note that all the values specified in parameters are added together so if you specify
708
+ 60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.
1133
709
 
1134
710
 
1135
711
  Parameters
1136
712
  ----------
1137
- var : str, optional, default None
1138
- Name of the artifact in which to store the caught exception.
1139
- If not specified, the exception is not stored.
1140
- print_exception : bool, default True
1141
- Determines whether or not the exception is printed to
1142
- stdout when caught.
713
+ seconds : int, default 0
714
+ Number of seconds to wait prior to timing out.
715
+ minutes : int, default 0
716
+ Number of minutes to wait prior to timing out.
717
+ hours : int, default 0
718
+ Number of hours to wait prior to timing out.
1143
719
  """
1144
720
  ...
1145
721
 
1146
722
  @typing.overload
1147
- def catch(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
723
+ def timeout(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1148
724
  ...
1149
725
 
1150
726
  @typing.overload
1151
- def catch(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
727
+ def timeout(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1152
728
  ...
1153
729
 
1154
- def catch(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, var: typing.Optional[str] = None, print_exception: bool = True):
730
+ def timeout(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, seconds: int = 0, minutes: int = 0, hours: int = 0):
1155
731
  """
1156
- Specifies that the step will succeed under all circumstances.
732
+ Specifies a timeout for your step.
1157
733
 
1158
- The decorator will create an optional artifact, specified by `var`, which
1159
- contains the exception raised. You can use it to detect the presence
1160
- of errors, indicating that all happy-path artifacts produced by the step
1161
- are missing.
734
+ This decorator is useful if this step may hang indefinitely.
1162
735
 
736
+ This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
737
+ A timeout is considered to be an exception thrown by the step. It will cause the step to be
738
+ retried if needed and the exception will be caught by the `@catch` decorator, if present.
1163
739
 
1164
- Parameters
1165
- ----------
1166
- var : str, optional, default None
1167
- Name of the artifact in which to store the caught exception.
1168
- If not specified, the exception is not stored.
1169
- print_exception : bool, default True
1170
- Determines whether or not the exception is printed to
1171
- stdout when caught.
1172
- """
1173
- ...
1174
-
1175
- @typing.overload
1176
- def environment(*, vars: typing.Dict[str, str] = {}) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1177
- """
1178
- Specifies environment variables to be set prior to the execution of a step.
740
+ Note that all the values specified in parameters are added together so if you specify
741
+ 60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.
1179
742
 
1180
743
 
1181
744
  Parameters
1182
745
  ----------
1183
- vars : Dict[str, str], default {}
1184
- Dictionary of environment variables to set.
746
+ seconds : int, default 0
747
+ Number of seconds to wait prior to timing out.
748
+ minutes : int, default 0
749
+ Number of minutes to wait prior to timing out.
750
+ hours : int, default 0
751
+ Number of hours to wait prior to timing out.
1185
752
  """
1186
753
  ...
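For illustration, a minimal sketch combining `@timeout` with `@retry` and `@catch` as described above; the durations and artifact name are placeholders:

```python
import time

from metaflow import FlowSpec, catch, retry, step, timeout


class TimeoutExampleFlow(FlowSpec):

    @catch(var="timeout_error")
    @retry(times=1)
    @timeout(minutes=1, seconds=30)  # values add up: effective limit is 1m30s
    @step
    def start(self):
        time.sleep(5)  # stand-in for work that might hang
        self.next(self.end)

    @step
    def end(self):
        # Set only if the step ultimately failed and @catch recorded the exception.
        print(getattr(self, "timeout_error", None))


if __name__ == "__main__":
    TimeoutExampleFlow()
```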
1187
754
 
1188
- @typing.overload
1189
- def environment(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1190
- ...
1191
-
1192
- @typing.overload
1193
- def environment(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1194
- ...
1195
-
1196
- def environment(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, vars: typing.Dict[str, str] = {}):
755
+ def vllm(*, model: str, backend: str, openai_api_server: bool, debug: bool, card_refresh_interval: int, max_retries: int, retry_alert_frequency: int, engine_args: dict) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1197
756
  """
1198
- Specifies environment variables to be set prior to the execution of a step.
757
+ This decorator is used to run vllm APIs as Metaflow task sidecars.
758
+
759
+ User code call
760
+ --------------
761
+ @vllm(
762
+ model="...",
763
+ ...
764
+ )
1199
765
 
766
+ Valid backend options
767
+ ---------------------
768
+ - 'local': Run as a separate process on the local task machine.
1200
769
 
1201
- Parameters
1202
- ----------
1203
- vars : Dict[str, str], default {}
1204
- Dictionary of environment variables to set.
1205
- """
1206
- ...
1207
-
1208
- @typing.overload
1209
- def card(*, type: str = 'default', id: typing.Optional[str] = None, options: typing.Dict[str, typing.Any] = {}, timeout: int = 45) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1210
- """
1211
- Creates a human-readable report, a Metaflow Card, after this step completes.
770
+ Valid model options
771
+ -------------------
772
+ Any HuggingFace model identifier, e.g. 'meta-llama/Llama-3.2-1B'
1212
773
 
1213
- Note that you may add multiple `@card` decorators in a step with different parameters.
774
+ NOTE: vLLM's OpenAI-compatible server serves ONE model per server instance.
775
+ If you need multiple models, you must create multiple @vllm decorators.
1214
776
 
1215
777
 
1216
778
  Parameters
1217
779
  ----------
1218
- type : str, default 'default'
1219
- Card type.
1220
- id : str, optional, default None
1221
- If multiple cards are present, use this id to identify this card.
1222
- options : Dict[str, Any], default {}
1223
- Options passed to the card. The contents depend on the card type.
1224
- timeout : int, default 45
1225
- Interrupt reporting if it takes more than this many seconds.
780
+ model: str
781
+ HuggingFace model identifier to be served by vLLM.
782
+ backend: str
783
+ Determines where and how to run the vLLM process.
784
+ openai_api_server: bool
785
+ Whether to use OpenAI-compatible API server mode (subprocess) instead of native engine.
786
+ Default is False (uses native engine).
787
+ Set to True for backward compatibility with existing code.
788
+ debug: bool
789
+ Whether to turn on verbose debugging logs.
790
+ card_refresh_interval: int
791
+ Interval in seconds for refreshing the vLLM status card.
792
+ Only used when openai_api_server=True.
793
+ max_retries: int
794
+ Maximum number of retries checking for vLLM server startup.
795
+ Only used when openai_api_server=True.
796
+ retry_alert_frequency: int
797
+ Frequency of alert logs for vLLM server startup retries.
798
+ Only used when openai_api_server=True.
799
+ engine_args : dict
800
+ Additional keyword arguments to pass to the vLLM engine.
801
+ For example, `tensor_parallel_size=2`.
1226
802
  """
1227
803
  ...
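For illustration, a heavily hedged sketch that uses only the documented `model` and `backend` parameters and assumes the remaining arguments have defaults; the model id is a placeholder:

```python
from metaflow import FlowSpec, step, vllm


class VLLMExampleFlow(FlowSpec):

    # One @vllm decorator serves exactly one model; stack more decorators for more models.
    @vllm(model="meta-llama/Llama-3.2-1B", backend="local")
    @step
    def start(self):
        # The vLLM sidecar runs for the duration of this task. How it is queried
        # (native engine vs. OpenAI-compatible server) depends on `openai_api_server`.
        self.next(self.end)

    @step
    def end(self):
        pass


if __name__ == "__main__":
    VLLMExampleFlow()
```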
1228
804
 
1229
805
  @typing.overload
1230
- def card(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
806
+ def retry(*, times: int = 3, minutes_between_retries: int = 2) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
807
+ """
808
+ Specifies the number of times the task corresponding
809
+ to a step needs to be retried.
810
+
811
+ This decorator is useful for handling transient errors, such as networking issues.
812
+ If your task contains operations that can't be retried safely, e.g. database updates,
813
+ it is advisable to annotate it with `@retry(times=0)`.
814
+
815
+ This can be used in conjunction with the `@catch` decorator. The `@catch`
816
+ decorator will execute a no-op task after all retries have been exhausted,
817
+ ensuring that the flow execution can continue.
818
+
819
+
820
+ Parameters
821
+ ----------
822
+ times : int, default 3
823
+ Number of times to retry this task.
824
+ minutes_between_retries : int, default 2
825
+ Number of minutes between retries.
826
+ """
1231
827
  ...
1232
828
 
1233
829
  @typing.overload
1234
- def card(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
830
+ def retry(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1235
831
  ...
1236
832
 
1237
- def card(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, type: str = 'default', id: typing.Optional[str] = None, options: typing.Dict[str, typing.Any] = {}, timeout: int = 45):
833
+ @typing.overload
834
+ def retry(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
835
+ ...
836
+
837
+ def retry(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, times: int = 3, minutes_between_retries: int = 2):
1238
838
  """
1239
- Creates a human-readable report, a Metaflow Card, after this step completes.
839
+ Specifies the number of times the task corresponding
840
+ to a step needs to be retried.
1240
841
 
1241
- Note that you may add multiple `@card` decorators in a step with different parameters.
842
+ This decorator is useful for handling transient errors, such as networking issues.
843
+ If your task contains operations that can't be retried safely, e.g. database updates,
844
+ it is advisable to annotate it with `@retry(times=0)`.
845
+
846
+ This can be used in conjunction with the `@catch` decorator. The `@catch`
847
+ decorator will execute a no-op task after all retries have been exhausted,
848
+ ensuring that the flow execution can continue.
1242
849
 
1243
850
 
1244
851
  Parameters
1245
852
  ----------
1246
- type : str, default 'default'
1247
- Card type.
1248
- id : str, optional, default None
1249
- If multiple cards are present, use this id to identify this card.
1250
- options : Dict[str, Any], default {}
1251
- Options passed to the card. The contents depend on the card type.
1252
- timeout : int, default 45
1253
- Interrupt reporting if it takes more than this many seconds.
853
+ times : int, default 3
854
+ Number of times to retry this task.
855
+ minutes_between_retries : int, default 2
856
+ Number of minutes between retries.
1254
857
  """
1255
858
  ...
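For illustration, a minimal sketch contrasting a retried step with one marked `@retry(times=0)` because it performs work that must not run twice:

```python
from metaflow import FlowSpec, retry, step


class RetryExampleFlow(FlowSpec):

    @retry(times=3, minutes_between_retries=2)
    @step
    def start(self):
        # Transient failures (e.g. a flaky network call) are retried automatically.
        self.next(self.end)

    @retry(times=0)  # e.g. a database update that is not safe to repeat
    @step
    def end(self):
        pass


if __name__ == "__main__":
    RetryExampleFlow()
```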
1256
859
 
1257
- def airflow_external_task_sensor(*, timeout: int, poke_interval: int, mode: str, exponential_backoff: bool, pool: str, soft_fail: bool, name: str, description: str, external_dag_id: str, external_task_ids: typing.List[str], allowed_states: typing.List[str], failed_states: typing.List[str], execution_delta: "datetime.timedelta", check_existence: bool) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
860
+ @typing.overload
861
+ def parallel(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1258
862
  """
1259
- The `@airflow_external_task_sensor` decorator attaches an Airflow [ExternalTaskSensor](https://airflow.apache.org/docs/apache-airflow/stable/_api/airflow/sensors/external_task/index.html#airflow.sensors.external_task.ExternalTaskSensor) before the start step of the flow.
1260
- This decorator only works when a flow is scheduled on Airflow and is compiled using `airflow create`. More than one `@airflow_external_task_sensor` can be added as flow decorators. Adding more than one decorator will ensure that the `start` step starts only after all sensors finish.
863
+ Decorator prototype for all step decorators. This function gets specialized
864
+ and imported for all decorator types by _import_plugin_decorators().
865
+ """
866
+ ...
867
+
868
+ @typing.overload
869
+ def parallel(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
870
+ ...
871
+
872
+ def parallel(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
873
+ """
874
+ Decorator prototype for all step decorators. This function gets specialized
875
+ and imported for all decorator types by _import_plugin_decorators().
876
+ """
877
+ ...
878
+
879
+ @typing.overload
880
+ def fast_bakery_internal(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
881
+ """
882
+ Internal decorator to support Fast bakery
883
+ """
884
+ ...
885
+
886
+ @typing.overload
887
+ def fast_bakery_internal(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
888
+ ...
889
+
890
+ def fast_bakery_internal(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
891
+ """
892
+ Internal decorator to support Fast bakery
893
+ """
894
+ ...
895
+
896
+ @typing.overload
897
+ def catch(*, var: typing.Optional[str] = None, print_exception: bool = True) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
898
+ """
899
+ Specifies that the step will succeed under all circumstances.
900
+
901
+ The decorator will create an optional artifact, specified by `var`, which
902
+ contains the exception raised. You can use it to detect the presence
903
+ of errors, indicating that all happy-path artifacts produced by the step
904
+ are missing.
1261
905
 
1262
906
 
1263
907
  Parameters
1264
908
  ----------
1265
- timeout : int
1266
- Time, in seconds before the task times out and fails. (Default: 3600)
1267
- poke_interval : int
1268
- Time in seconds that the job should wait in between each try. (Default: 60)
1269
- mode : str
1270
- How the sensor operates. Options are: { poke | reschedule }. (Default: "poke")
1271
- exponential_backoff : bool
1272
- allow progressively longer waits between pokes by using an exponential backoff algorithm. (Default: True)
1273
- pool : str
1274
- the slot pool this task should run in,
1275
- slot pools are a way to limit concurrency for certain tasks. (Default: None)
1276
- soft_fail : bool
1277
- Set to true to mark the task as SKIPPED on failure. (Default: False)
1278
- name : str
1279
- Name of the sensor on Airflow
1280
- description : str
1281
- Description of sensor in the Airflow UI
1282
- external_dag_id : str
1283
- The dag_id that contains the task you want to wait for.
1284
- external_task_ids : List[str]
1285
- The list of task_ids that you want to wait for.
1286
- If None (default value) the sensor waits for the DAG. (Default: None)
1287
- allowed_states : List[str]
1288
- Iterable of allowed states, (Default: ['success'])
1289
- failed_states : List[str]
1290
- Iterable of failed or dis-allowed states. (Default: None)
1291
- execution_delta : datetime.timedelta
1292
- time difference with the previous execution to look at,
1293
- the default is the same logical date as the current task or DAG. (Default: None)
1294
- check_existence: bool
1295
- Set to True to check if the external task exists or check if
1296
- the DAG to wait for exists. (Default: True)
909
+ var : str, optional, default None
910
+ Name of the artifact in which to store the caught exception.
911
+ If not specified, the exception is not stored.
912
+ print_exception : bool, default True
913
+ Determines whether or not the exception is printed to
914
+ stdout when caught.
1297
915
  """
1298
916
  ...
1299
917
 
1300
- def with_artifact_store(f: typing.Optional[typing.Type[FlowSpecDerived]] = None):
918
+ @typing.overload
919
+ def catch(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
920
+ ...
921
+
922
+ @typing.overload
923
+ def catch(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
924
+ ...
925
+
926
+ def catch(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, var: typing.Optional[str] = None, print_exception: bool = True):
1301
927
  """
1302
- Allows setting external datastores to save data for the
1303
- `@checkpoint`/`@model`/`@huggingface_hub` decorators.
928
+ Specifies that the step will succeed under all circumstances.
1304
929
 
1305
- This decorator is useful when users wish to save data to a different datastore
1306
- than what is configured in Metaflow. This can be for variety of reasons:
930
+ The decorator will create an optional artifact, specified by `var`, which
931
+ contains the exception raised. You can use it to detect the presence
932
+ of errors, indicating that all happy-path artifacts produced by the step
933
+ are missing.
1307
934
 
1308
- 1. Data security: The objects need to be stored in a bucket (object storage) that is not accessible by other flows.
1309
- 2. Data Locality: The location where the task is executing is not located in the same region as the datastore.
1310
- - Example: Metaflow datastore lives in US East, but the task is executing in Finland datacenters.
1311
- 3. Data Lifecycle Policies: The objects need to be archived / managed separately from the Metaflow managed objects.
1312
- - Example: Flow is training very large models that need to be stored separately and will be deleted more aggressively than the Metaflow managed objects.
1313
935
 
1314
- Usage:
936
+ Parameters
1315
937
  ----------
938
+ var : str, optional, default None
939
+ Name of the artifact in which to store the caught exception.
940
+ If not specified, the exception is not stored.
941
+ print_exception : bool, default True
942
+ Determines whether or not the exception is printed to
943
+ stdout when caught.
944
+ """
945
+ ...
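For illustration, a minimal sketch of `@catch` storing a raised exception in an artifact; the flow and artifact names are placeholders:

```python
from metaflow import FlowSpec, catch, step


class CatchExampleFlow(FlowSpec):

    @catch(var="compute_failed", print_exception=True)
    @step
    def start(self):
        self.result = 1 / 0  # raises, but the flow keeps going
        self.next(self.end)

    @step
    def end(self):
        if getattr(self, "compute_failed", None):
            print("start failed:", self.compute_failed)
        else:
            print("result:", self.result)


if __name__ == "__main__":
    CatchExampleFlow()
```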
946
+
947
+ @typing.overload
948
+ def environment(*, vars: typing.Dict[str, str] = {}) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
949
+ """
950
+ Specifies environment variables to be set prior to the execution of a step.
1316
951
 
1317
- - Using a custom IAM role to access the datastore.
1318
-
1319
- ```python
1320
- @with_artifact_store(
1321
- type="s3",
1322
- config=lambda: {
1323
- "root": "s3://my-bucket-foo/path/to/root",
1324
- "role_arn": ROLE,
1325
- },
1326
- )
1327
- class MyFlow(FlowSpec):
1328
-
1329
- @checkpoint
1330
- @step
1331
- def start(self):
1332
- with open("my_file.txt", "w") as f:
1333
- f.write("Hello, World!")
1334
- self.external_bucket_checkpoint = current.checkpoint.save("my_file.txt")
1335
- self.next(self.end)
1336
952
 
1337
- ```
953
+ Parameters
954
+ ----------
955
+ vars : Dict[str, str], default {}
956
+ Dictionary of environment variables to set.
957
+ """
958
+ ...
959
+
960
+ @typing.overload
961
+ def environment(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
962
+ ...
963
+
964
+ @typing.overload
965
+ def environment(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
966
+ ...
967
+
968
+ def environment(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, vars: typing.Dict[str, str] = {}):
969
+ """
970
+ Specifies environment variables to be set prior to the execution of a step.
1338
971
 
1339
- - Using credentials to access the s3-compatible datastore.
1340
972
 
1341
- ```python
1342
- @with_artifact_store(
1343
- type="s3",
1344
- config=lambda: {
1345
- "root": "s3://my-bucket-foo/path/to/root",
1346
- "client_params": {
1347
- "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1348
- "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1349
- },
1350
- },
1351
- )
1352
- class MyFlow(FlowSpec):
973
+ Parameters
974
+ ----------
975
+ vars : Dict[str, str], default {}
976
+ Dictionary of environment variables to set.
977
+ """
978
+ ...
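For illustration, a minimal sketch of setting environment variables for a single step; the variable names and values are placeholders:

```python
import os

from metaflow import FlowSpec, environment, step


class EnvironmentExampleFlow(FlowSpec):

    @environment(vars={"MY_FLAG": "1", "TOKENIZERS_PARALLELISM": "false"})
    @step
    def start(self):
        # The variables are in place before the step body runs, including on remote compute.
        print(os.environ["MY_FLAG"])
        self.next(self.end)

    @step
    def end(self):
        pass


if __name__ == "__main__":
    EnvironmentExampleFlow()
```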
979
+
980
+ def huggingface_hub(*, temp_dir_root: typing.Optional[str] = None, load: typing.Union[typing.List[str], typing.List[typing.Tuple[typing.Dict, str]], typing.List[typing.Tuple[str, str]], typing.List[typing.Dict], None]) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
981
+ """
982
+ Decorator that helps cache, version and store models/datasets from huggingface hub.
1353
983
 
1354
- @checkpoint
1355
- @step
1356
- def start(self):
1357
- with open("my_file.txt", "w") as f:
1358
- f.write("Hello, World!")
1359
- self.external_bucket_checkpoint = current.checkpoint.save("my_file.txt")
1360
- self.next(self.end)
984
+ > Examples
1361
985
 
1362
- ```
986
+ **Usage: creating references of models from huggingface that may be loaded in downstream steps**
987
+ ```python
988
+ @huggingface_hub
989
+ @step
990
+ def pull_model_from_huggingface(self):
991
+ # `current.huggingface_hub.snapshot_download` downloads the model from the Hugging Face Hub
992
+ # and saves it in the backend storage based on the model's `repo_id`. If there exists a model
993
+ # with the same `repo_id` in the backend storage, it will not download the model again. The return
994
+ # value of the function is a reference to the model in the backend storage.
995
+ # This reference can be used to load the model in the subsequent steps via `@model(load=["llama_model"])`
1363
996
 
1364
- - Accessing objects stored in external datastores after task execution.
997
+ self.model_id = "mistralai/Mistral-7B-Instruct-v0.1"
998
+ self.llama_model = current.huggingface_hub.snapshot_download(
999
+ repo_id=self.model_id,
1000
+ allow_patterns=["*.safetensors", "*.json", "tokenizer.*"],
1001
+ )
1002
+ self.next(self.train)
1003
+ ```
1365
1004
 
1366
- ```python
1367
- run = Run("CheckpointsTestsFlow/8992")
1368
- with artifact_store_from(run=run, config={
1369
- "client_params": {
1370
- "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1371
- "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1372
- },
1373
- }):
1374
- with Checkpoint() as cp:
1375
- latest = cp.list(
1376
- task=run["start"].task
1377
- )[0]
1378
- print(latest)
1379
- cp.load(
1380
- latest,
1381
- "test-checkpoints"
1382
- )
1005
+ **Usage: loading models directly from huggingface hub or from cache (from metaflow's datastore)**
1006
+ ```python
1007
+ @huggingface_hub(load=["mistralai/Mistral-7B-Instruct-v0.1"])
1008
+ @step
1009
+ def pull_model_from_huggingface(self):
1010
+ path_to_model = current.huggingface_hub.loaded["mistralai/Mistral-7B-Instruct-v0.1"]
1011
+ ```
1383
1012
 
1384
- task = Task("TorchTuneFlow/8484/train/53673")
1385
- with artifact_store_from(run=run, config={
1386
- "client_params": {
1387
- "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1388
- "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1013
+ ```python
1014
+ @huggingface_hub(load=[("mistralai/Mistral-7B-Instruct-v0.1", "/my-directory"), ("myorg/mistral-lora", "/my-lora-directory")])
1015
+ @step
1016
+ def finetune_model(self):
1017
+ path_to_model = current.huggingface_hub.loaded["mistralai/Mistral-7B-Instruct-v0.1"]
1018
+ # path_to_model will be /my-directory
1019
+ ```
1020
+
1021
+ ```python
1022
+ # Takes all the arguments passed to `snapshot_download`
1023
+ # except for `local_dir`
1024
+ @huggingface_hub(load=[
1025
+ {
1026
+ "repo_id": "mistralai/Mistral-7B-Instruct-v0.1",
1389
1027
  },
1390
- }):
1391
- load_model(
1392
- task.data.model_ref,
1393
- "test-models"
1394
- )
1395
- ```
1396
- Parameters:
1028
+ {
1029
+ "repo_id": "myorg/mistral-lora",
1030
+ "repo_type": "model",
1031
+ },
1032
+ ])
1033
+ @step
1034
+ def finetune_model(self):
1035
+ path_to_model = current.huggingface_hub.loaded["mistralai/Mistral-7B-Instruct-v0.1"]
1036
+ # path_to_model will be /my-directory
1037
+ ```
1038
+
1039
+
1040
+ Parameters
1397
1041
  ----------
1042
+ temp_dir_root : str, optional
1043
+ The root directory that will hold the temporary directory where objects will be downloaded.
1398
1044
 
1399
- type: str
1400
- The type of the datastore. Can be one of 's3', 'gcs', 'azure' or any other supported metaflow Datastore.
1045
+ load: Union[List[str], List[Tuple[Dict, str]], List[Tuple[str, str]], List[Dict], None]
1046
+ The list of repos (models/datasets) to load.
1401
1047
 
1402
- config: dict or Callable
1403
- Dictionary of configuration options for the datastore. The following keys are required:
1404
- - root: The root path in the datastore where the data will be saved. (needs to be in the format expected by the datastore)
1405
- - example: 's3://bucket-name/path/to/root'
1406
- - example: 'gs://bucket-name/path/to/root'
1407
- - example: 'https://myblockacc.blob.core.windows.net/metaflow/'
1408
- - role_arn (optional): AWS IAM role to access s3 bucket (only when `type` is 's3')
1409
- - session_vars (optional): AWS session variables to access s3 bucket (only when `type` is 's3')
1410
- - client_params (optional): AWS client parameters to access s3 bucket (only when `type` is 's3')
1048
+ Loaded repos can be accessed via `current.huggingface_hub.loaded`. If load is set, then the following happens:
1049
+
1050
+ - If repo (model/dataset) is not found in the datastore:
1051
+ - Downloads the repo from Hugging Face Hub to a temporary directory (or uses specified path) for local access
1052
+ - Stores it in Metaflow's datastore (s3/gcs/azure etc.) with a unique name based on repo_type/repo_id
1053
+ - All HF models loaded for a `@step` will be cached separately under flow/step/namespace.
1054
+
1055
+ - If repo is found in the datastore:
1056
+ - Loads it directly from datastore to local path (can be temporary directory or specified path)
1411
1057
  """
1412
1058
  ...
1413
1059
 
1414
- def project(*, name: str, branch: typing.Optional[str] = None, production: bool = False) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1060
+ @typing.overload
1061
+ def test_append_card(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1415
1062
  """
1416
- Specifies what flows belong to the same project.
1063
+ A simple decorator that demonstrates using CardDecoratorInjector
1064
+ to inject a card and render simple markdown content.
1065
+ """
1066
+ ...
1067
+
1068
+ @typing.overload
1069
+ def test_append_card(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1070
+ ...
1071
+
1072
+ def test_append_card(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
1073
+ """
1074
+ A simple decorator that demonstrates using CardDecoratorInjector
1075
+ to inject a card and render simple markdown content.
1076
+ """
1077
+ ...
1078
+
1079
+ def ollama(*, models: list, backend: str, force_pull: bool, cache_update_policy: str, force_cache_update: bool, debug: bool, circuit_breaker_config: dict, timeout_config: dict) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1080
+ """
1081
+ This decorator is used to run Ollama APIs as Metaflow task sidecars.
1417
1082
 
1418
- A project-specific namespace is created for all flows that
1419
- use the same `@project(name)`.
1083
+ User code call
1084
+ --------------
1085
+ @ollama(
1086
+ models=[...],
1087
+ ...
1088
+ )
1089
+
1090
+ Valid backend options
1091
+ ---------------------
1092
+ - 'local': Run as a separate process on the local task machine.
1093
+ - (TODO) 'managed': Outerbounds hosts and selects compute provider.
1094
+ - (TODO) 'remote': Spin up separate instance to serve Ollama models.
1095
+
1096
+ Valid model options
1097
+ -------------------
1098
+ Any model here https://ollama.com/search, e.g. 'llama3.2', 'llama3.3'
1420
1099
 
1421
1100
 
1422
1101
  Parameters
1423
1102
  ----------
1424
- name : str
1425
- Project name. Make sure that the name is unique amongst all
1426
- projects that use the same production scheduler. The name may
1427
- contain only lowercase alphanumeric characters and underscores.
1103
+ models: list[str]
1104
+ List of Ollama containers running models in sidecars.
1105
+ backend: str
1106
+ Determines where and how to run the Ollama process.
1107
+ force_pull: bool
1108
+ Whether to run `ollama pull` no matter what, or first check the remote cache in Metaflow datastore for this model key.
1109
+ cache_update_policy: str
1110
+ Cache update policy: "auto", "force", or "never".
1111
+ force_cache_update: bool
1112
+ Simple override for "force" cache update policy.
1113
+ debug: bool
1114
+ Whether to turn on verbose debugging logs.
1115
+ circuit_breaker_config: dict
1116
+ Configuration for circuit breaker protection. Keys: failure_threshold, recovery_timeout, reset_timeout.
1117
+ timeout_config: dict
1118
+ Configuration for various operation timeouts. Keys: pull, stop, health_check, install, server_startup.
1119
+ """
1120
+ ...
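For illustration, a heavily hedged sketch using only the documented `models` and `backend` parameters and assuming the remaining arguments have defaults; the model name comes from the list above:

```python
from metaflow import FlowSpec, ollama, step


class OllamaExampleFlow(FlowSpec):

    # Runs an Ollama sidecar on the task machine serving the listed models.
    @ollama(models=["llama3.2"], backend="local")
    @step
    def start(self):
        # Client code would talk to the local Ollama API here; the decorator only
        # manages the sidecar lifecycle and the model cache.
        self.next(self.end)

    @step
    def end(self):
        pass


if __name__ == "__main__":
    OllamaExampleFlow()
```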
1121
+
1122
+ def nvct(*, gpu: int, gpu_type: str) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1123
+ """
1124
+ Specifies that this step should execute on DGX cloud.
1428
1125
 
1429
- branch : Optional[str], default None
1430
- The branch to use. If not specified, the branch is set to
1431
- `user.<username>` unless `production` is set to `True`. This can
1432
- also be set on the command line using `--branch` as a top-level option.
1433
- It is an error to specify `branch` in the decorator and on the command line.
1434
1126
 
1435
- production : bool, default False
1436
- Whether or not the branch is the production branch. This can also be set on the
1437
- command line using `--production` as a top-level option. It is an error to specify
1438
- `production` in the decorator and on the command line.
1439
- The project branch name will be:
1440
- - if `branch` is specified:
1441
- - if `production` is True: `prod.<branch>`
1442
- - if `production` is False: `test.<branch>`
1443
- - if `branch` is not specified:
1444
- - if `production` is True: `prod`
1445
- - if `production` is False: `user.<username>`
1127
+ Parameters
1128
+ ----------
1129
+ gpu : int
1130
+ Number of GPUs to use.
1131
+ gpu_type : str
1132
+ Type of Nvidia GPU to use.
1446
1133
  """
1447
1134
  ...
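For illustration, a heavily hedged sketch of applying `@nvct`; the `gpu_type` string is a hypothetical placeholder:

```python
from metaflow import FlowSpec, nvct, step


class NvctExampleFlow(FlowSpec):

    @nvct(gpu=1, gpu_type="H100")  # "H100" is a placeholder value
    @step
    def start(self):
        # Runs this task on DGX Cloud with the requested GPU allocation.
        self.next(self.end)

    @step
    def end(self):
        pass


if __name__ == "__main__":
    NvctExampleFlow()
```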
1448
1135
 
1449
1136
  @typing.overload
1450
- def conda_base(*, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1137
+ def pypi(*, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1451
1138
  """
1452
- Specifies the Conda environment for all steps of the flow.
1139
+ Specifies the PyPI packages for the step.
1453
1140
 
1454
- Use `@conda_base` to set common libraries required by all
1455
- steps and use `@conda` to specify step-specific additions.
1141
+ Information in this decorator will augment any
1142
+ attributes set in the `@pypi_base` flow-level decorator. Hence,
1143
+ you can use `@pypi_base` to set packages required by all
1144
+ steps and use `@pypi` to specify step-specific overrides.
1456
1145
 
1457
1146
 
1458
1147
  Parameters
1459
1148
  ----------
1460
- packages : Dict[str, str], default {}
1461
- Packages to use for this flow. The key is the name of the package
1149
+ packages : Dict[str, str], default: {}
1150
+ Packages to use for this step. The key is the name of the package
1462
1151
  and the value is the version to use.
1463
- libraries : Dict[str, str], default {}
1464
- Supported for backward compatibility. When used with packages, packages will take precedence.
1465
- python : str, optional, default None
1152
+ python : str, optional, default: None
1466
1153
  Version of Python to use, e.g. '3.7.4'. A default value of None implies
1467
1154
  that the version used will correspond to the version of the Python interpreter used to start the run.
1468
- disabled : bool, default False
1469
- If set to True, disables Conda.
1470
1155
  """
1471
1156
  ...
1472
1157
 
1473
1158
  @typing.overload
1474
- def conda_base(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1159
+ def pypi(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1475
1160
  ...
1476
1161
 
1477
- def conda_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False):
1162
+ @typing.overload
1163
+ def pypi(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1164
+ ...
1165
+
1166
+ def pypi(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None):
1478
1167
  """
1479
- Specifies the Conda environment for all steps of the flow.
1168
+ Specifies the PyPI packages for the step.
1480
1169
 
1481
- Use `@conda_base` to set common libraries required by all
1482
- steps and use `@conda` to specify step-specific additions.
1170
+ Information in this decorator will augment any
1171
+ attributes set in the `@pypi_base` flow-level decorator. Hence,
1172
+ you can use `@pypi_base` to set packages required by all
1173
+ steps and use `@pypi` to specify step-specific overrides.
1483
1174
 
1484
1175
 
1485
1176
  Parameters
1486
1177
  ----------
1487
- packages : Dict[str, str], default {}
1488
- Packages to use for this flow. The key is the name of the package
1178
+ packages : Dict[str, str], default: {}
1179
+ Packages to use for this step. The key is the name of the package
1489
1180
  and the value is the version to use.
1490
- libraries : Dict[str, str], default {}
1491
- Supported for backward compatibility. When used with packages, packages will take precedence.
1492
- python : str, optional, default None
1181
+ python : str, optional, default: None
1493
1182
  Version of Python to use, e.g. '3.7.4'. A default value of None implies
1494
1183
  that the version used will correspond to the version of the Python interpreter used to start the run.
1495
- disabled : bool, default False
1496
- If set to True, disables Conda.
1184
+ """
1185
+ ...
1186
+
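For reference, a minimal usage sketch of the step-level `@pypi` decorator documented above combined with a flow-level `@pypi_base`; the flow name, package names, and versions are illustrative assumptions rather than anything defined in these stubs:

```python
# Hypothetical flow showing @pypi_base (flow-wide packages) plus a
# step-specific @pypi override; run with `--environment=pypi` (or conda)
# for the isolated environments to take effect.
from metaflow import FlowSpec, step, pypi, pypi_base


@pypi_base(packages={"pandas": "2.2.2"}, python="3.11.5")
class PyPIExampleFlow(FlowSpec):

    @step
    def start(self):
        import pandas as pd  # provided by @pypi_base for every step
        self.n_rows = len(pd.DataFrame({"a": [1, 2, 3]}))
        self.next(self.train)

    @pypi(packages={"scikit-learn": "1.5.0"})  # step-specific addition
    @step
    def train(self):
        import sklearn  # only available in this step's environment
        print("sklearn", sklearn.__version__, "rows:", self.n_rows)
        self.next(self.end)

    @step
    def end(self):
        pass


if __name__ == "__main__":
    PyPIExampleFlow()
```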
1187
+ def kubernetes(*, cpu: int = 1, memory: int = 4096, disk: int = 10240, image: typing.Optional[str] = None, image_pull_policy: str = 'KUBERNETES_IMAGE_PULL_POLICY', image_pull_secrets: typing.List[str] = [], service_account: str = 'METAFLOW_KUBERNETES_SERVICE_ACCOUNT', secrets: typing.Optional[typing.List[str]] = None, node_selector: typing.Union[typing.Dict[str, str], str, None] = None, namespace: str = 'METAFLOW_KUBERNETES_NAMESPACE', gpu: typing.Optional[int] = None, gpu_vendor: str = 'KUBERNETES_GPU_VENDOR', tolerations: typing.List[str] = [], labels: typing.Dict[str, str] = 'METAFLOW_KUBERNETES_LABELS', annotations: typing.Dict[str, str] = 'METAFLOW_KUBERNETES_ANNOTATIONS', use_tmpfs: bool = False, tmpfs_tempdir: bool = True, tmpfs_size: typing.Optional[int] = None, tmpfs_path: typing.Optional[str] = '/metaflow_temp', persistent_volume_claims: typing.Optional[typing.Dict[str, str]] = None, shared_memory: typing.Optional[int] = None, port: typing.Optional[int] = None, compute_pool: typing.Optional[str] = None, hostname_resolution_timeout: int = 600, qos: str = 'Burstable', security_context: typing.Optional[typing.Dict[str, typing.Any]] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1188
+ """
1189
+ Specifies that this step should execute on Kubernetes.
1190
+
1191
+
1192
+ Parameters
1193
+ ----------
1194
+ cpu : int, default 1
1195
+ Number of CPUs required for this step. If `@resources` is
1196
+ also present, the maximum value from all decorators is used.
1197
+ memory : int, default 4096
1198
+ Memory size (in MB) required for this step. If
1199
+ `@resources` is also present, the maximum value from all decorators is
1200
+ used.
1201
+ disk : int, default 10240
1202
+ Disk size (in MB) required for this step. If
1203
+ `@resources` is also present, the maximum value from all decorators is
1204
+ used.
1205
+ image : str, optional, default None
1206
+ Docker image to use when launching on Kubernetes. If not specified, and
1207
+ METAFLOW_KUBERNETES_CONTAINER_IMAGE is specified, that image is used. If
1208
+ not, a default Docker image mapping to the current version of Python is used.
1209
+ image_pull_policy: str, default KUBERNETES_IMAGE_PULL_POLICY
1210
+ If given, the imagePullPolicy to be applied to the Docker image of the step.
1211
+ image_pull_secrets: List[str], default []
1212
+ The default is extracted from METAFLOW_KUBERNETES_IMAGE_PULL_SECRETS.
1213
+ Kubernetes image pull secrets to use when pulling container images
1214
+ in Kubernetes.
1215
+ service_account : str, default METAFLOW_KUBERNETES_SERVICE_ACCOUNT
1216
+ Kubernetes service account to use when launching pod in Kubernetes.
1217
+ secrets : List[str], optional, default None
1218
+ Kubernetes secrets to use when launching pod in Kubernetes. These
1219
+ secrets are in addition to the ones defined in `METAFLOW_KUBERNETES_SECRETS`
1220
+ in Metaflow configuration.
1221
+ node_selector: Union[Dict[str,str], str], optional, default None
1222
+ Kubernetes node selector(s) to apply to the pod running the task.
1223
+ Can be passed in as a comma separated string of values e.g.
1224
+ 'kubernetes.io/os=linux,kubernetes.io/arch=amd64' or as a dictionary
1225
+ {'kubernetes.io/os': 'linux', 'kubernetes.io/arch': 'amd64'}
1226
+ namespace : str, default METAFLOW_KUBERNETES_NAMESPACE
1227
+ Kubernetes namespace to use when launching pod in Kubernetes.
1228
+ gpu : int, optional, default None
1229
+ Number of GPUs required for this step. A value of zero implies that
1230
+ the scheduled node should not have GPUs.
1231
+ gpu_vendor : str, default KUBERNETES_GPU_VENDOR
1232
+ The vendor of the GPUs to be used for this step.
1233
+ tolerations : List[str], default []
1234
+ The default is extracted from METAFLOW_KUBERNETES_TOLERATIONS.
1235
+ Kubernetes tolerations to use when launching pod in Kubernetes.
1236
+ labels: Dict[str, str], default: METAFLOW_KUBERNETES_LABELS
1237
+ Kubernetes labels to use when launching pod in Kubernetes.
1238
+ annotations: Dict[str, str], default: METAFLOW_KUBERNETES_ANNOTATIONS
1239
+ Kubernetes annotations to use when launching pod in Kubernetes.
1240
+ use_tmpfs : bool, default False
1241
+ This enables an explicit tmpfs mount for this step.
1242
+ tmpfs_tempdir : bool, default True
1243
+ Sets METAFLOW_TEMPDIR to tmpfs_path if set for this step.
1244
+ tmpfs_size : int, optional, default: None
1245
+ The value for the size (in MiB) of the tmpfs mount for this step.
1246
+ This parameter maps to the `--tmpfs` option in Docker. Defaults to 50% of the
1247
+ memory allocated for this step.
1248
+ tmpfs_path : str, optional, default /metaflow_temp
1249
+ Path to tmpfs mount for this step.
1250
+ persistent_volume_claims : Dict[str, str], optional, default None
1251
+ A map (dictionary) of persistent volumes to be mounted to the pod for this step. The map is from persistent
1252
+ volumes to the path to which the volume is to be mounted, e.g., `{'pvc-name': '/path/to/mount/on'}`.
1253
+ shared_memory: int, optional
1254
+ Shared memory size (in MiB) required for this step
1255
+ port: int, optional
1256
+ Port number to specify in the Kubernetes job object
1257
+ compute_pool : str, optional, default None
1258
+ Compute pool to be used for this step.
1259
+ If not specified, any accessible compute pool within the perimeter is used.
1260
+ hostname_resolution_timeout: int, default 10 * 60
1261
+ Timeout in seconds for the worker tasks in the gang-scheduled cluster to resolve the hostname of the control task.
1262
+ Only applicable when @parallel is used.
1263
+ qos: str, default: Burstable
1264
+ Quality of Service class to assign to the pod. Supported values are: Guaranteed, Burstable, BestEffort
1265
+
1266
+ security_context: Dict[str, Any], optional, default None
1267
+ Container security context. Applies to the task container. Allows the following keys:
1268
+ - privileged: bool, optional, default None
1269
+ - allow_privilege_escalation: bool, optional, default None
1270
+ - run_as_user: int, optional, default None
1271
+ - run_as_group: int, optional, default None
1272
+ - run_as_non_root: bool, optional, default None
1497
1273
  """
1498
1274
  ...
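A hedged sketch of the `@kubernetes` step decorator documented above; the image, resource sizes, and node selector are placeholder values, and the interaction with `@resources` follows the max-of-both rule described in the parameters:

```python
# Illustrative only: image, sizes, and selector are placeholders.
from metaflow import FlowSpec, step, kubernetes, resources


class K8sExampleFlow(FlowSpec):

    @kubernetes(
        cpu=2,
        memory=8192,                       # MB; the max of this and @resources is used
        disk=20480,                        # MB
        image="python:3.11",               # placeholder image
        node_selector="kubernetes.io/arch=amd64",
        use_tmpfs=True,
        tmpfs_size=1024,                   # MiB
    )
    @resources(cpu=1, memory=4096)         # the larger request per dimension wins
    @step
    def start(self):
        print("running on Kubernetes")
        self.next(self.end)

    @step
    def end(self):
        pass


if __name__ == "__main__":
    K8sExampleFlow()
```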
1499
1275
 
@@ -1540,6 +1316,41 @@ def airflow_s3_key_sensor(*, timeout: int, poke_interval: int, mode: str, expone
1540
1316
  """
1541
1317
  ...
1542
1318
 
1319
+ def project(*, name: str, branch: typing.Optional[str] = None, production: bool = False) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1320
+ """
1321
+ Specifies what flows belong to the same project.
1322
+
1323
+ A project-specific namespace is created for all flows that
1324
+ use the same `@project(name)`.
1325
+
1326
+
1327
+ Parameters
1328
+ ----------
1329
+ name : str
1330
+ Project name. Make sure that the name is unique amongst all
1331
+ projects that use the same production scheduler. The name may
1332
+ contain only lowercase alphanumeric characters and underscores.
1333
+
1334
+ branch : Optional[str], default None
1335
+ The branch to use. If not specified, the branch is set to
1336
+ `user.<username>` unless `production` is set to `True`. This can
1337
+ also be set on the command line using `--branch` as a top-level option.
1338
+ It is an error to specify `branch` in the decorator and on the command line.
1339
+
1340
+ production : bool, default False
1341
+ Whether or not the branch is the production branch. This can also be set on the
1342
+ command line using `--production` as a top-level option. It is an error to specify
1343
+ `production` in the decorator and on the command line.
1344
+ The project branch name will be:
1345
+ - if `branch` is specified:
1346
+ - if `production` is True: `prod.<branch>`
1347
+ - if `production` is False: `test.<branch>`
1348
+ - if `branch` is not specified:
1349
+ - if `production` is True: `prod`
1350
+ - if `production` is False: `user.<username>`
1351
+ """
1352
+ ...
1353
+
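A brief sketch of `@project` branch resolution as described above; the project name is illustrative, and the `current.*` attributes shown are assumed to be populated by the decorator at run time:

```python
# Hypothetical project flow. Running plainly resolves the branch to
# `user.<username>`; `--branch my_feature` resolves to `test.my_feature`;
# `--production` resolves to `prod`.
from metaflow import FlowSpec, step, project, current


@project(name="fraud_detection")
class ProjectExampleFlow(FlowSpec):

    @step
    def start(self):
        # These attributes are assumed to expose the resolved project/branch.
        print(current.project_name, current.branch_name, current.is_production)
        self.next(self.end)

    @step
    def end(self):
        pass


if __name__ == "__main__":
    ProjectExampleFlow()
```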
1543
1354
  @typing.overload
1544
1355
  def pypi_base(*, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1545
1356
  """
@@ -1682,6 +1493,49 @@ def trigger_on_finish(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *
1682
1493
  """
1683
1494
  ...
1684
1495
 
1496
+ def airflow_external_task_sensor(*, timeout: int, poke_interval: int, mode: str, exponential_backoff: bool, pool: str, soft_fail: bool, name: str, description: str, external_dag_id: str, external_task_ids: typing.List[str], allowed_states: typing.List[str], failed_states: typing.List[str], execution_delta: "datetime.timedelta", check_existence: bool) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1497
+ """
1498
+ The `@airflow_external_task_sensor` decorator attaches a Airflow [ExternalTaskSensor](https://airflow.apache.org/docs/apache-airflow/stable/_api/airflow/sensors/external_task/index.html#airflow.sensors.external_task.ExternalTaskSensor) before the start step of the flow.
1499
+ This decorator only works when a flow is scheduled on Airflow and is compiled using `airflow create`. More than one `@airflow_external_task_sensor` can be added as a flow decorators. Adding more than one decorator will ensure that `start` step starts only after all sensors finish.
1500
+
1501
+
1502
+ Parameters
1503
+ ----------
1504
+ timeout : int
1505
+ Time, in seconds, before the task times out and fails. (Default: 3600)
1506
+ poke_interval : int
1507
+ Time in seconds that the job should wait in between each try. (Default: 60)
1508
+ mode : str
1509
+ How the sensor operates. Options are: { poke | reschedule }. (Default: "poke")
1510
+ exponential_backoff : bool
1511
+ Allow progressively longer waits between pokes by using an exponential backoff algorithm. (Default: True)
1512
+ pool : str
1513
+ The slot pool this task should run in;
1514
+ slot pools are a way to limit concurrency for certain tasks. (Default: None)
1515
+ soft_fail : bool
1516
+ Set to true to mark the task as SKIPPED on failure. (Default: False)
1517
+ name : str
1518
+ Name of the sensor on Airflow
1519
+ description : str
1520
+ Description of sensor in the Airflow UI
1521
+ external_dag_id : str
1522
+ The dag_id that contains the task you want to wait for.
1523
+ external_task_ids : List[str]
1524
+ The list of task_ids that you want to wait for.
1525
+ If None (default value) the sensor waits for the DAG. (Default: None)
1526
+ allowed_states : List[str]
1527
+ Iterable of allowed states, (Default: ['success'])
1528
+ failed_states : List[str]
1529
+ Iterable of failed or dis-allowed states. (Default: None)
1530
+ execution_delta : datetime.timedelta
1531
+ Time difference with the previous execution to look at;
1532
+ the default is the same logical date as the current task or DAG. (Default: None)
1533
+ check_existence: bool
1534
+ Set to True to check if the external task exists or check if
1535
+ the DAG to wait for exists. (Default: True)
1536
+ """
1537
+ ...
1538
+
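An illustrative sketch of attaching `@airflow_external_task_sensor` to a flow; the DAG id, sensor name, and pool are placeholder values, and the remaining arguments mirror the defaults listed above:

```python
# Placeholder identifiers throughout; the sensor is attached before `start`
# when the flow is compiled with `airflow create`.
from metaflow import FlowSpec, step, airflow_external_task_sensor


@airflow_external_task_sensor(
    timeout=3600,
    poke_interval=60,
    mode="reschedule",
    exponential_backoff=True,
    pool="default_pool",
    soft_fail=False,
    name="wait_for_upstream_etl",
    description="Block until the upstream ETL DAG has succeeded",
    external_dag_id="upstream_etl",   # placeholder DAG id
    external_task_ids=None,           # per the docstring, None waits for the whole DAG
    allowed_states=["success"],
    failed_states=["failed"],
    execution_delta=None,             # docstring default: same logical date
    check_existence=True,
)
class SensorExampleFlow(FlowSpec):

    @step
    def start(self):
        self.next(self.end)

    @step
    def end(self):
        pass


if __name__ == "__main__":
    SensorExampleFlow()
```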
1685
1539
  @typing.overload
1686
1540
  def schedule(*, hourly: bool = False, daily: bool = True, weekly: bool = False, cron: typing.Optional[str] = None, timezone: typing.Optional[str] = None) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1687
1541
  """
@@ -1826,5 +1680,170 @@ def trigger(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, event: t
1826
1680
  """
1827
1681
  ...
1828
1682
 
1683
+ def with_artifact_store(f: typing.Optional[typing.Type[FlowSpecDerived]] = None):
1684
+ """
1685
+ Allows setting external datastores to save data for the
1686
+ `@checkpoint`/`@model`/`@huggingface_hub` decorators.
1687
+
1688
+ This decorator is useful when users wish to save data to a different datastore
1689
+ than what is configured in Metaflow. This can be for variety of reasons:
1690
+
1691
+ 1. Data security: The objects needs to be stored in a bucket (object storage) that is not accessible by other flows.
1692
+ 2. Data Locality: The location where the task is executing is not located in the same region as the datastore.
1693
+ - Example: Metaflow datastore lives in US East, but the task is executing in Finland datacenters.
1694
+ 3. Data Lifecycle Policies: The objects need to be archived / managed separately from the Metaflow managed objects.
1695
+ - Example: Flow is training very large models that need to be stored separately and will be deleted more aggressively than the Metaflow managed objects.
1696
+
1697
+ Usage:
1698
+ ----------
1699
+
1700
+ - Using a custom IAM role to access the datastore.
1701
+
1702
+ ```python
1703
+ @with_artifact_store(
1704
+ type="s3",
1705
+ config=lambda: {
1706
+ "root": "s3://my-bucket-foo/path/to/root",
1707
+ "role_arn": ROLE,
1708
+ },
1709
+ )
1710
+ class MyFlow(FlowSpec):
1711
+
1712
+ @checkpoint
1713
+ @step
1714
+ def start(self):
1715
+ with open("my_file.txt", "w") as f:
1716
+ f.write("Hello, World!")
1717
+ self.external_bucket_checkpoint = current.checkpoint.save("my_file.txt")
1718
+ self.next(self.end)
1719
+
1720
+ ```
1721
+
1722
+ - Using credentials to access the s3-compatible datastore.
1723
+
1724
+ ```python
1725
+ @with_artifact_store(
1726
+ type="s3",
1727
+ config=lambda: {
1728
+ "root": "s3://my-bucket-foo/path/to/root",
1729
+ "client_params": {
1730
+ "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1731
+ "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1732
+ },
1733
+ },
1734
+ )
1735
+ class MyFlow(FlowSpec):
1736
+
1737
+ @checkpoint
1738
+ @step
1739
+ def start(self):
1740
+ with open("my_file.txt", "w") as f:
1741
+ f.write("Hello, World!")
1742
+ self.external_bucket_checkpoint = current.checkpoint.save("my_file.txt")
1743
+ self.next(self.end)
1744
+
1745
+ ```
1746
+
1747
+ - Accessing objects stored in external datastores after task execution.
1748
+
1749
+ ```python
1750
+ run = Run("CheckpointsTestsFlow/8992")
1751
+ with artifact_store_from(run=run, config={
1752
+ "client_params": {
1753
+ "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1754
+ "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1755
+ },
1756
+ }):
1757
+ with Checkpoint() as cp:
1758
+ latest = cp.list(
1759
+ task=run["start"].task
1760
+ )[0]
1761
+ print(latest)
1762
+ cp.load(
1763
+ latest,
1764
+ "test-checkpoints"
1765
+ )
1766
+
1767
+ task = Task("TorchTuneFlow/8484/train/53673")
1768
+ with artifact_store_from(run=run, config={
1769
+ "client_params": {
1770
+ "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1771
+ "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1772
+ },
1773
+ }):
1774
+ load_model(
1775
+ task.data.model_ref,
1776
+ "test-models"
1777
+ )
1778
+ ```
1779
+ Parameters
1780
+ ----------
1781
+
1782
+ type: str
1783
+ The type of the datastore. Can be one of 's3', 'gcs', 'azure' or any other supported metaflow Datastore.
1784
+
1785
+ config: dict or Callable
1786
+ Dictionary of configuration options for the datastore. The following keys are required:
1787
+ - root: The root path in the datastore where the data will be saved. (needs to be in the format expected by the datastore)
1788
+ - example: 's3://bucket-name/path/to/root'
1789
+ - example: 'gs://bucket-name/path/to/root'
1790
+ - example: 'https://myblockacc.blob.core.windows.net/metaflow/'
1791
+ - role_arn (optional): AWS IAM role to access s3 bucket (only when `type` is 's3')
1792
+ - session_vars (optional): AWS session variables to access s3 bucket (only when `type` is 's3')
1793
+ - client_params (optional): AWS client parameters to access s3 bucket (only when `type` is 's3')
1794
+ """
1795
+ ...
1796
+
1797
+ @typing.overload
1798
+ def conda_base(*, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1799
+ """
1800
+ Specifies the Conda environment for all steps of the flow.
1801
+
1802
+ Use `@conda_base` to set common libraries required by all
1803
+ steps and use `@conda` to specify step-specific additions.
1804
+
1805
+
1806
+ Parameters
1807
+ ----------
1808
+ packages : Dict[str, str], default {}
1809
+ Packages to use for this flow. The key is the name of the package
1810
+ and the value is the version to use.
1811
+ libraries : Dict[str, str], default {}
1812
+ Supported for backward compatibility. When used with packages, packages will take precedence.
1813
+ python : str, optional, default None
1814
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
1815
+ that the version used will correspond to the version of the Python interpreter used to start the run.
1816
+ disabled : bool, default False
1817
+ If set to True, disables Conda.
1818
+ """
1819
+ ...
1820
+
1821
+ @typing.overload
1822
+ def conda_base(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1823
+ ...
1824
+
1825
+ def conda_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False):
1826
+ """
1827
+ Specifies the Conda environment for all steps of the flow.
1828
+
1829
+ Use `@conda_base` to set common libraries required by all
1830
+ steps and use `@conda` to specify step-specific additions.
1831
+
1832
+
1833
+ Parameters
1834
+ ----------
1835
+ packages : Dict[str, str], default {}
1836
+ Packages to use for this flow. The key is the name of the package
1837
+ and the value is the version to use.
1838
+ libraries : Dict[str, str], default {}
1839
+ Supported for backward compatibility. When used with packages, packages will take precedence.
1840
+ python : str, optional, default None
1841
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
1842
+ that the version used will correspond to the version of the Python interpreter used to start the run.
1843
+ disabled : bool, default False
1844
+ If set to True, disables Conda.
1845
+ """
1846
+ ...
1847
+
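A minimal sketch pairing the flow-level `@conda_base` documented above with a step-level `@conda` addition; package names and versions are illustrative:

```python
# Hypothetical flow with a flow-wide Conda environment plus a step-specific
# addition; run with `--environment=conda` for the environments to take effect.
from metaflow import FlowSpec, step, conda, conda_base


@conda_base(packages={"numpy": "1.26.4"}, python="3.11.5")
class CondaExampleFlow(FlowSpec):

    @step
    def start(self):
        import numpy as np  # provided by @conda_base for every step
        self.total = float(np.arange(10).sum())
        self.next(self.report)

    @conda(packages={"pandas": "2.2.2"})  # step-specific addition
    @step
    def report(self):
        import pandas as pd
        print(pd.DataFrame({"total": [self.total]}))
        self.next(self.end)

    @step
    def end(self):
        pass


if __name__ == "__main__":
    CondaExampleFlow()
```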
1829
1848
  pkg_name: str
1830
1849