ob-metaflow-stubs 6.0.3.188rc4__py2.py3-none-any.whl → 6.0.4.0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (249) hide show
  1. metaflow-stubs/__init__.pyi +938 -934
  2. metaflow-stubs/cards.pyi +2 -2
  3. metaflow-stubs/cli.pyi +2 -2
  4. metaflow-stubs/cli_components/__init__.pyi +2 -2
  5. metaflow-stubs/cli_components/utils.pyi +2 -2
  6. metaflow-stubs/client/__init__.pyi +2 -2
  7. metaflow-stubs/client/core.pyi +3 -3
  8. metaflow-stubs/client/filecache.pyi +3 -3
  9. metaflow-stubs/events.pyi +3 -3
  10. metaflow-stubs/exception.pyi +2 -2
  11. metaflow-stubs/flowspec.pyi +5 -5
  12. metaflow-stubs/generated_for.txt +1 -1
  13. metaflow-stubs/includefile.pyi +2 -2
  14. metaflow-stubs/info_file.pyi +2 -2
  15. metaflow-stubs/metadata_provider/__init__.pyi +2 -2
  16. metaflow-stubs/metadata_provider/heartbeat.pyi +2 -2
  17. metaflow-stubs/metadata_provider/metadata.pyi +3 -3
  18. metaflow-stubs/metadata_provider/util.pyi +2 -2
  19. metaflow-stubs/metaflow_config.pyi +2 -2
  20. metaflow-stubs/metaflow_current.pyi +45 -45
  21. metaflow-stubs/metaflow_git.pyi +2 -2
  22. metaflow-stubs/mf_extensions/__init__.pyi +2 -2
  23. metaflow-stubs/mf_extensions/obcheckpoint/__init__.pyi +2 -2
  24. metaflow-stubs/mf_extensions/obcheckpoint/plugins/__init__.pyi +2 -2
  25. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/__init__.pyi +2 -2
  26. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/__init__.pyi +2 -2
  27. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/async_cards.pyi +2 -2
  28. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/deco_injection_mixin.pyi +2 -2
  29. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/extra_components.pyi +4 -4
  30. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/__init__.pyi +2 -2
  31. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/cards/__init__.pyi +2 -2
  32. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/cards/checkpoint_lister.pyi +4 -4
  33. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/cards/lineage_card.pyi +2 -2
  34. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/checkpoint_storage.pyi +5 -5
  35. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/constructors.pyi +2 -2
  36. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/core.pyi +4 -4
  37. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/decorator.pyi +4 -4
  38. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/exceptions.pyi +2 -2
  39. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/final_api.pyi +3 -3
  40. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/lineage.pyi +2 -2
  41. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/__init__.pyi +2 -2
  42. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/context.pyi +3 -3
  43. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/core.pyi +2 -2
  44. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/decorator.pyi +2 -2
  45. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/exceptions.pyi +2 -2
  46. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/task_utils.pyi +4 -4
  47. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/utils.pyi +2 -2
  48. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastructures.pyi +2 -2
  49. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/exceptions.pyi +2 -2
  50. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/hf_hub/__init__.pyi +2 -2
  51. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/hf_hub/decorator.pyi +2 -2
  52. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/__init__.pyi +2 -2
  53. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/core.pyi +3 -3
  54. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/exceptions.pyi +2 -2
  55. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/model_storage.pyi +4 -4
  56. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/__init__.pyi +2 -2
  57. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/flowspec_utils.pyi +2 -2
  58. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/general.pyi +2 -2
  59. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/identity_utils.pyi +3 -3
  60. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/serialization_handler/__init__.pyi +2 -2
  61. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/serialization_handler/base.pyi +2 -2
  62. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/serialization_handler/tar.pyi +3 -3
  63. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/tar_utils.pyi +3 -3
  64. metaflow-stubs/mf_extensions/outerbounds/__init__.pyi +2 -2
  65. metaflow-stubs/mf_extensions/outerbounds/plugins/__init__.pyi +2 -2
  66. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/__init__.pyi +2 -2
  67. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/__init__.pyi +2 -2
  68. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/_state_machine.pyi +38 -31
  69. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/_vendor/__init__.pyi +2 -2
  70. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/_vendor/spinner/__init__.pyi +2 -2
  71. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/_vendor/spinner/spinners.pyi +2 -2
  72. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/app_cli.pyi +2 -2
  73. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/app_config.pyi +3 -3
  74. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/capsule.pyi +3 -3
  75. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/click_importer.pyi +2 -2
  76. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/code_package/__init__.pyi +2 -2
  77. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/code_package/code_packager.pyi +2 -2
  78. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/__init__.pyi +2 -2
  79. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/cli_generator.pyi +2 -2
  80. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/config_utils.pyi +12 -3
  81. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/schema_export.pyi +2 -2
  82. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/typed_configs.pyi +29 -27
  83. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/unified_config.pyi +5 -4
  84. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/dependencies.pyi +3 -3
  85. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/deployer.pyi +5 -5
  86. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/experimental/__init__.pyi +2 -2
  87. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/perimeters.pyi +2 -2
  88. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/utils.pyi +3 -3
  89. metaflow-stubs/mf_extensions/outerbounds/plugins/aws/__init__.pyi +2 -2
  90. metaflow-stubs/mf_extensions/outerbounds/plugins/aws/assume_role_decorator.pyi +2 -2
  91. metaflow-stubs/mf_extensions/outerbounds/plugins/card_utilities/__init__.pyi +2 -2
  92. metaflow-stubs/mf_extensions/outerbounds/plugins/card_utilities/async_cards.pyi +3 -3
  93. metaflow-stubs/mf_extensions/outerbounds/plugins/card_utilities/injector.pyi +2 -2
  94. metaflow-stubs/mf_extensions/outerbounds/plugins/checkpoint_datastores/__init__.pyi +2 -2
  95. metaflow-stubs/mf_extensions/outerbounds/plugins/checkpoint_datastores/coreweave.pyi +2 -2
  96. metaflow-stubs/mf_extensions/outerbounds/plugins/checkpoint_datastores/nebius.pyi +2 -2
  97. metaflow-stubs/mf_extensions/outerbounds/plugins/fast_bakery/__init__.pyi +2 -2
  98. metaflow-stubs/mf_extensions/outerbounds/plugins/fast_bakery/baker.pyi +4 -4
  99. metaflow-stubs/mf_extensions/outerbounds/plugins/fast_bakery/docker_environment.pyi +3 -3
  100. metaflow-stubs/mf_extensions/outerbounds/plugins/fast_bakery/fast_bakery.pyi +2 -2
  101. metaflow-stubs/mf_extensions/outerbounds/plugins/kubernetes/__init__.pyi +2 -2
  102. metaflow-stubs/mf_extensions/outerbounds/plugins/kubernetes/pod_killer.pyi +2 -2
  103. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/__init__.pyi +2 -2
  104. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/constants.pyi +2 -2
  105. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/exceptions.pyi +2 -2
  106. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/ollama.pyi +2 -2
  107. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/status_card.pyi +2 -2
  108. metaflow-stubs/mf_extensions/outerbounds/plugins/snowflake/__init__.pyi +2 -2
  109. metaflow-stubs/mf_extensions/outerbounds/plugins/snowflake/snowflake.pyi +2 -2
  110. metaflow-stubs/mf_extensions/outerbounds/profilers/__init__.pyi +2 -2
  111. metaflow-stubs/mf_extensions/outerbounds/profilers/gpu.pyi +2 -2
  112. metaflow-stubs/mf_extensions/outerbounds/remote_config.pyi +2 -2
  113. metaflow-stubs/mf_extensions/outerbounds/toplevel/__init__.pyi +2 -2
  114. metaflow-stubs/mf_extensions/outerbounds/toplevel/global_aliases_for_metaflow_package.pyi +2 -2
  115. metaflow-stubs/multicore_utils.pyi +2 -2
  116. metaflow-stubs/ob_internal.pyi +2 -2
  117. metaflow-stubs/parameters.pyi +3 -3
  118. metaflow-stubs/plugins/__init__.pyi +13 -11
  119. metaflow-stubs/plugins/airflow/__init__.pyi +2 -2
  120. metaflow-stubs/plugins/airflow/airflow_utils.pyi +2 -2
  121. metaflow-stubs/plugins/airflow/exception.pyi +2 -2
  122. metaflow-stubs/plugins/airflow/sensors/__init__.pyi +2 -2
  123. metaflow-stubs/plugins/airflow/sensors/base_sensor.pyi +2 -2
  124. metaflow-stubs/plugins/airflow/sensors/external_task_sensor.pyi +2 -2
  125. metaflow-stubs/plugins/airflow/sensors/s3_sensor.pyi +2 -2
  126. metaflow-stubs/plugins/argo/__init__.pyi +2 -2
  127. metaflow-stubs/plugins/argo/argo_client.pyi +2 -2
  128. metaflow-stubs/plugins/argo/argo_events.pyi +2 -2
  129. metaflow-stubs/plugins/argo/argo_workflows.pyi +7 -34
  130. metaflow-stubs/plugins/argo/argo_workflows_decorator.pyi +3 -3
  131. metaflow-stubs/plugins/argo/argo_workflows_deployer.pyi +4 -4
  132. metaflow-stubs/plugins/argo/argo_workflows_deployer_objects.pyi +3 -3
  133. metaflow-stubs/plugins/argo/exit_hooks.pyi +45 -0
  134. metaflow-stubs/plugins/aws/__init__.pyi +2 -2
  135. metaflow-stubs/plugins/aws/aws_client.pyi +2 -2
  136. metaflow-stubs/plugins/aws/aws_utils.pyi +2 -2
  137. metaflow-stubs/plugins/aws/batch/__init__.pyi +2 -2
  138. metaflow-stubs/plugins/aws/batch/batch.pyi +2 -2
  139. metaflow-stubs/plugins/aws/batch/batch_client.pyi +2 -2
  140. metaflow-stubs/plugins/aws/batch/batch_decorator.pyi +2 -2
  141. metaflow-stubs/plugins/aws/secrets_manager/__init__.pyi +2 -2
  142. metaflow-stubs/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.pyi +4 -4
  143. metaflow-stubs/plugins/aws/step_functions/__init__.pyi +2 -2
  144. metaflow-stubs/plugins/aws/step_functions/event_bridge_client.pyi +2 -2
  145. metaflow-stubs/plugins/aws/step_functions/schedule_decorator.pyi +2 -2
  146. metaflow-stubs/plugins/aws/step_functions/step_functions.pyi +2 -2
  147. metaflow-stubs/plugins/aws/step_functions/step_functions_client.pyi +2 -2
  148. metaflow-stubs/plugins/aws/step_functions/step_functions_deployer.pyi +3 -3
  149. metaflow-stubs/plugins/aws/step_functions/step_functions_deployer_objects.pyi +4 -4
  150. metaflow-stubs/plugins/azure/__init__.pyi +2 -2
  151. metaflow-stubs/plugins/azure/azure_credential.pyi +2 -2
  152. metaflow-stubs/plugins/azure/azure_exceptions.pyi +2 -2
  153. metaflow-stubs/plugins/azure/azure_secret_manager_secrets_provider.pyi +4 -4
  154. metaflow-stubs/plugins/azure/azure_utils.pyi +2 -2
  155. metaflow-stubs/plugins/azure/blob_service_client_factory.pyi +2 -2
  156. metaflow-stubs/plugins/azure/includefile_support.pyi +2 -2
  157. metaflow-stubs/plugins/cards/__init__.pyi +6 -6
  158. metaflow-stubs/plugins/cards/card_client.pyi +3 -3
  159. metaflow-stubs/plugins/cards/card_creator.pyi +4 -3
  160. metaflow-stubs/plugins/cards/card_datastore.pyi +2 -2
  161. metaflow-stubs/plugins/cards/card_decorator.pyi +14 -3
  162. metaflow-stubs/plugins/cards/card_modules/__init__.pyi +2 -2
  163. metaflow-stubs/plugins/cards/card_modules/basic.pyi +3 -3
  164. metaflow-stubs/plugins/cards/card_modules/card.pyi +2 -2
  165. metaflow-stubs/plugins/cards/card_modules/components.pyi +4 -4
  166. metaflow-stubs/plugins/cards/card_modules/convert_to_native_type.pyi +2 -2
  167. metaflow-stubs/plugins/cards/card_modules/renderer_tools.pyi +2 -2
  168. metaflow-stubs/plugins/cards/card_modules/test_cards.pyi +10 -2
  169. metaflow-stubs/plugins/cards/card_resolver.pyi +2 -2
  170. metaflow-stubs/plugins/cards/component_serializer.pyi +2 -2
  171. metaflow-stubs/plugins/cards/exception.pyi +2 -2
  172. metaflow-stubs/plugins/catch_decorator.pyi +3 -3
  173. metaflow-stubs/plugins/datatools/__init__.pyi +2 -2
  174. metaflow-stubs/plugins/datatools/local.pyi +2 -2
  175. metaflow-stubs/plugins/datatools/s3/__init__.pyi +2 -2
  176. metaflow-stubs/plugins/datatools/s3/s3.pyi +3 -3
  177. metaflow-stubs/plugins/datatools/s3/s3tail.pyi +2 -2
  178. metaflow-stubs/plugins/datatools/s3/s3util.pyi +2 -2
  179. metaflow-stubs/plugins/debug_logger.pyi +2 -2
  180. metaflow-stubs/plugins/debug_monitor.pyi +2 -2
  181. metaflow-stubs/plugins/environment_decorator.pyi +2 -2
  182. metaflow-stubs/plugins/events_decorator.pyi +2 -2
  183. metaflow-stubs/plugins/exit_hook/__init__.pyi +11 -0
  184. metaflow-stubs/plugins/exit_hook/exit_hook_decorator.pyi +20 -0
  185. metaflow-stubs/plugins/frameworks/__init__.pyi +2 -2
  186. metaflow-stubs/plugins/frameworks/pytorch.pyi +2 -2
  187. metaflow-stubs/plugins/gcp/__init__.pyi +2 -2
  188. metaflow-stubs/plugins/gcp/gcp_secret_manager_secrets_provider.pyi +4 -4
  189. metaflow-stubs/plugins/gcp/gs_exceptions.pyi +2 -2
  190. metaflow-stubs/plugins/gcp/gs_storage_client_factory.pyi +2 -2
  191. metaflow-stubs/plugins/gcp/gs_utils.pyi +2 -2
  192. metaflow-stubs/plugins/gcp/includefile_support.pyi +2 -2
  193. metaflow-stubs/plugins/kubernetes/__init__.pyi +2 -2
  194. metaflow-stubs/plugins/kubernetes/kube_utils.pyi +3 -3
  195. metaflow-stubs/plugins/kubernetes/kubernetes.pyi +2 -2
  196. metaflow-stubs/plugins/kubernetes/kubernetes_client.pyi +2 -2
  197. metaflow-stubs/plugins/kubernetes/kubernetes_decorator.pyi +2 -2
  198. metaflow-stubs/plugins/kubernetes/kubernetes_jobsets.pyi +2 -2
  199. metaflow-stubs/plugins/kubernetes/spot_monitor_sidecar.pyi +2 -2
  200. metaflow-stubs/plugins/ollama/__init__.pyi +2 -2
  201. metaflow-stubs/plugins/parallel_decorator.pyi +2 -2
  202. metaflow-stubs/plugins/perimeters.pyi +2 -2
  203. metaflow-stubs/plugins/project_decorator.pyi +2 -2
  204. metaflow-stubs/plugins/pypi/__init__.pyi +3 -3
  205. metaflow-stubs/plugins/pypi/conda_decorator.pyi +2 -2
  206. metaflow-stubs/plugins/pypi/conda_environment.pyi +6 -6
  207. metaflow-stubs/plugins/pypi/parsers.pyi +2 -2
  208. metaflow-stubs/plugins/pypi/pypi_decorator.pyi +2 -2
  209. metaflow-stubs/plugins/pypi/pypi_environment.pyi +2 -2
  210. metaflow-stubs/plugins/pypi/utils.pyi +2 -2
  211. metaflow-stubs/plugins/resources_decorator.pyi +2 -2
  212. metaflow-stubs/plugins/retry_decorator.pyi +2 -2
  213. metaflow-stubs/plugins/secrets/__init__.pyi +6 -2
  214. metaflow-stubs/plugins/secrets/inline_secrets_provider.pyi +3 -3
  215. metaflow-stubs/plugins/secrets/secrets_decorator.pyi +9 -49
  216. metaflow-stubs/plugins/secrets/secrets_func.pyi +31 -0
  217. metaflow-stubs/plugins/secrets/secrets_spec.pyi +42 -0
  218. metaflow-stubs/plugins/secrets/utils.pyi +28 -0
  219. metaflow-stubs/plugins/snowflake/__init__.pyi +2 -2
  220. metaflow-stubs/plugins/storage_executor.pyi +2 -2
  221. metaflow-stubs/plugins/test_unbounded_foreach_decorator.pyi +2 -2
  222. metaflow-stubs/plugins/timeout_decorator.pyi +3 -3
  223. metaflow-stubs/plugins/torchtune/__init__.pyi +2 -2
  224. metaflow-stubs/plugins/uv/__init__.pyi +2 -2
  225. metaflow-stubs/plugins/uv/uv_environment.pyi +3 -3
  226. metaflow-stubs/profilers/__init__.pyi +2 -2
  227. metaflow-stubs/pylint_wrapper.pyi +2 -2
  228. metaflow-stubs/runner/__init__.pyi +2 -2
  229. metaflow-stubs/runner/deployer.pyi +28 -28
  230. metaflow-stubs/runner/deployer_impl.pyi +2 -2
  231. metaflow-stubs/runner/metaflow_runner.pyi +3 -3
  232. metaflow-stubs/runner/nbdeploy.pyi +2 -2
  233. metaflow-stubs/runner/nbrun.pyi +2 -2
  234. metaflow-stubs/runner/subprocess_manager.pyi +2 -2
  235. metaflow-stubs/runner/utils.pyi +4 -4
  236. metaflow-stubs/system/__init__.pyi +2 -2
  237. metaflow-stubs/system/system_logger.pyi +2 -2
  238. metaflow-stubs/system/system_monitor.pyi +2 -2
  239. metaflow-stubs/tagging_util.pyi +2 -2
  240. metaflow-stubs/tuple_util.pyi +2 -2
  241. metaflow-stubs/user_configs/__init__.pyi +2 -2
  242. metaflow-stubs/user_configs/config_decorators.pyi +6 -6
  243. metaflow-stubs/user_configs/config_options.pyi +4 -4
  244. metaflow-stubs/user_configs/config_parameters.pyi +6 -6
  245. {ob_metaflow_stubs-6.0.3.188rc4.dist-info → ob_metaflow_stubs-6.0.4.0.dist-info}/METADATA +1 -1
  246. ob_metaflow_stubs-6.0.4.0.dist-info/RECORD +249 -0
  247. ob_metaflow_stubs-6.0.3.188rc4.dist-info/RECORD +0 -243
  248. {ob_metaflow_stubs-6.0.3.188rc4.dist-info → ob_metaflow_stubs-6.0.4.0.dist-info}/WHEEL +0 -0
  249. {ob_metaflow_stubs-6.0.3.188rc4.dist-info → ob_metaflow_stubs-6.0.4.0.dist-info}/top_level.txt +0 -0
@@ -1,7 +1,7 @@
1
1
  ######################################################################################################
2
2
  # Auto-generated Metaflow stub file #
3
- # MF version: 2.15.18.1+obcheckpoint(0.2.4);ob(v1) #
4
- # Generated on 2025-07-10T23:14:33.334494 #
3
+ # MF version: 2.15.21.1+obcheckpoint(0.2.4);ob(v1) #
4
+ # Generated on 2025-07-11T23:29:18.665907 #
5
5
  ######################################################################################################
6
6
 
7
7
  from __future__ import annotations
@@ -35,18 +35,18 @@ from .user_configs.config_parameters import ConfigValue as ConfigValue
35
35
  from .user_configs.config_parameters import config_expr as config_expr
36
36
  from .user_configs.config_decorators import CustomFlowDecorator as CustomFlowDecorator
37
37
  from .user_configs.config_decorators import CustomStepDecorator as CustomStepDecorator
38
- from . import metaflow_git as metaflow_git
39
38
  from . import cards as cards
40
39
  from . import tuple_util as tuple_util
41
40
  from . import events as events
41
+ from . import metaflow_git as metaflow_git
42
42
  from . import runner as runner
43
43
  from . import plugins as plugins
44
44
  from .mf_extensions.outerbounds.toplevel.global_aliases_for_metaflow_package import S3 as S3
45
45
  from . import includefile as includefile
46
46
  from .includefile import IncludeFile as IncludeFile
47
47
  from .plugins.pypi.parsers import requirements_txt_parser as requirements_txt_parser
48
- from .plugins.pypi.parsers import conda_environment_yml_parser as conda_environment_yml_parser
49
48
  from .plugins.pypi.parsers import pyproject_toml_parser as pyproject_toml_parser
49
+ from .plugins.pypi.parsers import conda_environment_yml_parser as conda_environment_yml_parser
50
50
  from . import client as client
51
51
  from .client.core import namespace as namespace
52
52
  from .client.core import get_namespace as get_namespace
@@ -156,192 +156,334 @@ def step(f: typing.Union[typing.Callable[[FlowSpecDerived], None], typing.Callab
156
156
  """
157
157
  ...
158
158
 
159
- def kubernetes(*, cpu: int = 1, memory: int = 4096, disk: int = 10240, image: typing.Optional[str] = None, image_pull_policy: str = 'KUBERNETES_IMAGE_PULL_POLICY', image_pull_secrets: typing.List[str] = [], service_account: str = 'METAFLOW_KUBERNETES_SERVICE_ACCOUNT', secrets: typing.Optional[typing.List[str]] = None, node_selector: typing.Union[typing.Dict[str, str], str, None] = None, namespace: str = 'METAFLOW_KUBERNETES_NAMESPACE', gpu: typing.Optional[int] = None, gpu_vendor: str = 'KUBERNETES_GPU_VENDOR', tolerations: typing.List[str] = [], labels: typing.Dict[str, str] = 'METAFLOW_KUBERNETES_LABELS', annotations: typing.Dict[str, str] = 'METAFLOW_KUBERNETES_ANNOTATIONS', use_tmpfs: bool = False, tmpfs_tempdir: bool = True, tmpfs_size: typing.Optional[int] = None, tmpfs_path: typing.Optional[str] = '/metaflow_temp', persistent_volume_claims: typing.Optional[typing.Dict[str, str]] = None, shared_memory: typing.Optional[int] = None, port: typing.Optional[int] = None, compute_pool: typing.Optional[str] = None, hostname_resolution_timeout: int = 600, qos: str = 'Burstable', security_context: typing.Optional[typing.Dict[str, typing.Any]] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
159
+ @typing.overload
160
+ def app_deploy(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
160
161
  """
161
- Specifies that this step should execute on Kubernetes.
162
-
163
-
164
- Parameters
165
- ----------
166
- cpu : int, default 1
167
- Number of CPUs required for this step. If `@resources` is
168
- also present, the maximum value from all decorators is used.
169
- memory : int, default 4096
170
- Memory size (in MB) required for this step. If
171
- `@resources` is also present, the maximum value from all decorators is
172
- used.
173
- disk : int, default 10240
174
- Disk size (in MB) required for this step. If
175
- `@resources` is also present, the maximum value from all decorators is
176
- used.
177
- image : str, optional, default None
178
- Docker image to use when launching on Kubernetes. If not specified, and
179
- METAFLOW_KUBERNETES_CONTAINER_IMAGE is specified, that image is used. If
180
- not, a default Docker image mapping to the current version of Python is used.
181
- image_pull_policy: str, default KUBERNETES_IMAGE_PULL_POLICY
182
- If given, the imagePullPolicy to be applied to the Docker image of the step.
183
- image_pull_secrets: List[str], default []
184
- The default is extracted from METAFLOW_KUBERNETES_IMAGE_PULL_SECRETS.
185
- Kubernetes image pull secrets to use when pulling container images
186
- in Kubernetes.
187
- service_account : str, default METAFLOW_KUBERNETES_SERVICE_ACCOUNT
188
- Kubernetes service account to use when launching pod in Kubernetes.
189
- secrets : List[str], optional, default None
190
- Kubernetes secrets to use when launching pod in Kubernetes. These
191
- secrets are in addition to the ones defined in `METAFLOW_KUBERNETES_SECRETS`
192
- in Metaflow configuration.
193
- node_selector: Union[Dict[str,str], str], optional, default None
194
- Kubernetes node selector(s) to apply to the pod running the task.
195
- Can be passed in as a comma separated string of values e.g.
196
- 'kubernetes.io/os=linux,kubernetes.io/arch=amd64' or as a dictionary
197
- {'kubernetes.io/os': 'linux', 'kubernetes.io/arch': 'amd64'}
198
- namespace : str, default METAFLOW_KUBERNETES_NAMESPACE
199
- Kubernetes namespace to use when launching pod in Kubernetes.
200
- gpu : int, optional, default None
201
- Number of GPUs required for this step. A value of zero implies that
202
- the scheduled node should not have GPUs.
203
- gpu_vendor : str, default KUBERNETES_GPU_VENDOR
204
- The vendor of the GPUs to be used for this step.
205
- tolerations : List[str], default []
206
- The default is extracted from METAFLOW_KUBERNETES_TOLERATIONS.
207
- Kubernetes tolerations to use when launching pod in Kubernetes.
208
- labels: Dict[str, str], default: METAFLOW_KUBERNETES_LABELS
209
- Kubernetes labels to use when launching pod in Kubernetes.
210
- annotations: Dict[str, str], default: METAFLOW_KUBERNETES_ANNOTATIONS
211
- Kubernetes annotations to use when launching pod in Kubernetes.
212
- use_tmpfs : bool, default False
213
- This enables an explicit tmpfs mount for this step.
214
- tmpfs_tempdir : bool, default True
215
- sets METAFLOW_TEMPDIR to tmpfs_path if set for this step.
216
- tmpfs_size : int, optional, default: None
217
- The value for the size (in MiB) of the tmpfs mount for this step.
218
- This parameter maps to the `--tmpfs` option in Docker. Defaults to 50% of the
219
- memory allocated for this step.
220
- tmpfs_path : str, optional, default /metaflow_temp
221
- Path to tmpfs mount for this step.
222
- persistent_volume_claims : Dict[str, str], optional, default None
223
- A map (dictionary) of persistent volumes to be mounted to the pod for this step. The map is from persistent
224
- volumes to the path to which the volume is to be mounted, e.g., `{'pvc-name': '/path/to/mount/on'}`.
225
- shared_memory: int, optional
226
- Shared memory size (in MiB) required for this step
227
- port: int, optional
228
- Port number to specify in the Kubernetes job object
229
- compute_pool : str, optional, default None
230
- Compute pool to be used for for this step.
231
- If not specified, any accessible compute pool within the perimeter is used.
232
- hostname_resolution_timeout: int, default 10 * 60
233
- Timeout in seconds for the workers tasks in the gang scheduled cluster to resolve the hostname of control task.
234
- Only applicable when @parallel is used.
235
- qos: str, default: Burstable
236
- Quality of Service class to assign to the pod. Supported values are: Guaranteed, Burstable, BestEffort
237
-
238
- security_context: Dict[str, Any], optional, default None
239
- Container security context. Applies to the task container. Allows the following keys:
240
- - privileged: bool, optional, default None
241
- - allow_privilege_escalation: bool, optional, default None
242
- - run_as_user: int, optional, default None
243
- - run_as_group: int, optional, default None
244
- - run_as_non_root: bool, optional, default None
162
+ Decorator prototype for all step decorators. This function gets specialized
163
+ and imported for all decorators types by _import_plugin_decorators().
245
164
  """
246
165
  ...
247
166
 
248
167
  @typing.overload
249
- def conda(*, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
168
+ def app_deploy(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
169
+ ...
170
+
171
+ def app_deploy(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
250
172
  """
251
- Specifies the Conda environment for the step.
252
-
253
- Information in this decorator will augment any
254
- attributes set in the `@conda_base` flow-level decorator. Hence,
255
- you can use `@conda_base` to set packages required by all
256
- steps and use `@conda` to specify step-specific overrides.
173
+ Decorator prototype for all step decorators. This function gets specialized
174
+ and imported for all decorators types by _import_plugin_decorators().
175
+ """
176
+ ...
177
+
178
+ def nvct(*, gpu: int, gpu_type: str) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
179
+ """
180
+ Specifies that this step should execute on DGX cloud.
257
181
 
258
182
 
259
183
  Parameters
260
184
  ----------
261
- packages : Dict[str, str], default {}
262
- Packages to use for this step. The key is the name of the package
263
- and the value is the version to use.
264
- libraries : Dict[str, str], default {}
265
- Supported for backward compatibility. When used with packages, packages will take precedence.
266
- python : str, optional, default None
267
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
268
- that the version used will correspond to the version of the Python interpreter used to start the run.
269
- disabled : bool, default False
270
- If set to True, disables @conda.
185
+ gpu : int
186
+ Number of GPUs to use.
187
+ gpu_type : str
188
+ Type of Nvidia GPU to use.
271
189
  """
272
190
  ...
273
191
 
274
192
  @typing.overload
275
- def conda(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
193
+ def fast_bakery_internal(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
194
+ """
195
+ Internal decorator to support Fast bakery
196
+ """
276
197
  ...
277
198
 
278
199
  @typing.overload
279
- def conda(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
200
+ def fast_bakery_internal(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
280
201
  ...
281
202
 
282
- def conda(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False):
203
+ def fast_bakery_internal(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
283
204
  """
284
- Specifies the Conda environment for the step.
205
+ Internal decorator to support Fast bakery
206
+ """
207
+ ...
208
+
209
+ def vllm(*, model: str, backend: str, openai_api_server: bool, debug: bool, card_refresh_interval: int, max_retries: int, retry_alert_frequency: int, engine_args: dict) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
210
+ """
211
+ This decorator is used to run vllm APIs as Metaflow task sidecars.
285
212
 
286
- Information in this decorator will augment any
287
- attributes set in the `@conda_base` flow-level decorator. Hence,
288
- you can use `@conda_base` to set packages required by all
289
- steps and use `@conda` to specify step-specific overrides.
213
+ User code call
214
+ --------------
215
+ @vllm(
216
+ model="...",
217
+ ...
218
+ )
219
+
220
+ Valid backend options
221
+ ---------------------
222
+ - 'local': Run as a separate process on the local task machine.
223
+
224
+ Valid model options
225
+ -------------------
226
+ Any HuggingFace model identifier, e.g. 'meta-llama/Llama-3.2-1B'
227
+
228
+ NOTE: vLLM's OpenAI-compatible server serves ONE model per server instance.
229
+ If you need multiple models, you must create multiple @vllm decorators.
290
230
 
291
231
 
292
232
  Parameters
293
233
  ----------
294
- packages : Dict[str, str], default {}
295
- Packages to use for this step. The key is the name of the package
296
- and the value is the version to use.
297
- libraries : Dict[str, str], default {}
298
- Supported for backward compatibility. When used with packages, packages will take precedence.
299
- python : str, optional, default None
300
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
301
- that the version used will correspond to the version of the Python interpreter used to start the run.
302
- disabled : bool, default False
303
- If set to True, disables @conda.
234
+ model: str
235
+ HuggingFace model identifier to be served by vLLM.
236
+ backend: str
237
+ Determines where and how to run the vLLM process.
238
+ openai_api_server: bool
239
+ Whether to use OpenAI-compatible API server mode (subprocess) instead of native engine.
240
+ Default is False (uses native engine).
241
+ Set to True for backward compatibility with existing code.
242
+ debug: bool
243
+ Whether to turn on verbose debugging logs.
244
+ card_refresh_interval: int
245
+ Interval in seconds for refreshing the vLLM status card.
246
+ Only used when openai_api_server=True.
247
+ max_retries: int
248
+ Maximum number of retries checking for vLLM server startup.
249
+ Only used when openai_api_server=True.
250
+ retry_alert_frequency: int
251
+ Frequency of alert logs for vLLM server startup retries.
252
+ Only used when openai_api_server=True.
253
+ engine_args : dict
254
+ Additional keyword arguments to pass to the vLLM engine.
255
+ For example, `tensor_parallel_size=2`.
304
256
  """
305
257
  ...
306
258
 
307
- @typing.overload
308
- def resources(*, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
259
+ def ollama(*, models: list, backend: str, force_pull: bool, cache_update_policy: str, force_cache_update: bool, debug: bool, circuit_breaker_config: dict, timeout_config: dict) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
309
260
  """
310
- Specifies the resources needed when executing this step.
261
+ This decorator is used to run Ollama APIs as Metaflow task sidecars.
311
262
 
312
- Use `@resources` to specify the resource requirements
313
- independently of the specific compute layer (`@batch`, `@kubernetes`).
263
+ User code call
264
+ --------------
265
+ @ollama(
266
+ models=[...],
267
+ ...
268
+ )
314
269
 
315
- You can choose the compute layer on the command line by executing e.g.
316
- ```
317
- python myflow.py run --with batch
318
- ```
319
- or
320
- ```
321
- python myflow.py run --with kubernetes
322
- ```
323
- which executes the flow on the desired system using the
324
- requirements specified in `@resources`.
270
+ Valid backend options
271
+ ---------------------
272
+ - 'local': Run as a separate process on the local task machine.
273
+ - (TODO) 'managed': Outerbounds hosts and selects compute provider.
274
+ - (TODO) 'remote': Spin up separate instance to serve Ollama models.
275
+
276
+ Valid model options
277
+ -------------------
278
+ Any model here https://ollama.com/search, e.g. 'llama3.2', 'llama3.3'
325
279
 
326
280
 
327
281
  Parameters
328
282
  ----------
329
- cpu : int, default 1
330
- Number of CPUs required for this step.
331
- gpu : int, optional, default None
332
- Number of GPUs required for this step.
333
- disk : int, optional, default None
334
- Disk size (in MB) required for this step. Only applies on Kubernetes.
335
- memory : int, default 4096
336
- Memory size (in MB) required for this step.
337
- shared_memory : int, optional, default None
338
- The value for the size (in MiB) of the /dev/shm volume for this step.
339
- This parameter maps to the `--shm-size` option in Docker.
283
+ models: list[str]
284
+ List of Ollama containers running models in sidecars.
285
+ backend: str
286
+ Determines where and how to run the Ollama process.
287
+ force_pull: bool
288
+ Whether to run `ollama pull` no matter what, or first check the remote cache in Metaflow datastore for this model key.
289
+ cache_update_policy: str
290
+ Cache update policy: "auto", "force", or "never".
291
+ force_cache_update: bool
292
+ Simple override for "force" cache update policy.
293
+ debug: bool
294
+ Whether to turn on verbose debugging logs.
295
+ circuit_breaker_config: dict
296
+ Configuration for circuit breaker protection. Keys: failure_threshold, recovery_timeout, reset_timeout.
297
+ timeout_config: dict
298
+ Configuration for various operation timeouts. Keys: pull, stop, health_check, install, server_startup.
340
299
  """
341
300
  ...
342
301
 
343
302
  @typing.overload
344
- def resources(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
303
+ def checkpoint(*, load_policy: str = 'fresh', temp_dir_root: str = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
304
+ """
305
+ Enables checkpointing for a step.
306
+
307
+ > Examples
308
+
309
+ - Saving Checkpoints
310
+
311
+ ```python
312
+ @checkpoint
313
+ @step
314
+ def train(self):
315
+ model = create_model(self.parameters, checkpoint_path = None)
316
+ for i in range(self.epochs):
317
+ # some training logic
318
+ loss = model.train(self.dataset)
319
+ if i % 10 == 0:
320
+ model.save(
321
+ current.checkpoint.directory,
322
+ )
323
+ # saves the contents of the `current.checkpoint.directory` as a checkpoint
324
+ # and returns a reference dictionary to the checkpoint saved in the datastore
325
+ self.latest_checkpoint = current.checkpoint.save(
326
+ name="epoch_checkpoint",
327
+ metadata={
328
+ "epoch": i,
329
+ "loss": loss,
330
+ }
331
+ )
332
+ ```
333
+
334
+ - Using Loaded Checkpoints
335
+
336
+ ```python
337
+ @retry(times=3)
338
+ @checkpoint
339
+ @step
340
+ def train(self):
341
+ # Assume that the task has restarted and the previous attempt of the task
342
+ # saved a checkpoint
343
+ checkpoint_path = None
344
+ if current.checkpoint.is_loaded: # Check if a checkpoint is loaded
345
+ print("Loaded checkpoint from the previous attempt")
346
+ checkpoint_path = current.checkpoint.directory
347
+
348
+ model = create_model(self.parameters, checkpoint_path = checkpoint_path)
349
+ for i in range(self.epochs):
350
+ ...
351
+ ```
352
+
353
+
354
+ Parameters
355
+ ----------
356
+ load_policy : str, default: "fresh"
357
+ The policy for loading the checkpoint. The following policies are supported:
358
+ - "eager": Loads the latest available checkpoint within the namespace.
359
+ With this mode, the latest checkpoint written by any previous task (can be even a different run) of the step
360
+ will be loaded at the start of the task.
361
+ - "none": Do not load any checkpoint
362
+ - "fresh": Loads the latest checkpoint created within the running Task.
363
+ This mode helps loading checkpoints across various retry attempts of the same task.
364
+ With this mode, no checkpoint will be loaded at the start of a task but any checkpoints
365
+ created within the task will be loaded when the task retries execution on failure.
366
+
367
+ temp_dir_root : str, default: None
368
+ The root directory under which `current.checkpoint.directory` will be created.
369
+ """
370
+ ...
371
+
372
+ @typing.overload
373
+ def checkpoint(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
374
+ ...
375
+
376
+ @typing.overload
377
+ def checkpoint(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
378
+ ...
379
+
380
+ def checkpoint(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, load_policy: str = 'fresh', temp_dir_root: str = None):
381
+ """
382
+ Enables checkpointing for a step.
383
+
384
+ > Examples
385
+
386
+ - Saving Checkpoints
387
+
388
+ ```python
389
+ @checkpoint
390
+ @step
391
+ def train(self):
392
+ model = create_model(self.parameters, checkpoint_path = None)
393
+ for i in range(self.epochs):
394
+ # some training logic
395
+ loss = model.train(self.dataset)
396
+ if i % 10 == 0:
397
+ model.save(
398
+ current.checkpoint.directory,
399
+ )
400
+ # saves the contents of the `current.checkpoint.directory` as a checkpoint
401
+ # and returns a reference dictionary to the checkpoint saved in the datastore
402
+ self.latest_checkpoint = current.checkpoint.save(
403
+ name="epoch_checkpoint",
404
+ metadata={
405
+ "epoch": i,
406
+ "loss": loss,
407
+ }
408
+ )
409
+ ```
410
+
411
+ - Using Loaded Checkpoints
412
+
413
+ ```python
414
+ @retry(times=3)
415
+ @checkpoint
416
+ @step
417
+ def train(self):
418
+ # Assume that the task has restarted and the previous attempt of the task
419
+ # saved a checkpoint
420
+ checkpoint_path = None
421
+ if current.checkpoint.is_loaded: # Check if a checkpoint is loaded
422
+ print("Loaded checkpoint from the previous attempt")
423
+ checkpoint_path = current.checkpoint.directory
424
+
425
+ model = create_model(self.parameters, checkpoint_path = checkpoint_path)
426
+ for i in range(self.epochs):
427
+ ...
428
+ ```
429
+
430
+
431
+ Parameters
432
+ ----------
433
+ load_policy : str, default: "fresh"
434
+ The policy for loading the checkpoint. The following policies are supported:
435
+ - "eager": Loads the latest available checkpoint within the namespace.
436
+ With this mode, the latest checkpoint written by any previous task (can be even a different run) of the step
437
+ will be loaded at the start of the task.
438
+ - "none": Do not load any checkpoint
439
+ - "fresh": Loads the latest checkpoint created within the running Task.
440
+ This mode helps loading checkpoints across various retry attempts of the same task.
441
+ With this mode, no checkpoint will be loaded at the start of a task but any checkpoints
442
+ created within the task will be loaded when the task retries execution on failure.
443
+
444
+ temp_dir_root : str, default: None
445
+ The root directory under which `current.checkpoint.directory` will be created.
446
+ """
447
+ ...
448
+
449
+ @typing.overload
450
+ def resources(*, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
451
+ """
452
+ Specifies the resources needed when executing this step.
453
+
454
+ Use `@resources` to specify the resource requirements
455
+ independently of the specific compute layer (`@batch`, `@kubernetes`).
456
+
457
+ You can choose the compute layer on the command line by executing e.g.
458
+ ```
459
+ python myflow.py run --with batch
460
+ ```
461
+ or
462
+ ```
463
+ python myflow.py run --with kubernetes
464
+ ```
465
+ which executes the flow on the desired system using the
466
+ requirements specified in `@resources`.
467
+
468
+
469
+ Parameters
470
+ ----------
471
+ cpu : int, default 1
472
+ Number of CPUs required for this step.
473
+ gpu : int, optional, default None
474
+ Number of GPUs required for this step.
475
+ disk : int, optional, default None
476
+ Disk size (in MB) required for this step. Only applies on Kubernetes.
477
+ memory : int, default 4096
478
+ Memory size (in MB) required for this step.
479
+ shared_memory : int, optional, default None
480
+ The value for the size (in MiB) of the /dev/shm volume for this step.
481
+ This parameter maps to the `--shm-size` option in Docker.
482
+ """
483
+ ...
484
+
485
+ @typing.overload
486
+ def resources(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
345
487
  ...
346
488
 
347
489
  @typing.overload
@@ -384,119 +526,108 @@ def resources(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None]
384
526
  ...
385
527
 
386
528
  @typing.overload
387
- def card(*, type: str = 'default', id: typing.Optional[str] = None, options: typing.Dict[str, typing.Any] = {}, timeout: int = 45) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
529
+ def secrets(*, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = [], role: typing.Optional[str] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
388
530
  """
389
- Creates a human-readable report, a Metaflow Card, after this step completes.
390
-
391
- Note that you may add multiple `@card` decorators in a step with different parameters.
531
+ Specifies secrets to be retrieved and injected as environment variables prior to
532
+ the execution of a step.
392
533
 
393
534
 
394
535
  Parameters
395
536
  ----------
396
- type : str, default 'default'
397
- Card type.
398
- id : str, optional, default None
399
- If multiple cards are present, use this id to identify this card.
400
- options : Dict[str, Any], default {}
401
- Options passed to the card. The contents depend on the card type.
402
- timeout : int, default 45
403
- Interrupt reporting if it takes more than this many seconds.
537
+ sources : List[Union[str, Dict[str, Any]]], default: []
538
+ List of secret specs, defining how the secrets are to be retrieved
539
+ role : str, optional, default: None
540
+ Role to use for fetching secrets
404
541
  """
405
542
  ...
406
543
 
407
544
  @typing.overload
408
- def card(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
545
+ def secrets(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
409
546
  ...
410
547
 
411
548
  @typing.overload
412
- def card(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
549
+ def secrets(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
413
550
  ...
414
551
 
415
- def card(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, type: str = 'default', id: typing.Optional[str] = None, options: typing.Dict[str, typing.Any] = {}, timeout: int = 45):
552
+ def secrets(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = [], role: typing.Optional[str] = None):
416
553
  """
417
- Creates a human-readable report, a Metaflow Card, after this step completes.
418
-
419
- Note that you may add multiple `@card` decorators in a step with different parameters.
554
+ Specifies secrets to be retrieved and injected as environment variables prior to
555
+ the execution of a step.
420
556
 
421
557
 
422
558
  Parameters
423
559
  ----------
424
- type : str, default 'default'
425
- Card type.
426
- id : str, optional, default None
427
- If multiple cards are present, use this id to identify this card.
428
- options : Dict[str, Any], default {}
429
- Options passed to the card. The contents depend on the card type.
430
- timeout : int, default 45
431
- Interrupt reporting if it takes more than this many seconds.
560
+ sources : List[Union[str, Dict[str, Any]]], default: []
561
+ List of secret specs, defining how the secrets are to be retrieved
562
+ role : str, optional, default: None
563
+ Role to use for fetching secrets
432
564
  """
433
565
  ...
434
566
 
435
567
  @typing.overload
436
- def catch(*, var: typing.Optional[str] = None, print_exception: bool = True) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
568
+ def pypi(*, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
437
569
  """
438
- Specifies that the step will success under all circumstances.
570
+ Specifies the PyPI packages for the step.
439
571
 
440
- The decorator will create an optional artifact, specified by `var`, which
441
- contains the exception raised. You can use it to detect the presence
442
- of errors, indicating that all happy-path artifacts produced by the step
443
- are missing.
572
+ Information in this decorator will augment any
573
+ attributes set in the `@pypi_base` flow-level decorator. Hence,
574
+ you can use `@pypi_base` to set packages required by all
575
+ steps and use `@pypi` to specify step-specific overrides.
444
576
 
445
577
 
446
578
  Parameters
447
579
  ----------
448
- var : str, optional, default None
449
- Name of the artifact in which to store the caught exception.
450
- If not specified, the exception is not stored.
451
- print_exception : bool, default True
452
- Determines whether or not the exception is printed to
453
- stdout when caught.
580
+ packages : Dict[str, str], default: {}
581
+ Packages to use for this step. The key is the name of the package
582
+ and the value is the version to use.
583
+ python : str, optional, default: None
584
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
585
+ that the version used will correspond to the version of the Python interpreter used to start the run.
454
586
  """
455
587
  ...
456
588
 
457
589
  @typing.overload
458
- def catch(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
590
+ def pypi(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
459
591
  ...
460
592
 
461
593
  @typing.overload
462
- def catch(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
594
+ def pypi(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
463
595
  ...
464
596
 
465
- def catch(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, var: typing.Optional[str] = None, print_exception: bool = True):
597
+ def pypi(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None):
466
598
  """
467
- Specifies that the step will success under all circumstances.
599
+ Specifies the PyPI packages for the step.
468
600
 
469
- The decorator will create an optional artifact, specified by `var`, which
470
- contains the exception raised. You can use it to detect the presence
471
- of errors, indicating that all happy-path artifacts produced by the step
472
- are missing.
601
+ Information in this decorator will augment any
602
+ attributes set in the `@pypi_base` flow-level decorator. Hence,
603
+ you can use `@pypi_base` to set packages required by all
604
+ steps and use `@pypi` to specify step-specific overrides.
473
605
 
474
606
 
475
607
  Parameters
476
608
  ----------
477
- var : str, optional, default None
478
- Name of the artifact in which to store the caught exception.
479
- If not specified, the exception is not stored.
480
- print_exception : bool, default True
481
- Determines whether or not the exception is printed to
482
- stdout when caught.
483
- """
484
- ...
485
-
486
- @typing.overload
487
- def fast_bakery_internal(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
488
- """
489
- Internal decorator to support Fast bakery
609
+ packages : Dict[str, str], default: {}
610
+ Packages to use for this step. The key is the name of the package
611
+ and the value is the version to use.
612
+ python : str, optional, default: None
613
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
614
+ that the version used will correspond to the version of the Python interpreter used to start the run.
490
615
  """
491
616
  ...
492
617
 
493
- @typing.overload
494
- def fast_bakery_internal(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
495
- ...
496
-
497
- def fast_bakery_internal(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
618
+ def nvidia(*, gpu: int, gpu_type: str, queue_timeout: int) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
498
619
  """
499
- Internal decorator to support Fast bakery
620
+ Specifies that this step should execute on DGX cloud.
621
+
622
+
623
+ Parameters
624
+ ----------
625
+ gpu : int
626
+ Number of GPUs to use.
627
+ gpu_type : str
628
+ Type of Nvidia GPU to use.
629
+ queue_timeout : int
630
+ Time to keep the job in NVCF's queue.
500
631
  """
501
632
  ...
502
633
 
@@ -555,374 +686,221 @@ def retry(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], ty
555
686
  """
556
687
  ...
557
688
 
558
- def vllm(*, model: str, backend: str, openai_api_server: bool, debug: bool, card_refresh_interval: int, max_retries: int, retry_alert_frequency: int, engine_args: dict) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
689
+ @typing.overload
690
+ def conda(*, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
559
691
  """
560
- This decorator is used to run vllm APIs as Metaflow task sidecars.
561
-
562
- User code call
563
- --------------
564
- @vllm(
565
- model="...",
566
- ...
567
- )
568
-
569
- Valid backend options
570
- ---------------------
571
- - 'local': Run as a separate process on the local task machine.
572
-
573
- Valid model options
574
- -------------------
575
- Any HuggingFace model identifier, e.g. 'meta-llama/Llama-3.2-1B'
692
+ Specifies the Conda environment for the step.
576
693
 
577
- NOTE: vLLM's OpenAI-compatible server serves ONE model per server instance.
578
- If you need multiple models, you must create multiple @vllm decorators.
694
+ Information in this decorator will augment any
695
+ attributes set in the `@conda_base` flow-level decorator. Hence,
696
+ you can use `@conda_base` to set packages required by all
697
+ steps and use `@conda` to specify step-specific overrides.
579
698
 
580
699
 
581
700
  Parameters
582
701
  ----------
583
- model: str
584
- HuggingFace model identifier to be served by vLLM.
585
- backend: str
586
- Determines where and how to run the vLLM process.
587
- openai_api_server: bool
588
- Whether to use OpenAI-compatible API server mode (subprocess) instead of native engine.
589
- Default is False (uses native engine).
590
- Set to True for backward compatibility with existing code.
591
- debug: bool
592
- Whether to turn on verbose debugging logs.
593
- card_refresh_interval: int
594
- Interval in seconds for refreshing the vLLM status card.
595
- Only used when openai_api_server=True.
596
- max_retries: int
597
- Maximum number of retries checking for vLLM server startup.
598
- Only used when openai_api_server=True.
599
- retry_alert_frequency: int
600
- Frequency of alert logs for vLLM server startup retries.
601
- Only used when openai_api_server=True.
602
- engine_args : dict
603
- Additional keyword arguments to pass to the vLLM engine.
604
- For example, `tensor_parallel_size=2`.
702
+ packages : Dict[str, str], default {}
703
+ Packages to use for this step. The key is the name of the package
704
+ and the value is the version to use.
705
+ libraries : Dict[str, str], default {}
706
+ Supported for backward compatibility. When used with packages, packages will take precedence.
707
+ python : str, optional, default None
708
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
709
+ that the version used will correspond to the version of the Python interpreter used to start the run.
710
+ disabled : bool, default False
711
+ If set to True, disables @conda.
605
712
  """
606
713
  ...
607
714
 
608
- def huggingface_hub(*, temp_dir_root: typing.Optional[str] = None, load: typing.Union[typing.List[str], typing.List[typing.Tuple[typing.Dict, str]], typing.List[typing.Tuple[str, str]], typing.List[typing.Dict], None]) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
715
+ @typing.overload
716
+ def conda(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
717
+ ...
718
+
719
+ @typing.overload
720
+ def conda(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
721
+ ...
722
+
723
+ def conda(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False):
609
724
  """
610
- Decorator that helps cache, version and store models/datasets from huggingface hub.
611
-
612
- > Examples
613
-
614
- **Usage: creating references of models from huggingface that may be loaded in downstream steps**
615
- ```python
616
- @huggingface_hub
617
- @step
618
- def pull_model_from_huggingface(self):
619
- # `current.huggingface_hub.snapshot_download` downloads the model from the Hugging Face Hub
620
- # and saves it in the backend storage based on the model's `repo_id`. If there exists a model
621
- # with the same `repo_id` in the backend storage, it will not download the model again. The return
622
- # value of the function is a reference to the model in the backend storage.
623
- # This reference can be used to load the model in the subsequent steps via `@model(load=["llama_model"])`
725
+ Specifies the Conda environment for the step.
624
726
 
625
- self.model_id = "mistralai/Mistral-7B-Instruct-v0.1"
626
- self.llama_model = current.huggingface_hub.snapshot_download(
627
- repo_id=self.model_id,
628
- allow_patterns=["*.safetensors", "*.json", "tokenizer.*"],
629
- )
630
- self.next(self.train)
631
- ```
727
+ Information in this decorator will augment any
728
+ attributes set in the `@conda_base` flow-level decorator. Hence,
729
+ you can use `@conda_base` to set packages required by all
730
+ steps and use `@conda` to specify step-specific overrides.
632
731
 
633
- **Usage: loading models directly from huggingface hub or from cache (from metaflow's datastore)**
634
- ```python
635
- @huggingface_hub(load=["mistralai/Mistral-7B-Instruct-v0.1"])
636
- @step
637
- def pull_model_from_huggingface(self):
638
- path_to_model = current.huggingface_hub.loaded["mistralai/Mistral-7B-Instruct-v0.1"]
639
- ```
640
732
 
641
- ```python
642
- @huggingface_hub(load=[("mistralai/Mistral-7B-Instruct-v0.1", "/my-directory"), ("myorg/mistral-lora, "/my-lora-directory")])
643
- @step
644
- def finetune_model(self):
645
- path_to_model = current.huggingface_hub.loaded["mistralai/Mistral-7B-Instruct-v0.1"]
646
- # path_to_model will be /my-directory
647
- ```
733
+ Parameters
734
+ ----------
735
+ packages : Dict[str, str], default {}
736
+ Packages to use for this step. The key is the name of the package
737
+ and the value is the version to use.
738
+ libraries : Dict[str, str], default {}
739
+ Supported for backward compatibility. When used with packages, packages will take precedence.
740
+ python : str, optional, default None
741
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
742
+ that the version used will correspond to the version of the Python interpreter used to start the run.
743
+ disabled : bool, default False
744
+ If set to True, disables @conda.
745
+ """
746
+ ...
747
+
748
+ @typing.overload
749
+ def card(*, type: str = 'default', id: typing.Optional[str] = None, options: typing.Dict[str, typing.Any] = {}, timeout: int = 45) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
750
+ """
751
+ Creates a human-readable report, a Metaflow Card, after this step completes.
648
752
 
649
- ```python
650
- # Takes all the arguments passed to `snapshot_download`
651
- # except for `local_dir`
652
- @huggingface_hub(load=[
653
- {
654
- "repo_id": "mistralai/Mistral-7B-Instruct-v0.1",
655
- },
656
- {
657
- "repo_id": "myorg/mistral-lora",
658
- "repo_type": "model",
659
- },
660
- ])
661
- @step
662
- def finetune_model(self):
663
- path_to_model = current.huggingface_hub.loaded["mistralai/Mistral-7B-Instruct-v0.1"]
664
- # path_to_model will be /my-directory
665
- ```
753
+ Note that you may add multiple `@card` decorators in a step with different parameters.
666
754
 
667
755
 
668
756
  Parameters
669
757
  ----------
670
- temp_dir_root : str, optional
671
- The root directory that will hold the temporary directory where objects will be downloaded.
672
-
673
- load: Union[List[str], List[Tuple[Dict, str]], List[Tuple[str, str]], List[Dict], None]
674
- The list of repos (models/datasets) to load.
758
+ type : str, default 'default'
759
+ Card type.
760
+ id : str, optional, default None
761
+ If multiple cards are present, use this id to identify this card.
762
+ options : Dict[str, Any], default {}
763
+ Options passed to the card. The contents depend on the card type.
764
+ timeout : int, default 45
765
+ Interrupt reporting if it takes more than this many seconds.
766
+ """
767
+ ...
768
+
769
+ @typing.overload
770
+ def card(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
771
+ ...
772
+
773
+ @typing.overload
774
+ def card(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
775
+ ...
776
+
777
+ def card(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, type: str = 'default', id: typing.Optional[str] = None, options: typing.Dict[str, typing.Any] = {}, timeout: int = 45):
778
+ """
779
+ Creates a human-readable report, a Metaflow Card, after this step completes.
675
780
 
676
- Loaded repos can be accessed via `current.huggingface_hub.loaded`. If load is set, then the following happens:
781
+ Note that you may add multiple `@card` decorators in a step with different parameters.
677
782
 
678
- - If repo (model/dataset) is not found in the datastore:
679
- - Downloads the repo from Hugging Face Hub to a temporary directory (or uses specified path) for local access
680
- - Stores it in Metaflow's datastore (s3/gcs/azure etc.) with a unique name based on repo_type/repo_id
681
- - All HF models loaded for a `@step` will be cached separately under flow/step/namespace.
682
783
 
683
- - If repo is found in the datastore:
684
- - Loads it directly from datastore to local path (can be temporary directory or specified path)
784
+ Parameters
785
+ ----------
786
+ type : str, default 'default'
787
+ Card type.
788
+ id : str, optional, default None
789
+ If multiple cards are present, use this id to identify this card.
790
+ options : Dict[str, Any], default {}
791
+ Options passed to the card. The contents depend on the card type.
792
+ timeout : int, default 45
793
+ Interrupt reporting if it takes more than this many seconds.
685
794
  """
686
795
  ...
687
796
 
688
797
  @typing.overload
689
- def checkpoint(*, load_policy: str = 'fresh', temp_dir_root: str = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
798
+ def catch(*, var: typing.Optional[str] = None, print_exception: bool = True) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
690
799
  """
691
- Enables checkpointing for a step.
800
+ Specifies that the step will succeed under all circumstances.
692
801
 
693
- > Examples
694
-
695
- - Saving Checkpoints
696
-
697
- ```python
698
- @checkpoint
699
- @step
700
- def train(self):
701
- model = create_model(self.parameters, checkpoint_path = None)
702
- for i in range(self.epochs):
703
- # some training logic
704
- loss = model.train(self.dataset)
705
- if i % 10 == 0:
706
- model.save(
707
- current.checkpoint.directory,
708
- )
709
- # saves the contents of the `current.checkpoint.directory` as a checkpoint
710
- # and returns a reference dictionary to the checkpoint saved in the datastore
711
- self.latest_checkpoint = current.checkpoint.save(
712
- name="epoch_checkpoint",
713
- metadata={
714
- "epoch": i,
715
- "loss": loss,
716
- }
717
- )
718
- ```
719
-
720
- - Using Loaded Checkpoints
721
-
722
- ```python
723
- @retry(times=3)
724
- @checkpoint
725
- @step
726
- def train(self):
727
- # Assume that the task has restarted and the previous attempt of the task
728
- # saved a checkpoint
729
- checkpoint_path = None
730
- if current.checkpoint.is_loaded: # Check if a checkpoint is loaded
731
- print("Loaded checkpoint from the previous attempt")
732
- checkpoint_path = current.checkpoint.directory
733
-
734
- model = create_model(self.parameters, checkpoint_path = checkpoint_path)
735
- for i in range(self.epochs):
736
- ...
737
- ```
802
+ The decorator will create an optional artifact, specified by `var`, which
803
+ contains the exception raised. You can use it to detect the presence
804
+ of errors, indicating that all happy-path artifacts produced by the step
805
+ are missing.
738
806
 
739
807
 
740
808
  Parameters
741
809
  ----------
742
- load_policy : str, default: "fresh"
743
- The policy for loading the checkpoint. The following policies are supported:
744
- - "eager": Loads the latest available checkpoint within the namespace.
745
- With this mode, the latest checkpoint written by any previous task (can be even a different run) of the step
746
- will be loaded at the start of the task.
747
- - "none": Do not load any checkpoint
748
- - "fresh": Loads the latest checkpoint created within the running Task.
749
- This mode helps loading checkpoints across various retry attempts of the same task.
750
- With this mode, no checkpoint will be loaded at the start of a task but any checkpoints
751
- created within the task will be loaded when the task retries execution on failure.
752
-
753
- temp_dir_root : str, default: None
754
- The root directory under which `current.checkpoint.directory` will be created.
810
+ var : str, optional, default None
811
+ Name of the artifact in which to store the caught exception.
812
+ If not specified, the exception is not stored.
813
+ print_exception : bool, default True
814
+ Determines whether or not the exception is printed to
815
+ stdout when caught.
755
816
  """
756
817
  ...
757
818
 
758
819
  @typing.overload
759
- def checkpoint(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
820
+ def catch(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
760
821
  ...
761
822
 
762
823
  @typing.overload
763
- def checkpoint(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
824
+ def catch(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
764
825
  ...
765
826
 
766
- def checkpoint(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, load_policy: str = 'fresh', temp_dir_root: str = None):
827
+ def catch(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, var: typing.Optional[str] = None, print_exception: bool = True):
767
828
  """
768
- Enables checkpointing for a step.
769
-
770
- > Examples
771
-
772
- - Saving Checkpoints
773
-
774
- ```python
775
- @checkpoint
776
- @step
777
- def train(self):
778
- model = create_model(self.parameters, checkpoint_path = None)
779
- for i in range(self.epochs):
780
- # some training logic
781
- loss = model.train(self.dataset)
782
- if i % 10 == 0:
783
- model.save(
784
- current.checkpoint.directory,
785
- )
786
- # saves the contents of the `current.checkpoint.directory` as a checkpoint
787
- # and returns a reference dictionary to the checkpoint saved in the datastore
788
- self.latest_checkpoint = current.checkpoint.save(
789
- name="epoch_checkpoint",
790
- metadata={
791
- "epoch": i,
792
- "loss": loss,
793
- }
794
- )
795
- ```
796
-
797
- - Using Loaded Checkpoints
798
-
799
- ```python
800
- @retry(times=3)
801
- @checkpoint
802
- @step
803
- def train(self):
804
- # Assume that the task has restarted and the previous attempt of the task
805
- # saved a checkpoint
806
- checkpoint_path = None
807
- if current.checkpoint.is_loaded: # Check if a checkpoint is loaded
808
- print("Loaded checkpoint from the previous attempt")
809
- checkpoint_path = current.checkpoint.directory
829
+ Specifies that the step will succeed under all circumstances.
810
830
 
811
- model = create_model(self.parameters, checkpoint_path = checkpoint_path)
812
- for i in range(self.epochs):
813
- ...
814
- ```
831
+ The decorator will create an optional artifact, specified by `var`, which
832
+ contains the exception raised. You can use it to detect the presence
833
+ of errors, indicating that all happy-path artifacts produced by the step
834
+ are missing.
815
835
 
816
836
 
817
837
  Parameters
818
838
  ----------
819
- load_policy : str, default: "fresh"
820
- The policy for loading the checkpoint. The following policies are supported:
821
- - "eager": Loads the latest available checkpoint within the namespace.
822
- With this mode, the latest checkpoint written by any previous task (can be even a different run) of the step
823
- will be loaded at the start of the task.
824
- - "none": Do not load any checkpoint
825
- - "fresh": Loads the latest checkpoint created within the running Task.
826
- This mode helps loading checkpoints across various retry attempts of the same task.
827
- With this mode, no checkpoint will be loaded at the start of a task but any checkpoints
828
- created within the task will be loaded when the task retries execution on failure.
829
-
830
- temp_dir_root : str, default: None
831
- The root directory under which `current.checkpoint.directory` will be created.
839
+ var : str, optional, default None
840
+ Name of the artifact in which to store the caught exception.
841
+ If not specified, the exception is not stored.
842
+ print_exception : bool, default True
843
+ Determines whether or not the exception is printed to
844
+ stdout when caught.
832
845
  """
833
846
  ...
834
847
 
835
- def ollama(*, models: list, backend: str, force_pull: bool, cache_update_policy: str, force_cache_update: bool, debug: bool, circuit_breaker_config: dict, timeout_config: dict) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
848
+ @typing.overload
849
+ def timeout(*, seconds: int = 0, minutes: int = 0, hours: int = 0) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
836
850
  """
837
- This decorator is used to run Ollama APIs as Metaflow task sidecars.
838
-
839
- User code call
840
- --------------
841
- @ollama(
842
- models=[...],
843
- ...
844
- )
845
-
846
- Valid backend options
847
- ---------------------
848
- - 'local': Run as a separate process on the local task machine.
849
- - (TODO) 'managed': Outerbounds hosts and selects compute provider.
850
- - (TODO) 'remote': Spin up separate instance to serve Ollama models.
851
-
852
- Valid model options
853
- -------------------
854
- Any model here https://ollama.com/search, e.g. 'llama3.2', 'llama3.3'
851
+ Specifies a timeout for your step.
855
852
 
853
+ This decorator is useful if this step may hang indefinitely.
856
854
 
857
- Parameters
858
- ----------
859
- models: list[str]
860
- List of Ollama containers running models in sidecars.
861
- backend: str
862
- Determines where and how to run the Ollama process.
863
- force_pull: bool
864
- Whether to run `ollama pull` no matter what, or first check the remote cache in Metaflow datastore for this model key.
865
- cache_update_policy: str
866
- Cache update policy: "auto", "force", or "never".
867
- force_cache_update: bool
868
- Simple override for "force" cache update policy.
869
- debug: bool
870
- Whether to turn on verbose debugging logs.
871
- circuit_breaker_config: dict
872
- Configuration for circuit breaker protection. Keys: failure_threshold, recovery_timeout, reset_timeout.
873
- timeout_config: dict
874
- Configuration for various operation timeouts. Keys: pull, stop, health_check, install, server_startup.
875
- """
876
- ...
877
-
878
- @typing.overload
879
- def pypi(*, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
880
- """
881
- Specifies the PyPI packages for the step.
855
+ This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
856
+ A timeout is considered to be an exception thrown by the step. It will cause the step to be
857
+ retried if needed and the exception will be caught by the `@catch` decorator, if present.
882
858
 
883
- Information in this decorator will augment any
884
- attributes set in the `@pypi_base` flow-level decorator. Hence,
885
- you can use `@pypi_base` to set packages required by all
886
- steps and use `@pypi` to specify step-specific overrides.
859
+ Note that all the values specified in parameters are added together so if you specify
860
+ 60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.
887
861
 
888
862
 
889
863
  Parameters
890
864
  ----------
891
- packages : Dict[str, str], default: {}
892
- Packages to use for this step. The key is the name of the package
893
- and the value is the version to use.
894
- python : str, optional, default: None
895
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
896
- that the version used will correspond to the version of the Python interpreter used to start the run.
865
+ seconds : int, default 0
866
+ Number of seconds to wait prior to timing out.
867
+ minutes : int, default 0
868
+ Number of minutes to wait prior to timing out.
869
+ hours : int, default 0
870
+ Number of hours to wait prior to timing out.
897
871
  """
898
872
  ...
899
873
 
900
874
  @typing.overload
901
- def pypi(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
875
+ def timeout(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
902
876
  ...
903
877
 
904
878
  @typing.overload
905
- def pypi(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
879
+ def timeout(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
906
880
  ...
907
881
 
908
- def pypi(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None):
882
+ def timeout(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, seconds: int = 0, minutes: int = 0, hours: int = 0):
909
883
  """
910
- Specifies the PyPI packages for the step.
884
+ Specifies a timeout for your step.
911
885
 
912
- Information in this decorator will augment any
913
- attributes set in the `@pypi_base` flow-level decorator. Hence,
914
- you can use `@pypi_base` to set packages required by all
915
- steps and use `@pypi` to specify step-specific overrides.
886
+ This decorator is useful if this step may hang indefinitely.
887
+
888
+ This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
889
+ A timeout is considered to be an exception thrown by the step. It will cause the step to be
890
+ retried if needed and the exception will be caught by the `@catch` decorator, if present.
891
+
892
+ Note that all the values specified in parameters are added together so if you specify
893
+ 60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.
916
894
 
917
895
 
918
896
  Parameters
919
897
  ----------
920
- packages : Dict[str, str], default: {}
921
- Packages to use for this step. The key is the name of the package
922
- and the value is the version to use.
923
- python : str, optional, default: None
924
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
925
- that the version used will correspond to the version of the Python interpreter used to start the run.
898
+ seconds : int, default 0
899
+ Number of seconds to wait prior to timing out.
900
+ minutes : int, default 0
901
+ Number of minutes to wait prior to timing out.
902
+ hours : int, default 0
903
+ Number of hours to wait prior to timing out.
926
904
  """
927
905
  ...
928
906
 
@@ -1074,377 +1052,205 @@ def model(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], ty
1074
1052
  """
1075
1053
  ...
1076
1054
 
1077
- def nvct(*, gpu: int, gpu_type: str) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1078
- """
1079
- Specifies that this step should execute on DGX cloud.
1080
-
1081
-
1082
- Parameters
1083
- ----------
1084
- gpu : int
1085
- Number of GPUs to use.
1086
- gpu_type : str
1087
- Type of Nvidia GPU to use.
1088
- """
1089
- ...
1090
-
1091
- @typing.overload
1092
- def secrets(*, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = []) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1055
+ def kubernetes(*, cpu: int = 1, memory: int = 4096, disk: int = 10240, image: typing.Optional[str] = None, image_pull_policy: str = 'KUBERNETES_IMAGE_PULL_POLICY', image_pull_secrets: typing.List[str] = [], service_account: str = 'METAFLOW_KUBERNETES_SERVICE_ACCOUNT', secrets: typing.Optional[typing.List[str]] = None, node_selector: typing.Union[typing.Dict[str, str], str, None] = None, namespace: str = 'METAFLOW_KUBERNETES_NAMESPACE', gpu: typing.Optional[int] = None, gpu_vendor: str = 'KUBERNETES_GPU_VENDOR', tolerations: typing.List[str] = [], labels: typing.Dict[str, str] = 'METAFLOW_KUBERNETES_LABELS', annotations: typing.Dict[str, str] = 'METAFLOW_KUBERNETES_ANNOTATIONS', use_tmpfs: bool = False, tmpfs_tempdir: bool = True, tmpfs_size: typing.Optional[int] = None, tmpfs_path: typing.Optional[str] = '/metaflow_temp', persistent_volume_claims: typing.Optional[typing.Dict[str, str]] = None, shared_memory: typing.Optional[int] = None, port: typing.Optional[int] = None, compute_pool: typing.Optional[str] = None, hostname_resolution_timeout: int = 600, qos: str = 'Burstable', security_context: typing.Optional[typing.Dict[str, typing.Any]] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1093
1056
  """
1094
- Specifies secrets to be retrieved and injected as environment variables prior to
1095
- the execution of a step.
1057
+ Specifies that this step should execute on Kubernetes.
1096
1058
 
1097
1059
 
1098
1060
  Parameters
1099
1061
  ----------
1100
- sources : List[Union[str, Dict[str, Any]]], default: []
1101
- List of secret specs, defining how the secrets are to be retrieved
1102
- """
1103
- ...
1104
-
1105
- @typing.overload
1106
- def secrets(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1107
- ...
1108
-
1109
- @typing.overload
1110
- def secrets(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1111
- ...
1112
-
1113
- def secrets(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = []):
1114
- """
1115
- Specifies secrets to be retrieved and injected as environment variables prior to
1116
- the execution of a step.
1117
-
1118
-
1119
- Parameters
1120
- ----------
1121
- sources : List[Union[str, Dict[str, Any]]], default: []
1122
- List of secret specs, defining how the secrets are to be retrieved
1123
- """
1124
- ...
1125
-
1126
- def nvidia(*, gpu: int, gpu_type: str, queue_timeout: int) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1127
- """
1128
- Specifies that this step should execute on DGX cloud.
1129
-
1130
-
1131
- Parameters
1132
- ----------
1133
- gpu : int
1134
- Number of GPUs to use.
1135
- gpu_type : str
1136
- Type of Nvidia GPU to use.
1137
- queue_timeout : int
1138
- Time to keep the job in NVCF's queue.
1139
- """
1140
- ...
1141
-
1142
- @typing.overload
1143
- def environment(*, vars: typing.Dict[str, str] = {}) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1144
- """
1145
- Specifies environment variables to be set prior to the execution of a step.
1146
-
1147
-
1148
- Parameters
1149
- ----------
1150
- vars : Dict[str, str], default {}
1151
- Dictionary of environment variables to set.
1152
- """
1153
- ...
1154
-
1155
- @typing.overload
1156
- def environment(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1157
- ...
1158
-
1159
- @typing.overload
1160
- def environment(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1161
- ...
1162
-
1163
- def environment(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, vars: typing.Dict[str, str] = {}):
1164
- """
1165
- Specifies environment variables to be set prior to the execution of a step.
1166
-
1167
-
1168
- Parameters
1169
- ----------
1170
- vars : Dict[str, str], default {}
1171
- Dictionary of environment variables to set.
1172
- """
1173
- ...
1174
-
1175
- @typing.overload
1176
- def app_deploy(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1177
- """
1178
- Decorator prototype for all step decorators. This function gets specialized
1179
- and imported for all decorators types by _import_plugin_decorators().
1180
- """
1181
- ...
1182
-
1183
- @typing.overload
1184
- def app_deploy(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1185
- ...
1186
-
1187
- def app_deploy(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
1188
- """
1189
- Decorator prototype for all step decorators. This function gets specialized
1190
- and imported for all decorators types by _import_plugin_decorators().
1191
- """
1192
- ...
1193
-
1194
- @typing.overload
1195
- def timeout(*, seconds: int = 0, minutes: int = 0, hours: int = 0) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1196
- """
1197
- Specifies a timeout for your step.
1198
-
1199
- This decorator is useful if this step may hang indefinitely.
1200
-
1201
- This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
1202
- A timeout is considered to be an exception thrown by the step. It will cause the step to be
1203
- retried if needed and the exception will be caught by the `@catch` decorator, if present.
1204
-
1205
- Note that all the values specified in parameters are added together so if you specify
1206
- 60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.
1207
-
1208
-
1209
- Parameters
1210
- ----------
1211
- seconds : int, default 0
1212
- Number of seconds to wait prior to timing out.
1213
- minutes : int, default 0
1214
- Number of minutes to wait prior to timing out.
1215
- hours : int, default 0
1216
- Number of hours to wait prior to timing out.
1217
- """
1218
- ...
1219
-
1220
- @typing.overload
1221
- def timeout(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1222
- ...
1223
-
1224
- @typing.overload
1225
- def timeout(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1226
- ...
1227
-
1228
- def timeout(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, seconds: int = 0, minutes: int = 0, hours: int = 0):
1229
- """
1230
- Specifies a timeout for your step.
1231
-
1232
- This decorator is useful if this step may hang indefinitely.
1233
-
1234
- This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
1235
- A timeout is considered to be an exception thrown by the step. It will cause the step to be
1236
- retried if needed and the exception will be caught by the `@catch` decorator, if present.
1237
-
1238
- Note that all the values specified in parameters are added together so if you specify
1239
- 60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.
1240
-
1062
+ cpu : int, default 1
1063
+ Number of CPUs required for this step. If `@resources` is
1064
+ also present, the maximum value from all decorators is used.
1065
+ memory : int, default 4096
1066
+ Memory size (in MB) required for this step. If
1067
+ `@resources` is also present, the maximum value from all decorators is
1068
+ used.
1069
+ disk : int, default 10240
1070
+ Disk size (in MB) required for this step. If
1071
+ `@resources` is also present, the maximum value from all decorators is
1072
+ used.
1073
+ image : str, optional, default None
1074
+ Docker image to use when launching on Kubernetes. If not specified, and
1075
+ METAFLOW_KUBERNETES_CONTAINER_IMAGE is specified, that image is used. If
1076
+ not, a default Docker image mapping to the current version of Python is used.
1077
+ image_pull_policy: str, default KUBERNETES_IMAGE_PULL_POLICY
1078
+ If given, the imagePullPolicy to be applied to the Docker image of the step.
1079
+ image_pull_secrets: List[str], default []
1080
+ The default is extracted from METAFLOW_KUBERNETES_IMAGE_PULL_SECRETS.
1081
+ Kubernetes image pull secrets to use when pulling container images
1082
+ in Kubernetes.
1083
+ service_account : str, default METAFLOW_KUBERNETES_SERVICE_ACCOUNT
1084
+ Kubernetes service account to use when launching pod in Kubernetes.
1085
+ secrets : List[str], optional, default None
1086
+ Kubernetes secrets to use when launching pod in Kubernetes. These
1087
+ secrets are in addition to the ones defined in `METAFLOW_KUBERNETES_SECRETS`
1088
+ in Metaflow configuration.
1089
+ node_selector: Union[Dict[str,str], str], optional, default None
1090
+ Kubernetes node selector(s) to apply to the pod running the task.
1091
+ Can be passed in as a comma separated string of values e.g.
1092
+ 'kubernetes.io/os=linux,kubernetes.io/arch=amd64' or as a dictionary
1093
+ {'kubernetes.io/os': 'linux', 'kubernetes.io/arch': 'amd64'}
1094
+ namespace : str, default METAFLOW_KUBERNETES_NAMESPACE
1095
+ Kubernetes namespace to use when launching pod in Kubernetes.
1096
+ gpu : int, optional, default None
1097
+ Number of GPUs required for this step. A value of zero implies that
1098
+ the scheduled node should not have GPUs.
1099
+ gpu_vendor : str, default KUBERNETES_GPU_VENDOR
1100
+ The vendor of the GPUs to be used for this step.
1101
+ tolerations : List[str], default []
1102
+ The default is extracted from METAFLOW_KUBERNETES_TOLERATIONS.
1103
+ Kubernetes tolerations to use when launching pod in Kubernetes.
1104
+ labels: Dict[str, str], default: METAFLOW_KUBERNETES_LABELS
1105
+ Kubernetes labels to use when launching pod in Kubernetes.
1106
+ annotations: Dict[str, str], default: METAFLOW_KUBERNETES_ANNOTATIONS
1107
+ Kubernetes annotations to use when launching pod in Kubernetes.
1108
+ use_tmpfs : bool, default False
1109
+ This enables an explicit tmpfs mount for this step.
1110
+ tmpfs_tempdir : bool, default True
1111
+ Sets METAFLOW_TEMPDIR to tmpfs_path if set for this step.
1112
+ tmpfs_size : int, optional, default: None
1113
+ The value for the size (in MiB) of the tmpfs mount for this step.
1114
+ This parameter maps to the `--tmpfs` option in Docker. Defaults to 50% of the
1115
+ memory allocated for this step.
1116
+ tmpfs_path : str, optional, default /metaflow_temp
1117
+ Path to tmpfs mount for this step.
1118
+ persistent_volume_claims : Dict[str, str], optional, default None
1119
+ A map (dictionary) of persistent volumes to be mounted to the pod for this step. The map is from persistent
1120
+ volumes to the path to which the volume is to be mounted, e.g., `{'pvc-name': '/path/to/mount/on'}`.
1121
+ shared_memory: int, optional
1122
+ Shared memory size (in MiB) required for this step
1123
+ port: int, optional
1124
+ Port number to specify in the Kubernetes job object
1125
+ compute_pool : str, optional, default None
1126
+ Compute pool to be used for this step.
1127
+ If not specified, any accessible compute pool within the perimeter is used.
1128
+ hostname_resolution_timeout: int, default 10 * 60
1129
+ Timeout in seconds for the worker tasks in the gang-scheduled cluster to resolve the hostname of the control task.
1130
+ Only applicable when @parallel is used.
1131
+ qos: str, default: Burstable
1132
+ Quality of Service class to assign to the pod. Supported values are: Guaranteed, Burstable, BestEffort
1241
1133
 
1242
- Parameters
1243
- ----------
1244
- seconds : int, default 0
1245
- Number of seconds to wait prior to timing out.
1246
- minutes : int, default 0
1247
- Number of minutes to wait prior to timing out.
1248
- hours : int, default 0
1249
- Number of hours to wait prior to timing out.
1134
+ security_context: Dict[str, Any], optional, default None
1135
+ Container security context. Applies to the task container. Allows the following keys:
1136
+ - privileged: bool, optional, default None
1137
+ - allow_privilege_escalation: bool, optional, default None
1138
+ - run_as_user: int, optional, default None
1139
+ - run_as_group: int, optional, default None
1140
+ - run_as_non_root: bool, optional, default None
1250
1141
  """
1251
1142
  ...
1252
1143
 
1253
- def with_artifact_store(f: typing.Optional[typing.Type[FlowSpecDerived]] = None):
1144
+ def huggingface_hub(*, temp_dir_root: typing.Optional[str] = None, load: typing.Union[typing.List[str], typing.List[typing.Tuple[typing.Dict, str]], typing.List[typing.Tuple[str, str]], typing.List[typing.Dict], None]) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1254
1145
  """
1255
- Allows setting external datastores to save data for the
1256
- `@checkpoint`/`@model`/`@huggingface_hub` decorators.
1257
-
1258
- This decorator is useful when users wish to save data to a different datastore
1259
- than what is configured in Metaflow. This can be for variety of reasons:
1260
-
1261
- 1. Data security: The objects needs to be stored in a bucket (object storage) that is not accessible by other flows.
1262
- 2. Data Locality: The location where the task is executing is not located in the same region as the datastore.
1263
- - Example: Metaflow datastore lives in US East, but the task is executing in Finland datacenters.
1264
- 3. Data Lifecycle Policies: The objects need to be archived / managed separately from the Metaflow managed objects.
1265
- - Example: Flow is training very large models that need to be stored separately and will be deleted more aggressively than the Metaflow managed objects.
1266
-
1267
- Usage:
1268
- ----------
1269
-
1270
- - Using a custom IAM role to access the datastore.
1271
-
1272
- ```python
1273
- @with_artifact_store(
1274
- type="s3",
1275
- config=lambda: {
1276
- "root": "s3://my-bucket-foo/path/to/root",
1277
- "role_arn": ROLE,
1278
- },
1279
- )
1280
- class MyFlow(FlowSpec):
1281
-
1282
- @checkpoint
1283
- @step
1284
- def start(self):
1285
- with open("my_file.txt", "w") as f:
1286
- f.write("Hello, World!")
1287
- self.external_bucket_checkpoint = current.checkpoint.save("my_file.txt")
1288
- self.next(self.end)
1289
-
1290
- ```
1291
-
1292
- - Using credentials to access the s3-compatible datastore.
1146
+ Decorator that helps cache, version and store models/datasets from huggingface hub.
1293
1147
 
1294
- ```python
1295
- @with_artifact_store(
1296
- type="s3",
1297
- config=lambda: {
1298
- "root": "s3://my-bucket-foo/path/to/root",
1299
- "client_params": {
1300
- "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1301
- "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1302
- },
1303
- },
1304
- )
1305
- class MyFlow(FlowSpec):
1148
+ > Examples
1306
1149
 
1307
- @checkpoint
1308
- @step
1309
- def start(self):
1310
- with open("my_file.txt", "w") as f:
1311
- f.write("Hello, World!")
1312
- self.external_bucket_checkpoint = current.checkpoint.save("my_file.txt")
1313
- self.next(self.end)
1150
+ **Usage: creating references of models from huggingface that may be loaded in downstream steps**
1151
+ ```python
1152
+ @huggingface_hub
1153
+ @step
1154
+ def pull_model_from_huggingface(self):
1155
+ # `current.huggingface_hub.snapshot_download` downloads the model from the Hugging Face Hub
1156
+ # and saves it in the backend storage based on the model's `repo_id`. If there exists a model
1157
+ # with the same `repo_id` in the backend storage, it will not download the model again. The return
1158
+ # value of the function is a reference to the model in the backend storage.
1159
+ # This reference can be used to load the model in the subsequent steps via `@model(load=["llama_model"])`
1314
1160
 
1315
- ```
1161
+ self.model_id = "mistralai/Mistral-7B-Instruct-v0.1"
1162
+ self.llama_model = current.huggingface_hub.snapshot_download(
1163
+ repo_id=self.model_id,
1164
+ allow_patterns=["*.safetensors", "*.json", "tokenizer.*"],
1165
+ )
1166
+ self.next(self.train)
1167
+ ```
1316
1168
 
1317
- - Accessing objects stored in external datastores after task execution.
1169
+ **Usage: loading models directly from huggingface hub or from cache (from metaflow's datastore)**
1170
+ ```python
1171
+ @huggingface_hub(load=["mistralai/Mistral-7B-Instruct-v0.1"])
1172
+ @step
1173
+ def pull_model_from_huggingface(self):
1174
+ path_to_model = current.huggingface_hub.loaded["mistralai/Mistral-7B-Instruct-v0.1"]
1175
+ ```
1318
1176
 
1319
- ```python
1320
- run = Run("CheckpointsTestsFlow/8992")
1321
- with artifact_store_from(run=run, config={
1322
- "client_params": {
1323
- "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1324
- "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1325
- },
1326
- }):
1327
- with Checkpoint() as cp:
1328
- latest = cp.list(
1329
- task=run["start"].task
1330
- )[0]
1331
- print(latest)
1332
- cp.load(
1333
- latest,
1334
- "test-checkpoints"
1335
- )
1177
+ ```python
1178
+ @huggingface_hub(load=[("mistralai/Mistral-7B-Instruct-v0.1", "/my-directory"), ("myorg/mistral-lora", "/my-lora-directory")])
1179
+ @step
1180
+ def finetune_model(self):
1181
+ path_to_model = current.huggingface_hub.loaded["mistralai/Mistral-7B-Instruct-v0.1"]
1182
+ # path_to_model will be /my-directory
1183
+ ```
1336
1184
 
1337
- task = Task("TorchTuneFlow/8484/train/53673")
1338
- with artifact_store_from(run=run, config={
1339
- "client_params": {
1340
- "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1341
- "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1185
+ ```python
1186
+ # Takes all the arguments passed to `snapshot_download`
1187
+ # except for `local_dir`
1188
+ @huggingface_hub(load=[
1189
+ {
1190
+ "repo_id": "mistralai/Mistral-7B-Instruct-v0.1",
1342
1191
  },
1343
- }):
1344
- load_model(
1345
- task.data.model_ref,
1346
- "test-models"
1347
- )
1348
- ```
1349
- Parameters:
1192
+ {
1193
+ "repo_id": "myorg/mistral-lora",
1194
+ "repo_type": "model",
1195
+ },
1196
+ ])
1197
+ @step
1198
+ def finetune_model(self):
1199
+ path_to_model = current.huggingface_hub.loaded["mistralai/Mistral-7B-Instruct-v0.1"]
1200
+ # path_to_model will be /my-directory
1201
+ ```
1202
+
1203
+
1204
+ Parameters
1350
1205
  ----------
1206
+ temp_dir_root : str, optional
1207
+ The root directory that will hold the temporary directory where objects will be downloaded.
1351
1208
 
1352
- type: str
1353
- The type of the datastore. Can be one of 's3', 'gcs', 'azure' or any other supported metaflow Datastore.
1209
+ load: Union[List[str], List[Tuple[Dict, str]], List[Tuple[str, str]], List[Dict], None]
1210
+ The list of repos (models/datasets) to load.
1354
1211
 
1355
- config: dict or Callable
1356
- Dictionary of configuration options for the datastore. The following keys are required:
1357
- - root: The root path in the datastore where the data will be saved. (needs to be in the format expected by the datastore)
1358
- - example: 's3://bucket-name/path/to/root'
1359
- - example: 'gs://bucket-name/path/to/root'
1360
- - example: 'https://myblockacc.blob.core.windows.net/metaflow/'
1361
- - role_arn (optional): AWS IAM role to access s3 bucket (only when `type` is 's3')
1362
- - session_vars (optional): AWS session variables to access s3 bucket (only when `type` is 's3')
1363
- - client_params (optional): AWS client parameters to access s3 bucket (only when `type` is 's3')
1212
+ Loaded repos can be accessed via `current.huggingface_hub.loaded`. If load is set, then the following happens:
1213
+
1214
+ - If repo (model/dataset) is not found in the datastore:
1215
+ - Downloads the repo from Hugging Face Hub to a temporary directory (or uses specified path) for local access
1216
+ - Stores it in Metaflow's datastore (s3/gcs/azure etc.) with a unique name based on repo_type/repo_id
1217
+ - All HF models loaded for a `@step` will be cached separately under flow/step/namespace.
1218
+
1219
+ - If repo is found in the datastore:
1220
+ - Loads it directly from datastore to local path (can be temporary directory or specified path)
1364
1221
  """
1365
1222
  ...
1366
1223
 
1367
1224
  @typing.overload
1368
- def pypi_base(*, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1225
+ def environment(*, vars: typing.Dict[str, str] = {}) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1369
1226
  """
1370
- Specifies the PyPI packages for all steps of the flow.
1227
+ Specifies environment variables to be set prior to the execution of a step.
1371
1228
 
1372
- Use `@pypi_base` to set common packages required by all
1373
- steps and use `@pypi` to specify step-specific overrides.
1374
1229
 
1375
1230
  Parameters
1376
1231
  ----------
1377
- packages : Dict[str, str], default: {}
1378
- Packages to use for this flow. The key is the name of the package
1379
- and the value is the version to use.
1380
- python : str, optional, default: None
1381
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
1382
- that the version used will correspond to the version of the Python interpreter used to start the run.
1232
+ vars : Dict[str, str], default {}
1233
+ Dictionary of environment variables to set.
1383
1234
  """
1384
1235
  ...
1385
1236
 
1386
1237
  @typing.overload
1387
- def pypi_base(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1238
+ def environment(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1388
1239
  ...
1389
1240
 
1390
- def pypi_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None):
1391
- """
1392
- Specifies the PyPI packages for all steps of the flow.
1393
-
1394
- Use `@pypi_base` to set common packages required by all
1395
- steps and use `@pypi` to specify step-specific overrides.
1396
-
1397
- Parameters
1398
- ----------
1399
- packages : Dict[str, str], default: {}
1400
- Packages to use for this flow. The key is the name of the package
1401
- and the value is the version to use.
1402
- python : str, optional, default: None
1403
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
1404
- that the version used will correspond to the version of the Python interpreter used to start the run.
1405
- """
1241
+ @typing.overload
1242
+ def environment(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1406
1243
  ...
1407
1244
 
1408
- def airflow_s3_key_sensor(*, timeout: int, poke_interval: int, mode: str, exponential_backoff: bool, pool: str, soft_fail: bool, name: str, description: str, bucket_key: typing.Union[str, typing.List[str]], bucket_name: str, wildcard_match: bool, aws_conn_id: str, verify: bool) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1245
+ def environment(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, vars: typing.Dict[str, str] = {}):
1409
1246
  """
1410
- The `@airflow_s3_key_sensor` decorator attaches a Airflow [S3KeySensor](https://airflow.apache.org/docs/apache-airflow-providers-amazon/stable/_api/airflow/providers/amazon/aws/sensors/s3/index.html#airflow.providers.amazon.aws.sensors.s3.S3KeySensor)
1411
- before the start step of the flow. This decorator only works when a flow is scheduled on Airflow
1412
- and is compiled using `airflow create`. More than one `@airflow_s3_key_sensor` can be
1413
- added as a flow decorators. Adding more than one decorator will ensure that `start` step
1414
- starts only after all sensors finish.
1247
+ Specifies environment variables to be set prior to the execution of a step.
1415
1248
 
1416
1249
 
1417
1250
  Parameters
1418
1251
  ----------
1419
- timeout : int
1420
- Time, in seconds before the task times out and fails. (Default: 3600)
1421
- poke_interval : int
1422
- Time in seconds that the job should wait in between each try. (Default: 60)
1423
- mode : str
1424
- How the sensor operates. Options are: { poke | reschedule }. (Default: "poke")
1425
- exponential_backoff : bool
1426
- allow progressive longer waits between pokes by using exponential backoff algorithm. (Default: True)
1427
- pool : str
1428
- the slot pool this task should run in,
1429
- slot pools are a way to limit concurrency for certain tasks. (Default:None)
1430
- soft_fail : bool
1431
- Set to true to mark the task as SKIPPED on failure. (Default: False)
1432
- name : str
1433
- Name of the sensor on Airflow
1434
- description : str
1435
- Description of sensor in the Airflow UI
1436
- bucket_key : Union[str, List[str]]
1437
- The key(s) being waited on. Supports full s3:// style url or relative path from root level.
1438
- When it's specified as a full s3:// url, please leave `bucket_name` as None
1439
- bucket_name : str
1440
- Name of the S3 bucket. Only needed when bucket_key is not provided as a full s3:// url.
1441
- When specified, all the keys passed to bucket_key refers to this bucket. (Default:None)
1442
- wildcard_match : bool
1443
- whether the bucket_key should be interpreted as a Unix wildcard pattern. (Default: False)
1444
- aws_conn_id : str
1445
- a reference to the s3 connection on Airflow. (Default: None)
1446
- verify : bool
1447
- Whether or not to verify SSL certificates for S3 connection. (Default: None)
1252
+ vars : Dict[str, str], default {}
1253
+ Dictionary of environment variables to set.
1448
1254
  """
1449
1255
  ...
1450
1256
 
@@ -1499,6 +1305,41 @@ def schedule(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, hourly:
1499
1305
  """
1500
1306
  ...
1501
1307
 
1308
+ def project(*, name: str, branch: typing.Optional[str] = None, production: bool = False) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1309
+ """
1310
+ Specifies what flows belong to the same project.
1311
+
1312
+ A project-specific namespace is created for all flows that
1313
+ use the same `@project(name)`.
1314
+
1315
+
1316
+ Parameters
1317
+ ----------
1318
+ name : str
1319
+ Project name. Make sure that the name is unique amongst all
1320
+ projects that use the same production scheduler. The name may
1321
+ contain only lowercase alphanumeric characters and underscores.
1322
+
1323
+ branch : Optional[str], default None
1324
+ The branch to use. If not specified, the branch is set to
1325
+ `user.<username>` unless `production` is set to `True`. This can
1326
+ also be set on the command line using `--branch` as a top-level option.
1327
+ It is an error to specify `branch` in the decorator and on the command line.
1328
+
1329
+ production : bool, default False
1330
+ Whether or not the branch is the production branch. This can also be set on the
1331
+ command line using `--production` as a top-level option. It is an error to specify
1332
+ `production` in the decorator and on the command line.
1333
+ The project branch name will be:
1334
+ - if `branch` is specified:
1335
+ - if `production` is True: `prod.<branch>`
1336
+ - if `production` is False: `test.<branch>`
1337
+ - if `branch` is not specified:
1338
+ - if `production` is True: `prod`
1339
+ - if `production` is False: `user.<username>`
1340
+ """
1341
+ ...
1342
+
1502
1343
  @typing.overload
1503
1344
  def trigger(*, event: typing.Union[str, typing.Dict[str, typing.Any], None] = None, events: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = [], options: typing.Dict[str, typing.Any] = {}) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1504
1345
  """
@@ -1625,99 +1466,13 @@ def airflow_external_task_sensor(*, timeout: int, poke_interval: int, mode: str,
1625
1466
  allowed_states : List[str]
1626
1467
  Iterable of allowed states, (Default: ['success'])
1627
1468
  failed_states : List[str]
1628
- Iterable of failed or dis-allowed states. (Default: None)
1629
- execution_delta : datetime.timedelta
1630
- time difference with the previous execution to look at,
1631
- the default is the same logical date as the current task or DAG. (Default: None)
1632
- check_existence: bool
1633
- Set to True to check if the external task exists or check if
1634
- the DAG to wait for exists. (Default: True)
1635
- """
1636
- ...
1637
-
1638
- @typing.overload
1639
- def conda_base(*, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1640
- """
1641
- Specifies the Conda environment for all steps of the flow.
1642
-
1643
- Use `@conda_base` to set common libraries required by all
1644
- steps and use `@conda` to specify step-specific additions.
1645
-
1646
-
1647
- Parameters
1648
- ----------
1649
- packages : Dict[str, str], default {}
1650
- Packages to use for this flow. The key is the name of the package
1651
- and the value is the version to use.
1652
- libraries : Dict[str, str], default {}
1653
- Supported for backward compatibility. When used with packages, packages will take precedence.
1654
- python : str, optional, default None
1655
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
1656
- that the version used will correspond to the version of the Python interpreter used to start the run.
1657
- disabled : bool, default False
1658
- If set to True, disables Conda.
1659
- """
1660
- ...
1661
-
1662
- @typing.overload
1663
- def conda_base(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1664
- ...
1665
-
1666
- def conda_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False):
1667
- """
1668
- Specifies the Conda environment for all steps of the flow.
1669
-
1670
- Use `@conda_base` to set common libraries required by all
1671
- steps and use `@conda` to specify step-specific additions.
1672
-
1673
-
1674
- Parameters
1675
- ----------
1676
- packages : Dict[str, str], default {}
1677
- Packages to use for this flow. The key is the name of the package
1678
- and the value is the version to use.
1679
- libraries : Dict[str, str], default {}
1680
- Supported for backward compatibility. When used with packages, packages will take precedence.
1681
- python : str, optional, default None
1682
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
1683
- that the version used will correspond to the version of the Python interpreter used to start the run.
1684
- disabled : bool, default False
1685
- If set to True, disables Conda.
1686
- """
1687
- ...
1688
-
1689
- def project(*, name: str, branch: typing.Optional[str] = None, production: bool = False) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1690
- """
1691
- Specifies what flows belong to the same project.
1692
-
1693
- A project-specific namespace is created for all flows that
1694
- use the same `@project(name)`.
1695
-
1696
-
1697
- Parameters
1698
- ----------
1699
- name : str
1700
- Project name. Make sure that the name is unique amongst all
1701
- projects that use the same production scheduler. The name may
1702
- contain only lowercase alphanumeric characters and underscores.
1703
-
1704
- branch : Optional[str], default None
1705
- The branch to use. If not specified, the branch is set to
1706
- `user.<username>` unless `production` is set to `True`. This can
1707
- also be set on the command line using `--branch` as a top-level option.
1708
- It is an error to specify `branch` in the decorator and on the command line.
1709
-
1710
- production : bool, default False
1711
- Whether or not the branch is the production branch. This can also be set on the
1712
- command line using `--production` as a top-level option. It is an error to specify
1713
- `production` in the decorator and on the command line.
1714
- The project branch name will be:
1715
- - if `branch` is specified:
1716
- - if `production` is True: `prod.<branch>`
1717
- - if `production` is False: `test.<branch>`
1718
- - if `branch` is not specified:
1719
- - if `production` is True: `prod`
1720
- - if `production` is False: `user.<username>`
1469
+ Iterable of failed or dis-allowed states. (Default: None)
1470
+ execution_delta : datetime.timedelta
1471
+ time difference with the previous execution to look at,
1472
+ the default is the same logical date as the current task or DAG. (Default: None)
1473
+ check_existence: bool
1474
+ Set to True to check if the external task exists or check if
1475
+ the DAG to wait for exists. (Default: True)
1721
1476
  """
1722
1477
  ...
1723
1478
 
@@ -1822,5 +1577,254 @@ def trigger_on_finish(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *
1822
1577
  """
1823
1578
  ...
1824
1579
 
1580
+ def airflow_s3_key_sensor(*, timeout: int, poke_interval: int, mode: str, exponential_backoff: bool, pool: str, soft_fail: bool, name: str, description: str, bucket_key: typing.Union[str, typing.List[str]], bucket_name: str, wildcard_match: bool, aws_conn_id: str, verify: bool) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1581
+ """
1582
+ The `@airflow_s3_key_sensor` decorator attaches an Airflow [S3KeySensor](https://airflow.apache.org/docs/apache-airflow-providers-amazon/stable/_api/airflow/providers/amazon/aws/sensors/s3/index.html#airflow.providers.amazon.aws.sensors.s3.S3KeySensor)
1583
+ before the start step of the flow. This decorator only works when a flow is scheduled on Airflow
1584
+ and is compiled using `airflow create`. More than one `@airflow_s3_key_sensor` can be
1585
+ added as flow decorators. Adding more than one decorator will ensure that the `start` step
1586
+ starts only after all sensors finish.
1587
+
1588
+
1589
+ Parameters
1590
+ ----------
1591
+ timeout : int
1592
+ Time, in seconds before the task times out and fails. (Default: 3600)
1593
+ poke_interval : int
1594
+ Time in seconds that the job should wait in between each try. (Default: 60)
1595
+ mode : str
1596
+ How the sensor operates. Options are: { poke | reschedule }. (Default: "poke")
1597
+ exponential_backoff : bool
1598
+ allow progressively longer waits between pokes by using an exponential backoff algorithm. (Default: True)
1599
+ pool : str
1600
+ the slot pool this task should run in,
1601
+ slot pools are a way to limit concurrency for certain tasks. (Default:None)
1602
+ soft_fail : bool
1603
+ Set to true to mark the task as SKIPPED on failure. (Default: False)
1604
+ name : str
1605
+ Name of the sensor on Airflow
1606
+ description : str
1607
+ Description of sensor in the Airflow UI
1608
+ bucket_key : Union[str, List[str]]
1609
+ The key(s) being waited on. Supports full s3:// style url or relative path from root level.
1610
+ When it's specified as a full s3:// url, please leave `bucket_name` as None
1611
+ bucket_name : str
1612
+ Name of the S3 bucket. Only needed when bucket_key is not provided as a full s3:// url.
1613
+ When specified, all the keys passed to bucket_key refer to this bucket. (Default:None)
1614
+ wildcard_match : bool
1615
+ whether the bucket_key should be interpreted as a Unix wildcard pattern. (Default: False)
1616
+ aws_conn_id : str
1617
+ a reference to the s3 connection on Airflow. (Default: None)
1618
+ verify : bool
1619
+ Whether or not to verify SSL certificates for S3 connection. (Default: None)
1620
+ """
1621
+ ...
1622
+
1623
+ @typing.overload
1624
+ def conda_base(*, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1625
+ """
1626
+ Specifies the Conda environment for all steps of the flow.
1627
+
1628
+ Use `@conda_base` to set common libraries required by all
1629
+ steps and use `@conda` to specify step-specific additions.
1630
+
1631
+
1632
+ Parameters
1633
+ ----------
1634
+ packages : Dict[str, str], default {}
1635
+ Packages to use for this flow. The key is the name of the package
1636
+ and the value is the version to use.
1637
+ libraries : Dict[str, str], default {}
1638
+ Supported for backward compatibility. When used with packages, packages will take precedence.
1639
+ python : str, optional, default None
1640
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
1641
+ that the version used will correspond to the version of the Python interpreter used to start the run.
1642
+ disabled : bool, default False
1643
+ If set to True, disables Conda.
1644
+ """
1645
+ ...
1646
+
1647
+ @typing.overload
1648
+ def conda_base(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1649
+ ...
1650
+
1651
+ def conda_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False):
1652
+ """
1653
+ Specifies the Conda environment for all steps of the flow.
1654
+
1655
+ Use `@conda_base` to set common libraries required by all
1656
+ steps and use `@conda` to specify step-specific additions.
1657
+
1658
+
1659
+ Parameters
1660
+ ----------
1661
+ packages : Dict[str, str], default {}
1662
+ Packages to use for this flow. The key is the name of the package
1663
+ and the value is the version to use.
1664
+ libraries : Dict[str, str], default {}
1665
+ Supported for backward compatibility. When used with packages, packages will take precedence.
1666
+ python : str, optional, default None
1667
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
1668
+ that the version used will correspond to the version of the Python interpreter used to start the run.
1669
+ disabled : bool, default False
1670
+ If set to True, disables Conda.
1671
+ """
1672
+ ...
1673
+
1674
+ def with_artifact_store(f: typing.Optional[typing.Type[FlowSpecDerived]] = None):
1675
+ """
1676
+ Allows setting external datastores to save data for the
1677
+ `@checkpoint`/`@model`/`@huggingface_hub` decorators.
1678
+
1679
+ This decorator is useful when users wish to save data to a different datastore
1680
+ than what is configured in Metaflow. This can be for a variety of reasons:
1681
+
1682
+ 1. Data security: The objects need to be stored in a bucket (object storage) that is not accessible by other flows.
1683
+ 2. Data Locality: The location where the task is executing is not located in the same region as the datastore.
1684
+ - Example: Metaflow datastore lives in US East, but the task is executing in Finland datacenters.
1685
+ 3. Data Lifecycle Policies: The objects need to be archived / managed separately from the Metaflow managed objects.
1686
+ - Example: Flow is training very large models that need to be stored separately and will be deleted more aggressively than the Metaflow managed objects.
1687
+
1688
+ Usage:
1689
+ ----------
1690
+
1691
+ - Using a custom IAM role to access the datastore.
1692
+
1693
+ ```python
1694
+ @with_artifact_store(
1695
+ type="s3",
1696
+ config=lambda: {
1697
+ "root": "s3://my-bucket-foo/path/to/root",
1698
+ "role_arn": ROLE,
1699
+ },
1700
+ )
1701
+ class MyFlow(FlowSpec):
1702
+
1703
+ @checkpoint
1704
+ @step
1705
+ def start(self):
1706
+ with open("my_file.txt", "w") as f:
1707
+ f.write("Hello, World!")
1708
+ self.external_bucket_checkpoint = current.checkpoint.save("my_file.txt")
1709
+ self.next(self.end)
1710
+
1711
+ ```
1712
+
1713
+ - Using credentials to access the s3-compatible datastore.
1714
+
1715
+ ```python
1716
+ @with_artifact_store(
1717
+ type="s3",
1718
+ config=lambda: {
1719
+ "root": "s3://my-bucket-foo/path/to/root",
1720
+ "client_params": {
1721
+ "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1722
+ "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1723
+ },
1724
+ },
1725
+ )
1726
+ class MyFlow(FlowSpec):
1727
+
1728
+ @checkpoint
1729
+ @step
1730
+ def start(self):
1731
+ with open("my_file.txt", "w") as f:
1732
+ f.write("Hello, World!")
1733
+ self.external_bucket_checkpoint = current.checkpoint.save("my_file.txt")
1734
+ self.next(self.end)
1735
+
1736
+ ```
1737
+
1738
+ - Accessing objects stored in external datastores after task execution.
1739
+
1740
+ ```python
1741
+ run = Run("CheckpointsTestsFlow/8992")
1742
+ with artifact_store_from(run=run, config={
1743
+ "client_params": {
1744
+ "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1745
+ "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1746
+ },
1747
+ }):
1748
+ with Checkpoint() as cp:
1749
+ latest = cp.list(
1750
+ task=run["start"].task
1751
+ )[0]
1752
+ print(latest)
1753
+ cp.load(
1754
+ latest,
1755
+ "test-checkpoints"
1756
+ )
1757
+
1758
+ task = Task("TorchTuneFlow/8484/train/53673")
1759
+ with artifact_store_from(run=run, config={
1760
+ "client_params": {
1761
+ "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1762
+ "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1763
+ },
1764
+ }):
1765
+ load_model(
1766
+ task.data.model_ref,
1767
+ "test-models"
1768
+ )
1769
+ ```
1770
+ Parameters:
1771
+ ----------
1772
+
1773
+ type: str
1774
+ The type of the datastore. Can be one of 's3', 'gcs', 'azure' or any other supported metaflow Datastore.
1775
+
1776
+ config: dict or Callable
1777
+ Dictionary of configuration options for the datastore. The following keys are required:
1778
+ - root: The root path in the datastore where the data will be saved. (needs to be in the format expected by the datastore)
1779
+ - example: 's3://bucket-name/path/to/root'
1780
+ - example: 'gs://bucket-name/path/to/root'
1781
+ - example: 'https://myblockacc.blob.core.windows.net/metaflow/'
1782
+ - role_arn (optional): AWS IAM role to access s3 bucket (only when `type` is 's3')
1783
+ - session_vars (optional): AWS session variables to access s3 bucket (only when `type` is 's3')
1784
+ - client_params (optional): AWS client parameters to access s3 bucket (only when `type` is 's3')
1785
+ """
1786
+ ...
1787
+
1788
+ @typing.overload
1789
+ def pypi_base(*, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1790
+ """
1791
+ Specifies the PyPI packages for all steps of the flow.
1792
+
1793
+ Use `@pypi_base` to set common packages required by all
1794
+ steps and use `@pypi` to specify step-specific overrides.
1795
+
1796
+ Parameters
1797
+ ----------
1798
+ packages : Dict[str, str], default: {}
1799
+ Packages to use for this flow. The key is the name of the package
1800
+ and the value is the version to use.
1801
+ python : str, optional, default: None
1802
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
1803
+ that the version used will correspond to the version of the Python interpreter used to start the run.
1804
+ """
1805
+ ...
1806
+
1807
+ @typing.overload
1808
+ def pypi_base(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1809
+ ...
1810
+
1811
+ def pypi_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None):
1812
+ """
1813
+ Specifies the PyPI packages for all steps of the flow.
1814
+
1815
+ Use `@pypi_base` to set common packages required by all
1816
+ steps and use `@pypi` to specify step-specific overrides.
1817
+
1818
+ Parameters
1819
+ ----------
1820
+ packages : Dict[str, str], default: {}
1821
+ Packages to use for this flow. The key is the name of the package
1822
+ and the value is the version to use.
1823
+ python : str, optional, default: None
1824
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
1825
+ that the version used will correspond to the version of the Python interpreter used to start the run.
1826
+ """
1827
+ ...
1828
+
1825
1829
  pkg_name: str
1826
1830