ob-metaflow-stubs 6.0.7.0__py2.py3-none-any.whl → 6.0.7.2__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ob-metaflow-stubs might be problematic. Click here for more details.

Files changed (262) hide show
  1. metaflow-stubs/__init__.pyi +1090 -1090
  2. metaflow-stubs/cards.pyi +1 -1
  3. metaflow-stubs/cli.pyi +1 -1
  4. metaflow-stubs/cli_components/__init__.pyi +1 -1
  5. metaflow-stubs/cli_components/utils.pyi +1 -1
  6. metaflow-stubs/client/__init__.pyi +1 -1
  7. metaflow-stubs/client/core.pyi +6 -6
  8. metaflow-stubs/client/filecache.pyi +2 -2
  9. metaflow-stubs/events.pyi +2 -2
  10. metaflow-stubs/exception.pyi +1 -1
  11. metaflow-stubs/flowspec.pyi +5 -5
  12. metaflow-stubs/generated_for.txt +1 -1
  13. metaflow-stubs/includefile.pyi +3 -3
  14. metaflow-stubs/meta_files.pyi +1 -1
  15. metaflow-stubs/metadata_provider/__init__.pyi +1 -1
  16. metaflow-stubs/metadata_provider/heartbeat.pyi +1 -1
  17. metaflow-stubs/metadata_provider/metadata.pyi +2 -2
  18. metaflow-stubs/metadata_provider/util.pyi +1 -1
  19. metaflow-stubs/metaflow_config.pyi +1 -1
  20. metaflow-stubs/metaflow_current.pyi +46 -46
  21. metaflow-stubs/metaflow_git.pyi +1 -1
  22. metaflow-stubs/mf_extensions/__init__.pyi +1 -1
  23. metaflow-stubs/mf_extensions/obcheckpoint/__init__.pyi +1 -1
  24. metaflow-stubs/mf_extensions/obcheckpoint/plugins/__init__.pyi +1 -1
  25. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/__init__.pyi +1 -1
  26. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/__init__.pyi +1 -1
  27. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/async_cards.pyi +1 -1
  28. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/deco_injection_mixin.pyi +1 -1
  29. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/extra_components.pyi +2 -2
  30. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/__init__.pyi +1 -1
  31. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/cards/__init__.pyi +1 -1
  32. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/cards/checkpoint_lister.pyi +3 -3
  33. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/cards/lineage_card.pyi +1 -1
  34. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/checkpoint_storage.pyi +4 -4
  35. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/constructors.pyi +1 -1
  36. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/core.pyi +3 -3
  37. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/decorator.pyi +4 -4
  38. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/exceptions.pyi +1 -1
  39. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/final_api.pyi +2 -2
  40. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/lineage.pyi +1 -1
  41. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/__init__.pyi +1 -1
  42. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/context.pyi +2 -2
  43. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/core.pyi +2 -2
  44. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/decorator.pyi +1 -1
  45. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/exceptions.pyi +1 -1
  46. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/task_utils.pyi +3 -3
  47. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/utils.pyi +1 -1
  48. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastructures.pyi +2 -2
  49. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/exceptions.pyi +1 -1
  50. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/hf_hub/__init__.pyi +1 -1
  51. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/hf_hub/decorator.pyi +1 -1
  52. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/__init__.pyi +1 -1
  53. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/core.pyi +2 -2
  54. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/exceptions.pyi +1 -1
  55. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/model_storage.pyi +3 -3
  56. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/__init__.pyi +1 -1
  57. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/flowspec_utils.pyi +1 -1
  58. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/general.pyi +1 -1
  59. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/identity_utils.pyi +2 -2
  60. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/serialization_handler/__init__.pyi +1 -1
  61. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/serialization_handler/base.pyi +1 -1
  62. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/serialization_handler/tar.pyi +2 -2
  63. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/tar_utils.pyi +2 -2
  64. metaflow-stubs/mf_extensions/outerbounds/__init__.pyi +1 -1
  65. metaflow-stubs/mf_extensions/outerbounds/plugins/__init__.pyi +1 -1
  66. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/__init__.pyi +1 -1
  67. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/__init__.pyi +1 -1
  68. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/_state_machine.pyi +1 -1
  69. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/_vendor/__init__.pyi +1 -1
  70. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/_vendor/spinner/__init__.pyi +1 -1
  71. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/_vendor/spinner/spinners.pyi +1 -1
  72. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/app_cli.pyi +2 -2
  73. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/app_config.pyi +1 -1
  74. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/capsule.pyi +4 -2
  75. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/click_importer.pyi +1 -1
  76. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/code_package/__init__.pyi +1 -1
  77. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/code_package/code_packager.pyi +1 -1
  78. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/__init__.pyi +1 -1
  79. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/cli_generator.pyi +1 -1
  80. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/config_utils.pyi +3 -3
  81. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/schema_export.pyi +1 -1
  82. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/typed_configs.pyi +2 -2
  83. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/unified_config.pyi +2 -2
  84. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/dependencies.pyi +2 -2
  85. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/deployer.pyi +5 -5
  86. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/experimental/__init__.pyi +1 -1
  87. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/perimeters.pyi +2 -2
  88. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/utils.pyi +2 -2
  89. metaflow-stubs/mf_extensions/outerbounds/plugins/aws/__init__.pyi +1 -1
  90. metaflow-stubs/mf_extensions/outerbounds/plugins/aws/assume_role_decorator.pyi +2 -2
  91. metaflow-stubs/mf_extensions/outerbounds/plugins/card_utilities/__init__.pyi +1 -1
  92. metaflow-stubs/mf_extensions/outerbounds/plugins/card_utilities/async_cards.pyi +2 -2
  93. metaflow-stubs/mf_extensions/outerbounds/plugins/card_utilities/injector.pyi +1 -1
  94. metaflow-stubs/mf_extensions/outerbounds/plugins/checkpoint_datastores/__init__.pyi +1 -1
  95. metaflow-stubs/mf_extensions/outerbounds/plugins/checkpoint_datastores/coreweave.pyi +2 -2
  96. metaflow-stubs/mf_extensions/outerbounds/plugins/checkpoint_datastores/nebius.pyi +2 -2
  97. metaflow-stubs/mf_extensions/outerbounds/plugins/fast_bakery/__init__.pyi +1 -1
  98. metaflow-stubs/mf_extensions/outerbounds/plugins/fast_bakery/baker.pyi +4 -4
  99. metaflow-stubs/mf_extensions/outerbounds/plugins/fast_bakery/docker_environment.pyi +2 -2
  100. metaflow-stubs/mf_extensions/outerbounds/plugins/fast_bakery/fast_bakery.pyi +1 -1
  101. metaflow-stubs/mf_extensions/outerbounds/plugins/kubernetes/__init__.pyi +1 -1
  102. metaflow-stubs/mf_extensions/outerbounds/plugins/kubernetes/pod_killer.pyi +1 -1
  103. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/__init__.pyi +1 -1
  104. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/constants.pyi +1 -1
  105. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/exceptions.pyi +1 -1
  106. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/ollama.pyi +1 -1
  107. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/status_card.pyi +1 -1
  108. metaflow-stubs/mf_extensions/outerbounds/plugins/snowflake/__init__.pyi +1 -1
  109. metaflow-stubs/mf_extensions/outerbounds/plugins/snowflake/snowflake.pyi +1 -1
  110. metaflow-stubs/mf_extensions/outerbounds/profilers/__init__.pyi +1 -1
  111. metaflow-stubs/mf_extensions/outerbounds/profilers/gpu.pyi +1 -1
  112. metaflow-stubs/mf_extensions/outerbounds/remote_config.pyi +3 -3
  113. metaflow-stubs/mf_extensions/outerbounds/toplevel/__init__.pyi +1 -1
  114. metaflow-stubs/mf_extensions/outerbounds/toplevel/global_aliases_for_metaflow_package.pyi +1 -1
  115. metaflow-stubs/mf_extensions/outerbounds/toplevel/s3_proxy.pyi +1 -1
  116. metaflow-stubs/multicore_utils.pyi +1 -1
  117. metaflow-stubs/ob_internal.pyi +1 -1
  118. metaflow-stubs/packaging_sys/__init__.pyi +6 -6
  119. metaflow-stubs/packaging_sys/backend.pyi +2 -2
  120. metaflow-stubs/packaging_sys/distribution_support.pyi +3 -3
  121. metaflow-stubs/packaging_sys/tar_backend.pyi +4 -4
  122. metaflow-stubs/packaging_sys/utils.pyi +1 -1
  123. metaflow-stubs/packaging_sys/v1.pyi +2 -2
  124. metaflow-stubs/parameters.pyi +3 -3
  125. metaflow-stubs/plugins/__init__.pyi +11 -10
  126. metaflow-stubs/plugins/airflow/__init__.pyi +1 -1
  127. metaflow-stubs/plugins/airflow/airflow_utils.pyi +1 -1
  128. metaflow-stubs/plugins/airflow/exception.pyi +1 -1
  129. metaflow-stubs/plugins/airflow/sensors/__init__.pyi +1 -1
  130. metaflow-stubs/plugins/airflow/sensors/base_sensor.pyi +1 -1
  131. metaflow-stubs/plugins/airflow/sensors/external_task_sensor.pyi +1 -1
  132. metaflow-stubs/plugins/airflow/sensors/s3_sensor.pyi +1 -1
  133. metaflow-stubs/plugins/argo/__init__.pyi +1 -1
  134. metaflow-stubs/plugins/argo/argo_client.pyi +1 -1
  135. metaflow-stubs/plugins/argo/argo_events.pyi +1 -1
  136. metaflow-stubs/plugins/argo/argo_workflows.pyi +3 -3
  137. metaflow-stubs/plugins/argo/argo_workflows_decorator.pyi +2 -2
  138. metaflow-stubs/plugins/argo/argo_workflows_deployer.pyi +3 -3
  139. metaflow-stubs/plugins/argo/argo_workflows_deployer_objects.pyi +3 -3
  140. metaflow-stubs/plugins/argo/exit_hooks.pyi +2 -2
  141. metaflow-stubs/plugins/aws/__init__.pyi +1 -1
  142. metaflow-stubs/plugins/aws/aws_client.pyi +1 -1
  143. metaflow-stubs/plugins/aws/aws_utils.pyi +1 -1
  144. metaflow-stubs/plugins/aws/batch/__init__.pyi +1 -1
  145. metaflow-stubs/plugins/aws/batch/batch.pyi +1 -1
  146. metaflow-stubs/plugins/aws/batch/batch_client.pyi +1 -1
  147. metaflow-stubs/plugins/aws/batch/batch_decorator.pyi +1 -1
  148. metaflow-stubs/plugins/aws/secrets_manager/__init__.pyi +1 -1
  149. metaflow-stubs/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.pyi +4 -4
  150. metaflow-stubs/plugins/aws/step_functions/__init__.pyi +1 -1
  151. metaflow-stubs/plugins/aws/step_functions/event_bridge_client.pyi +1 -1
  152. metaflow-stubs/plugins/aws/step_functions/schedule_decorator.pyi +1 -1
  153. metaflow-stubs/plugins/aws/step_functions/step_functions.pyi +1 -1
  154. metaflow-stubs/plugins/aws/step_functions/step_functions_client.pyi +1 -1
  155. metaflow-stubs/plugins/aws/step_functions/step_functions_deployer.pyi +3 -3
  156. metaflow-stubs/plugins/aws/step_functions/step_functions_deployer_objects.pyi +2 -2
  157. metaflow-stubs/plugins/azure/__init__.pyi +1 -1
  158. metaflow-stubs/plugins/azure/azure_credential.pyi +1 -1
  159. metaflow-stubs/plugins/azure/azure_exceptions.pyi +1 -1
  160. metaflow-stubs/plugins/azure/azure_secret_manager_secrets_provider.pyi +4 -4
  161. metaflow-stubs/plugins/azure/azure_utils.pyi +1 -1
  162. metaflow-stubs/plugins/azure/blob_service_client_factory.pyi +1 -1
  163. metaflow-stubs/plugins/azure/includefile_support.pyi +1 -1
  164. metaflow-stubs/plugins/cards/__init__.pyi +1 -1
  165. metaflow-stubs/plugins/cards/card_client.pyi +2 -2
  166. metaflow-stubs/plugins/cards/card_creator.pyi +1 -1
  167. metaflow-stubs/plugins/cards/card_datastore.pyi +1 -1
  168. metaflow-stubs/plugins/cards/card_decorator.pyi +2 -2
  169. metaflow-stubs/plugins/cards/card_modules/__init__.pyi +1 -1
  170. metaflow-stubs/plugins/cards/card_modules/basic.pyi +2 -2
  171. metaflow-stubs/plugins/cards/card_modules/card.pyi +1 -1
  172. metaflow-stubs/plugins/cards/card_modules/components.pyi +2 -2
  173. metaflow-stubs/plugins/cards/card_modules/convert_to_native_type.pyi +1 -1
  174. metaflow-stubs/plugins/cards/card_modules/renderer_tools.pyi +1 -1
  175. metaflow-stubs/plugins/cards/card_modules/test_cards.pyi +1 -1
  176. metaflow-stubs/plugins/cards/card_resolver.pyi +1 -1
  177. metaflow-stubs/plugins/cards/component_serializer.pyi +1 -1
  178. metaflow-stubs/plugins/cards/exception.pyi +1 -1
  179. metaflow-stubs/plugins/catch_decorator.pyi +2 -2
  180. metaflow-stubs/plugins/datatools/__init__.pyi +1 -1
  181. metaflow-stubs/plugins/datatools/local.pyi +1 -1
  182. metaflow-stubs/plugins/datatools/s3/__init__.pyi +1 -1
  183. metaflow-stubs/plugins/datatools/s3/s3.pyi +3 -3
  184. metaflow-stubs/plugins/datatools/s3/s3tail.pyi +1 -1
  185. metaflow-stubs/plugins/datatools/s3/s3util.pyi +1 -1
  186. metaflow-stubs/plugins/debug_logger.pyi +1 -1
  187. metaflow-stubs/plugins/debug_monitor.pyi +1 -1
  188. metaflow-stubs/plugins/environment_decorator.pyi +1 -1
  189. metaflow-stubs/plugins/events_decorator.pyi +1 -1
  190. metaflow-stubs/plugins/exit_hook/__init__.pyi +1 -1
  191. metaflow-stubs/plugins/exit_hook/exit_hook_decorator.pyi +1 -1
  192. metaflow-stubs/plugins/frameworks/__init__.pyi +1 -1
  193. metaflow-stubs/plugins/frameworks/pytorch.pyi +1 -1
  194. metaflow-stubs/plugins/gcp/__init__.pyi +1 -1
  195. metaflow-stubs/plugins/gcp/gcp_secret_manager_secrets_provider.pyi +4 -4
  196. metaflow-stubs/plugins/gcp/gs_exceptions.pyi +1 -1
  197. metaflow-stubs/plugins/gcp/gs_storage_client_factory.pyi +1 -1
  198. metaflow-stubs/plugins/gcp/gs_utils.pyi +1 -1
  199. metaflow-stubs/plugins/gcp/includefile_support.pyi +1 -1
  200. metaflow-stubs/plugins/kubernetes/__init__.pyi +1 -1
  201. metaflow-stubs/plugins/kubernetes/kube_utils.pyi +2 -2
  202. metaflow-stubs/plugins/kubernetes/kubernetes.pyi +1 -1
  203. metaflow-stubs/plugins/kubernetes/kubernetes_client.pyi +1 -1
  204. metaflow-stubs/plugins/kubernetes/kubernetes_decorator.pyi +1 -1
  205. metaflow-stubs/plugins/kubernetes/kubernetes_jobsets.pyi +1 -1
  206. metaflow-stubs/plugins/kubernetes/spot_monitor_sidecar.pyi +1 -1
  207. metaflow-stubs/plugins/ollama/__init__.pyi +1 -1
  208. metaflow-stubs/plugins/optuna/__init__.pyi +24 -0
  209. metaflow-stubs/plugins/parallel_decorator.pyi +1 -1
  210. metaflow-stubs/plugins/perimeters.pyi +1 -1
  211. metaflow-stubs/plugins/project_decorator.pyi +1 -1
  212. metaflow-stubs/plugins/pypi/__init__.pyi +2 -2
  213. metaflow-stubs/plugins/pypi/conda_decorator.pyi +1 -1
  214. metaflow-stubs/plugins/pypi/conda_environment.pyi +4 -4
  215. metaflow-stubs/plugins/pypi/parsers.pyi +1 -1
  216. metaflow-stubs/plugins/pypi/pypi_decorator.pyi +1 -1
  217. metaflow-stubs/plugins/pypi/pypi_environment.pyi +1 -1
  218. metaflow-stubs/plugins/pypi/utils.pyi +1 -1
  219. metaflow-stubs/plugins/resources_decorator.pyi +1 -1
  220. metaflow-stubs/plugins/retry_decorator.pyi +1 -1
  221. metaflow-stubs/plugins/secrets/__init__.pyi +1 -1
  222. metaflow-stubs/plugins/secrets/inline_secrets_provider.pyi +3 -3
  223. metaflow-stubs/plugins/secrets/secrets_decorator.pyi +1 -1
  224. metaflow-stubs/plugins/secrets/secrets_func.pyi +1 -1
  225. metaflow-stubs/plugins/secrets/secrets_spec.pyi +1 -1
  226. metaflow-stubs/plugins/secrets/utils.pyi +1 -1
  227. metaflow-stubs/plugins/snowflake/__init__.pyi +1 -1
  228. metaflow-stubs/plugins/storage_executor.pyi +1 -1
  229. metaflow-stubs/plugins/test_unbounded_foreach_decorator.pyi +1 -1
  230. metaflow-stubs/plugins/timeout_decorator.pyi +2 -2
  231. metaflow-stubs/plugins/torchtune/__init__.pyi +1 -1
  232. metaflow-stubs/plugins/uv/__init__.pyi +1 -1
  233. metaflow-stubs/plugins/uv/uv_environment.pyi +2 -2
  234. metaflow-stubs/profilers/__init__.pyi +1 -1
  235. metaflow-stubs/pylint_wrapper.pyi +1 -1
  236. metaflow-stubs/runner/__init__.pyi +1 -1
  237. metaflow-stubs/runner/deployer.pyi +34 -34
  238. metaflow-stubs/runner/deployer_impl.pyi +1 -1
  239. metaflow-stubs/runner/metaflow_runner.pyi +3 -3
  240. metaflow-stubs/runner/nbdeploy.pyi +1 -1
  241. metaflow-stubs/runner/nbrun.pyi +1 -1
  242. metaflow-stubs/runner/subprocess_manager.pyi +1 -1
  243. metaflow-stubs/runner/utils.pyi +2 -2
  244. metaflow-stubs/system/__init__.pyi +1 -1
  245. metaflow-stubs/system/system_logger.pyi +1 -1
  246. metaflow-stubs/system/system_monitor.pyi +1 -1
  247. metaflow-stubs/tagging_util.pyi +1 -1
  248. metaflow-stubs/tuple_util.pyi +1 -1
  249. metaflow-stubs/user_configs/__init__.pyi +1 -1
  250. metaflow-stubs/user_configs/config_options.pyi +3 -3
  251. metaflow-stubs/user_configs/config_parameters.pyi +6 -6
  252. metaflow-stubs/user_decorators/__init__.pyi +1 -1
  253. metaflow-stubs/user_decorators/common.pyi +1 -1
  254. metaflow-stubs/user_decorators/mutable_flow.pyi +5 -5
  255. metaflow-stubs/user_decorators/mutable_step.pyi +4 -4
  256. metaflow-stubs/user_decorators/user_flow_decorator.pyi +4 -4
  257. metaflow-stubs/user_decorators/user_step_decorator.pyi +5 -5
  258. {ob_metaflow_stubs-6.0.7.0.dist-info → ob_metaflow_stubs-6.0.7.2.dist-info}/METADATA +1 -1
  259. ob_metaflow_stubs-6.0.7.2.dist-info/RECORD +262 -0
  260. ob_metaflow_stubs-6.0.7.0.dist-info/RECORD +0 -261
  261. {ob_metaflow_stubs-6.0.7.0.dist-info → ob_metaflow_stubs-6.0.7.2.dist-info}/WHEEL +0 -0
  262. {ob_metaflow_stubs-6.0.7.0.dist-info → ob_metaflow_stubs-6.0.7.2.dist-info}/top_level.txt +0 -0
@@ -1,7 +1,7 @@
1
1
  ######################################################################################################
2
2
  # Auto-generated Metaflow stub file #
3
3
  # MF version: 2.17.1.0+obcheckpoint(0.2.4);ob(v1) #
4
- # Generated on 2025-08-18T05:26:58.140419 #
4
+ # Generated on 2025-08-19T23:54:56.174978 #
5
5
  ######################################################################################################
6
6
 
7
7
  from __future__ import annotations
@@ -39,18 +39,18 @@ from .user_decorators.user_step_decorator import UserStepDecorator as UserStepDe
39
39
  from .user_decorators.user_step_decorator import StepMutator as StepMutator
40
40
  from .user_decorators.user_step_decorator import user_step_decorator as user_step_decorator
41
41
  from .user_decorators.user_flow_decorator import FlowMutator as FlowMutator
42
- from . import tuple_util as tuple_util
43
42
  from . import cards as cards
44
43
  from . import metaflow_git as metaflow_git
44
+ from . import tuple_util as tuple_util
45
45
  from . import events as events
46
46
  from . import runner as runner
47
47
  from . import plugins as plugins
48
48
  from .mf_extensions.outerbounds.toplevel.global_aliases_for_metaflow_package import S3 as S3
49
49
  from . import includefile as includefile
50
50
  from .includefile import IncludeFile as IncludeFile
51
+ from .plugins.pypi.parsers import pyproject_toml_parser as pyproject_toml_parser
51
52
  from .plugins.pypi.parsers import conda_environment_yml_parser as conda_environment_yml_parser
52
53
  from .plugins.pypi.parsers import requirements_txt_parser as requirements_txt_parser
53
- from .plugins.pypi.parsers import pyproject_toml_parser as pyproject_toml_parser
54
54
  from . import client as client
55
55
  from .client.core import namespace as namespace
56
56
  from .client.core import get_namespace as get_namespace
@@ -167,331 +167,589 @@ def step(f: typing.Union[typing.Callable[[FlowSpecDerived], None], typing.Callab
167
167
  """
168
168
  ...
169
169
 
170
- @typing.overload
171
- def model(*, load: typing.Union[typing.List[str], str, typing.List[typing.Tuple[str, typing.Optional[str]]]] = None, temp_dir_root: str = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
170
+ def ollama(*, models: list, backend: str, force_pull: bool, cache_update_policy: str, force_cache_update: bool, debug: bool, circuit_breaker_config: dict, timeout_config: dict) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
172
171
  """
173
- Enables loading / saving of models within a step.
172
+ This decorator is used to run Ollama APIs as Metaflow task sidecars.
174
173
 
175
- > Examples
176
- - Saving Models
177
- ```python
178
- @model
179
- @step
180
- def train(self):
181
- # current.model.save returns a dictionary reference to the model saved
182
- self.my_model = current.model.save(
183
- path_to_my_model,
184
- label="my_model",
185
- metadata={
186
- "epochs": 10,
187
- "batch-size": 32,
188
- "learning-rate": 0.001,
189
- }
190
- )
191
- self.next(self.test)
174
+ User code call
175
+ --------------
176
+ @ollama(
177
+ models=[...],
178
+ ...
179
+ )
192
180
 
193
- @model(load="my_model")
194
- @step
195
- def test(self):
196
- # `current.model.loaded` returns a dictionary of the loaded models
197
- # where the key is the name of the artifact and the value is the path to the model
198
- print(os.listdir(current.model.loaded["my_model"]))
199
- self.next(self.end)
200
- ```
181
+ Valid backend options
182
+ ---------------------
183
+ - 'local': Run as a separate process on the local task machine.
184
+ - (TODO) 'managed': Outerbounds hosts and selects compute provider.
185
+ - (TODO) 'remote': Spin up separate instance to serve Ollama models.
201
186
 
202
- - Loading models
203
- ```python
204
- @step
205
- def train(self):
206
- # current.model.load returns the path to the model loaded
207
- checkpoint_path = current.model.load(
208
- self.checkpoint_key,
209
- )
210
- model_path = current.model.load(
211
- self.model,
212
- )
213
- self.next(self.test)
214
- ```
187
+ Valid model options
188
+ -------------------
189
+ Any model here https://ollama.com/search, e.g. 'llama3.2', 'llama3.3'
215
190
 
216
191
 
217
192
  Parameters
218
193
  ----------
219
- load : Union[List[str],str,List[Tuple[str,Union[str,None]]]], default: None
220
- Artifact name/s referencing the models/checkpoints to load. Artifact names refer to the names of the instance variables set to `self`.
221
- The artifact names given to `load` can be reference objects or reference `key` strings from objects created by `current.checkpoint` / `current.model` / `current.huggingface_hub`.
222
- If a list of tuples is provided, the first element is the artifact name and the second element is the path the artifact needs to be unpacked on
223
- the local filesystem. If the second element is None, the artifact will be unpacked in the current working directory.
224
- If a string is provided, then the artifact corresponding to that name will be loaded in the current working directory.
194
+ models: list[str]
195
+ List of Ollama containers running models in sidecars.
196
+ backend: str
197
+ Determines where and how to run the Ollama process.
198
+ force_pull: bool
199
+ Whether to run `ollama pull` no matter what, or first check the remote cache in Metaflow datastore for this model key.
200
+ cache_update_policy: str
201
+ Cache update policy: "auto", "force", or "never".
202
+ force_cache_update: bool
203
+ Simple override for "force" cache update policy.
204
+ debug: bool
205
+ Whether to turn on verbose debugging logs.
206
+ circuit_breaker_config: dict
207
+ Configuration for circuit breaker protection. Keys: failure_threshold, recovery_timeout, reset_timeout.
208
+ timeout_config: dict
209
+ Configuration for various operation timeouts. Keys: pull, stop, health_check, install, server_startup.
210
+ """
211
+ ...
212
+
213
+ @typing.overload
214
+ def pypi(*, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
215
+ """
216
+ Specifies the PyPI packages for the step.
225
217
 
226
- temp_dir_root : str, default: None
227
- The root directory under which `current.model.loaded` will store loaded models
218
+ Information in this decorator will augment any
219
+ attributes set in the `@pypi_base` flow-level decorator. Hence,
220
+ you can use `@pypi_base` to set packages required by all
221
+ steps and use `@pypi` to specify step-specific overrides.
222
+
223
+
224
+ Parameters
225
+ ----------
226
+ packages : Dict[str, str], default: {}
227
+ Packages to use for this step. The key is the name of the package
228
+ and the value is the version to use.
229
+ python : str, optional, default: None
230
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
231
+ that the version used will correspond to the version of the Python interpreter used to start the run.
228
232
  """
229
233
  ...
230
234
 
231
235
  @typing.overload
232
- def model(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
236
+ def pypi(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
233
237
  ...
234
238
 
235
239
  @typing.overload
236
- def model(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
240
+ def pypi(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
237
241
  ...
238
242
 
239
- def model(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, load: typing.Union[typing.List[str], str, typing.List[typing.Tuple[str, typing.Optional[str]]]] = None, temp_dir_root: str = None):
243
+ def pypi(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None):
240
244
  """
241
- Enables loading / saving of models within a step.
245
+ Specifies the PyPI packages for the step.
246
+
247
+ Information in this decorator will augment any
248
+ attributes set in the `@pypi_base` flow-level decorator. Hence,
249
+ you can use `@pypi_base` to set packages required by all
250
+ steps and use `@pypi` to specify step-specific overrides.
251
+
252
+
253
+ Parameters
254
+ ----------
255
+ packages : Dict[str, str], default: {}
256
+ Packages to use for this step. The key is the name of the package
257
+ and the value is the version to use.
258
+ python : str, optional, default: None
259
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
260
+ that the version used will correspond to the version of the Python interpreter used to start the run.
261
+ """
262
+ ...
263
+
264
+ @typing.overload
265
+ def checkpoint(*, load_policy: str = 'fresh', temp_dir_root: str = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
266
+ """
267
+ Enables checkpointing for a step.
242
268
 
243
269
  > Examples
244
- - Saving Models
270
+
271
+ - Saving Checkpoints
272
+
245
273
  ```python
246
- @model
274
+ @checkpoint
247
275
  @step
248
276
  def train(self):
249
- # current.model.save returns a dictionary reference to the model saved
250
- self.my_model = current.model.save(
251
- path_to_my_model,
252
- label="my_model",
253
- metadata={
254
- "epochs": 10,
255
- "batch-size": 32,
256
- "learning-rate": 0.001,
257
- }
258
- )
259
- self.next(self.test)
260
-
261
- @model(load="my_model")
262
- @step
263
- def test(self):
264
- # `current.model.loaded` returns a dictionary of the loaded models
265
- # where the key is the name of the artifact and the value is the path to the model
266
- print(os.listdir(current.model.loaded["my_model"]))
267
- self.next(self.end)
277
+ model = create_model(self.parameters, checkpoint_path = None)
278
+ for i in range(self.epochs):
279
+ # some training logic
280
+ loss = model.train(self.dataset)
281
+ if i % 10 == 0:
282
+ model.save(
283
+ current.checkpoint.directory,
284
+ )
285
+ # saves the contents of the `current.checkpoint.directory` as a checkpoint
286
+ # and returns a reference dictionary to the checkpoint saved in the datastore
287
+ self.latest_checkpoint = current.checkpoint.save(
288
+ name="epoch_checkpoint",
289
+ metadata={
290
+ "epoch": i,
291
+ "loss": loss,
292
+ }
293
+ )
268
294
  ```
269
295
 
270
- - Loading models
296
+ - Using Loaded Checkpoints
297
+
271
298
  ```python
299
+ @retry(times=3)
300
+ @checkpoint
272
301
  @step
273
302
  def train(self):
274
- # current.model.load returns the path to the model loaded
275
- checkpoint_path = current.model.load(
276
- self.checkpoint_key,
277
- )
278
- model_path = current.model.load(
279
- self.model,
280
- )
281
- self.next(self.test)
303
+ # Assume that the task has restarted and the previous attempt of the task
304
+ # saved a checkpoint
305
+ checkpoint_path = None
306
+ if current.checkpoint.is_loaded: # Check if a checkpoint is loaded
307
+ print("Loaded checkpoint from the previous attempt")
308
+ checkpoint_path = current.checkpoint.directory
309
+
310
+ model = create_model(self.parameters, checkpoint_path = checkpoint_path)
311
+ for i in range(self.epochs):
312
+ ...
282
313
  ```
283
314
 
284
315
 
285
316
  Parameters
286
317
  ----------
287
- load : Union[List[str],str,List[Tuple[str,Union[str,None]]]], default: None
288
- Artifact name/s referencing the models/checkpoints to load. Artifact names refer to the names of the instance variables set to `self`.
289
- These artifact names give to `load` be reference objects or reference `key` string's from objects created by `current.checkpoint` / `current.model` / `current.huggingface_hub`.
290
- If a list of tuples is provided, the first element is the artifact name and the second element is the path the artifact needs be unpacked on
291
- the local filesystem. If the second element is None, the artifact will be unpacked in the current working directory.
292
- If a string is provided, then the artifact corresponding to that name will be loaded in the current working directory.
318
+ load_policy : str, default: "fresh"
319
+ The policy for loading the checkpoint. The following policies are supported:
320
+ - "eager": Loads the latest available checkpoint within the namespace.
321
+ With this mode, the latest checkpoint written by any previous task (can be even a different run) of the step
322
+ will be loaded at the start of the task.
323
+ - "none": Do not load any checkpoint
324
+ - "fresh": Loads the latest checkpoint created within the running Task.
325
+ This mode helps loading checkpoints across various retry attempts of the same task.
326
+ With this mode, no checkpoint will be loaded at the start of a task but any checkpoints
327
+ created within the task will be loaded when the task retries execution on failure.
293
328
 
294
329
  temp_dir_root : str, default: None
295
- The root directory under which `current.model.loaded` will store loaded models
330
+ The root directory under which `current.checkpoint.directory` will be created.
296
331
  """
297
332
  ...
298
333
 
299
- def nvidia(*, gpu: int, gpu_type: str, queue_timeout: int) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
300
- """
301
- Specifies that this step should execute on DGX cloud.
302
-
303
-
304
- Parameters
305
- ----------
306
- gpu : int
307
- Number of GPUs to use.
308
- gpu_type : str
309
- Type of Nvidia GPU to use.
310
- queue_timeout : int
311
- Time to keep the job in NVCF's queue.
312
- """
334
+ @typing.overload
335
+ def checkpoint(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
313
336
  ...
314
337
 
315
338
  @typing.overload
316
- def retry(*, times: int = 3, minutes_between_retries: int = 2) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
339
+ def checkpoint(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
340
+ ...
341
+
342
+ def checkpoint(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, load_policy: str = 'fresh', temp_dir_root: str = None):
317
343
  """
318
- Specifies the number of times the task corresponding
319
- to a step needs to be retried.
320
-
321
- This decorator is useful for handling transient errors, such as networking issues.
322
- If your task contains operations that can't be retried safely, e.g. database updates,
323
- it is advisable to annotate it with `@retry(times=0)`.
344
+ Enables checkpointing for a step.
324
345
 
325
- This can be used in conjunction with the `@catch` decorator. The `@catch`
326
- decorator will execute a no-op task after all retries have been exhausted,
327
- ensuring that the flow execution can continue.
346
+ > Examples
328
347
 
348
+ - Saving Checkpoints
329
349
 
330
- Parameters
331
- ----------
332
- times : int, default 3
333
- Number of times to retry this task.
334
- minutes_between_retries : int, default 2
335
- Number of minutes between retries.
336
- """
337
- ...
338
-
339
- @typing.overload
340
- def retry(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
341
- ...
342
-
343
- @typing.overload
344
- def retry(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
345
- ...
346
-
347
- def retry(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, times: int = 3, minutes_between_retries: int = 2):
348
- """
349
- Specifies the number of times the task corresponding
350
- to a step needs to be retried.
350
+ ```python
351
+ @checkpoint
352
+ @step
353
+ def train(self):
354
+ model = create_model(self.parameters, checkpoint_path = None)
355
+ for i in range(self.epochs):
356
+ # some training logic
357
+ loss = model.train(self.dataset)
358
+ if i % 10 == 0:
359
+ model.save(
360
+ current.checkpoint.directory,
361
+ )
362
+ # saves the contents of the `current.checkpoint.directory` as a checkpoint
363
+ # and returns a reference dictionary to the checkpoint saved in the datastore
364
+ self.latest_checkpoint = current.checkpoint.save(
365
+ name="epoch_checkpoint",
366
+ metadata={
367
+ "epoch": i,
368
+ "loss": loss,
369
+ }
370
+ )
371
+ ```
351
372
 
352
- This decorator is useful for handling transient errors, such as networking issues.
353
- If your task contains operations that can't be retried safely, e.g. database updates,
354
- it is advisable to annotate it with `@retry(times=0)`.
373
+ - Using Loaded Checkpoints
355
374
 
356
- This can be used in conjunction with the `@catch` decorator. The `@catch`
357
- decorator will execute a no-op task after all retries have been exhausted,
358
- ensuring that the flow execution can continue.
375
+ ```python
376
+ @retry(times=3)
377
+ @checkpoint
378
+ @step
379
+ def train(self):
380
+ # Assume that the task has restarted and the previous attempt of the task
381
+ # saved a checkpoint
382
+ checkpoint_path = None
383
+ if current.checkpoint.is_loaded: # Check if a checkpoint is loaded
384
+ print("Loaded checkpoint from the previous attempt")
385
+ checkpoint_path = current.checkpoint.directory
386
+
387
+ model = create_model(self.parameters, checkpoint_path = checkpoint_path)
388
+ for i in range(self.epochs):
389
+ ...
390
+ ```
359
391
 
360
392
 
361
393
  Parameters
362
394
  ----------
363
- times : int, default 3
364
- Number of times to retry this task.
365
- minutes_between_retries : int, default 2
366
- Number of minutes between retries.
395
+ load_policy : str, default: "fresh"
396
+ The policy for loading the checkpoint. The following policies are supported:
397
+ - "eager": Loads the latest available checkpoint within the namespace.
398
+ With this mode, the latest checkpoint written by any previous task (can be even a different run) of the step
399
+ will be loaded at the start of the task.
400
+ - "none": Do not load any checkpoint
401
+ - "fresh": Loads the latest checkpoint created within the running Task.
402
+ This mode helps loading checkpoints across various retry attempts of the same task.
403
+ With this mode, no checkpoint will be loaded at the start of a task but any checkpoints
404
+ created within the task will be loaded when the task retries execution on failure.
405
+
406
+ temp_dir_root : str, default: None
407
+ The root directory under which `current.checkpoint.directory` will be created.
367
408
  """
368
409
  ...
369
410
 
370
411
  @typing.overload
371
- def parallel(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
412
+ def coreweave_s3_proxy(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
372
413
  """
373
- Decorator prototype for all step decorators. This function gets specialized
374
- and imported for all decorators types by _import_plugin_decorators().
414
+ CoreWeave-specific S3 Proxy decorator for routing S3 requests through a local proxy service.
415
+ It exists to make it easier for users to know that this decorator should only be used with
416
+ a Neo Cloud like CoreWeave.
375
417
  """
376
418
  ...
377
419
 
378
420
  @typing.overload
379
- def parallel(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
421
+ def coreweave_s3_proxy(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
380
422
  ...
381
423
 
382
- def parallel(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
424
+ def coreweave_s3_proxy(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
383
425
  """
384
- Decorator prototype for all step decorators. This function gets specialized
385
- and imported for all decorators types by _import_plugin_decorators().
426
+ CoreWeave-specific S3 Proxy decorator for routing S3 requests through a local proxy service.
427
+ It exists to make it easier for users to know that this decorator should only be used with
428
+ a Neo Cloud like CoreWeave.
386
429
  """
387
430
  ...
388
431
 
389
- def vllm(*, model: str, backend: str, openai_api_server: bool, debug: bool, card_refresh_interval: int, max_retries: int, retry_alert_frequency: int, engine_args: dict) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
432
+ @typing.overload
433
+ def model(*, load: typing.Union[typing.List[str], str, typing.List[typing.Tuple[str, typing.Optional[str]]]] = None, temp_dir_root: str = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
390
434
  """
391
- This decorator is used to run vllm APIs as Metaflow task sidecars.
392
-
393
- User code call
394
- --------------
395
- @vllm(
396
- model="...",
397
- ...
398
- )
435
+ Enables loading / saving of models within a step.
399
436
 
400
- Valid backend options
401
- ---------------------
402
- - 'local': Run as a separate process on the local task machine.
437
+ > Examples
438
+ - Saving Models
439
+ ```python
440
+ @model
441
+ @step
442
+ def train(self):
443
+ # current.model.save returns a dictionary reference to the model saved
444
+ self.my_model = current.model.save(
445
+ path_to_my_model,
446
+ label="my_model",
447
+ metadata={
448
+ "epochs": 10,
449
+ "batch-size": 32,
450
+ "learning-rate": 0.001,
451
+ }
452
+ )
453
+ self.next(self.test)
403
454
 
404
- Valid model options
405
- -------------------
406
- Any HuggingFace model identifier, e.g. 'meta-llama/Llama-3.2-1B'
455
+ @model(load="my_model")
456
+ @step
457
+ def test(self):
458
+ # `current.model.loaded` returns a dictionary of the loaded models
459
+ # where the key is the name of the artifact and the value is the path to the model
460
+ print(os.listdir(current.model.loaded["my_model"]))
461
+ self.next(self.end)
462
+ ```
407
463
 
408
- NOTE: vLLM's OpenAI-compatible server serves ONE model per server instance.
409
- If you need multiple models, you must create multiple @vllm decorators.
464
+ - Loading models
465
+ ```python
466
+ @step
467
+ def train(self):
468
+ # current.model.load returns the path to the model loaded
469
+ checkpoint_path = current.model.load(
470
+ self.checkpoint_key,
471
+ )
472
+ model_path = current.model.load(
473
+ self.model,
474
+ )
475
+ self.next(self.test)
476
+ ```
410
477
 
411
478
 
412
479
  Parameters
413
480
  ----------
414
- model: str
415
- HuggingFace model identifier to be served by vLLM.
416
- backend: str
417
- Determines where and how to run the vLLM process.
418
- openai_api_server: bool
419
- Whether to use OpenAI-compatible API server mode (subprocess) instead of native engine.
420
- Default is False (uses native engine).
421
- Set to True for backward compatibility with existing code.
422
- debug: bool
423
- Whether to turn on verbose debugging logs.
424
- card_refresh_interval: int
425
- Interval in seconds for refreshing the vLLM status card.
426
- Only used when openai_api_server=True.
427
- max_retries: int
428
- Maximum number of retries checking for vLLM server startup.
429
- Only used when openai_api_server=True.
430
- retry_alert_frequency: int
431
- Frequency of alert logs for vLLM server startup retries.
432
- Only used when openai_api_server=True.
433
- engine_args : dict
434
- Additional keyword arguments to pass to the vLLM engine.
435
- For example, `tensor_parallel_size=2`.
436
- """
437
- ...
438
-
439
- @typing.overload
440
- def conda(*, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
441
- """
442
- Specifies the Conda environment for the step.
443
-
444
- Information in this decorator will augment any
445
- attributes set in the `@conda_base` flow-level decorator. Hence,
446
- you can use `@conda_base` to set packages required by all
447
- steps and use `@conda` to specify step-specific overrides.
448
-
481
+ load : Union[List[str],str,List[Tuple[str,Union[str,None]]]], default: None
482
+ Artifact name/s referencing the models/checkpoints to load. Artifact names refer to the names of the instance variables set to `self`.
483
+ The artifact names given to `load` can be reference objects or reference `key` strings from objects created by `current.checkpoint` / `current.model` / `current.huggingface_hub`.
484
+ If a list of tuples is provided, the first element is the artifact name and the second element is the path where the artifact needs to be unpacked on
485
+ the local filesystem. If the second element is None, the artifact will be unpacked in the current working directory.
486
+ If a string is provided, then the artifact corresponding to that name will be loaded in the current working directory.
449
487
 
450
- Parameters
451
- ----------
452
- packages : Dict[str, str], default {}
453
- Packages to use for this step. The key is the name of the package
454
- and the value is the version to use.
455
- libraries : Dict[str, str], default {}
456
- Supported for backward compatibility. When used with packages, packages will take precedence.
457
- python : str, optional, default None
458
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
459
- that the version used will correspond to the version of the Python interpreter used to start the run.
460
- disabled : bool, default False
461
- If set to True, disables @conda.
488
+ temp_dir_root : str, default: None
489
+ The root directory under which `current.model.loaded` will store loaded models
462
490
  """
463
491
  ...
464
492
 
465
493
  @typing.overload
466
- def conda(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
494
+ def model(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
467
495
  ...
468
496
 
469
497
  @typing.overload
470
- def conda(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
498
+ def model(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
471
499
  ...
472
500
 
473
- def conda(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False):
501
+ def model(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, load: typing.Union[typing.List[str], str, typing.List[typing.Tuple[str, typing.Optional[str]]]] = None, temp_dir_root: str = None):
474
502
  """
475
- Specifies the Conda environment for the step.
503
+ Enables loading / saving of models within a step.
476
504
 
477
- Information in this decorator will augment any
478
- attributes set in the `@conda_base` flow-level decorator. Hence,
479
- you can use `@conda_base` to set packages required by all
480
- steps and use `@conda` to specify step-specific overrides.
505
+ > Examples
506
+ - Saving Models
507
+ ```python
508
+ @model
509
+ @step
510
+ def train(self):
511
+ # current.model.save returns a dictionary reference to the model saved
512
+ self.my_model = current.model.save(
513
+ path_to_my_model,
514
+ label="my_model",
515
+ metadata={
516
+ "epochs": 10,
517
+ "batch-size": 32,
518
+ "learning-rate": 0.001,
519
+ }
520
+ )
521
+ self.next(self.test)
481
522
 
523
+ @model(load="my_model")
524
+ @step
525
+ def test(self):
526
+ # `current.model.loaded` returns a dictionary of the loaded models
527
+ # where the key is the name of the artifact and the value is the path to the model
528
+ print(os.listdir(current.model.loaded["my_model"]))
529
+ self.next(self.end)
530
+ ```
482
531
 
483
- Parameters
484
- ----------
485
- packages : Dict[str, str], default {}
486
- Packages to use for this step. The key is the name of the package
487
- and the value is the version to use.
488
- libraries : Dict[str, str], default {}
489
- Supported for backward compatibility. When used with packages, packages will take precedence.
490
- python : str, optional, default None
491
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
492
- that the version used will correspond to the version of the Python interpreter used to start the run.
493
- disabled : bool, default False
494
- If set to True, disables @conda.
532
+ - Loading models
533
+ ```python
534
+ @step
535
+ def train(self):
536
+ # current.model.load returns the path to the model loaded
537
+ checkpoint_path = current.model.load(
538
+ self.checkpoint_key,
539
+ )
540
+ model_path = current.model.load(
541
+ self.model,
542
+ )
543
+ self.next(self.test)
544
+ ```
545
+
546
+
547
+ Parameters
548
+ ----------
549
+ load : Union[List[str],str,List[Tuple[str,Union[str,None]]]], default: None
550
+ Artifact name/s referencing the models/checkpoints to load. Artifact names refer to the names of the instance variables set to `self`.
551
+ The artifact names given to `load` can be reference objects or reference `key` strings from objects created by `current.checkpoint` / `current.model` / `current.huggingface_hub`.
552
+ If a list of tuples is provided, the first element is the artifact name and the second element is the path where the artifact needs to be unpacked on
553
+ the local filesystem. If the second element is None, the artifact will be unpacked in the current working directory.
554
+ If a string is provided, then the artifact corresponding to that name will be loaded in the current working directory.
555
+
556
+ temp_dir_root : str, default: None
557
+ The root directory under which `current.model.loaded` will store loaded models
558
+ """
559
+ ...
560
+
561
+ @typing.overload
562
+ def nebius_s3_proxy(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
563
+ """
564
+ Nebius-specific S3 Proxy decorator for routing S3 requests through a local proxy service.
565
+ It exists to make it easier for users to know that this decorator should only be used with
566
+ a Neo Cloud like Nebius.
567
+ """
568
+ ...
569
+
570
+ @typing.overload
571
+ def nebius_s3_proxy(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
572
+ ...
573
+
574
+ def nebius_s3_proxy(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
575
+ """
576
+ Nebius-specific S3 Proxy decorator for routing S3 requests through a local proxy service.
577
+ It exists to make it easier for users to know that this decorator should only be used with
578
+ a Neo Cloud like Nebius.
579
+ """
580
+ ...
581
+
582
+ def nvidia(*, gpu: int, gpu_type: str, queue_timeout: int) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
583
+ """
584
+ Specifies that this step should execute on DGX cloud.
585
+
586
+
587
+ Parameters
588
+ ----------
589
+ gpu : int
590
+ Number of GPUs to use.
591
+ gpu_type : str
592
+ Type of Nvidia GPU to use.
593
+ queue_timeout : int
594
+ Time to keep the job in NVCF's queue.
595
+ """
596
+ ...
597
+
598
+ @typing.overload
599
+ def test_append_card(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
600
+ """
601
+ A simple decorator that demonstrates using CardDecoratorInjector
602
+ to inject a card and render simple markdown content.
603
+ """
604
+ ...
605
+
606
+ @typing.overload
607
+ def test_append_card(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
608
+ ...
609
+
610
+ def test_append_card(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
611
+ """
612
+ A simple decorator that demonstrates using CardDecoratorInjector
613
+ to inject a card and render simple markdown content.
614
+ """
615
+ ...
616
+
617
+ def huggingface_hub(*, temp_dir_root: typing.Optional[str] = None, load: typing.Union[typing.List[str], typing.List[typing.Tuple[typing.Dict, str]], typing.List[typing.Tuple[str, str]], typing.List[typing.Dict], None]) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
618
+ """
619
+ Decorator that helps cache, version and store models/datasets from huggingface hub.
620
+
621
+ > Examples
622
+
623
+ **Usage: creating references of models from huggingface that may be loaded in downstream steps**
624
+ ```python
625
+ @huggingface_hub
626
+ @step
627
+ def pull_model_from_huggingface(self):
628
+ # `current.huggingface_hub.snapshot_download` downloads the model from the Hugging Face Hub
629
+ # and saves it in the backend storage based on the model's `repo_id`. If there exists a model
630
+ # with the same `repo_id` in the backend storage, it will not download the model again. The return
631
+ # value of the function is a reference to the model in the backend storage.
632
+ # This reference can be used to load the model in the subsequent steps via `@model(load=["llama_model"])`
633
+
634
+ self.model_id = "mistralai/Mistral-7B-Instruct-v0.1"
635
+ self.llama_model = current.huggingface_hub.snapshot_download(
636
+ repo_id=self.model_id,
637
+ allow_patterns=["*.safetensors", "*.json", "tokenizer.*"],
638
+ )
639
+ self.next(self.train)
640
+ ```
641
+
642
+ **Usage: loading models directly from huggingface hub or from cache (from metaflow's datastore)**
643
+ ```python
644
+ @huggingface_hub(load=["mistralai/Mistral-7B-Instruct-v0.1"])
645
+ @step
646
+ def pull_model_from_huggingface(self):
647
+ path_to_model = current.huggingface_hub.loaded["mistralai/Mistral-7B-Instruct-v0.1"]
648
+ ```
649
+
650
+ ```python
651
+ @huggingface_hub(load=[("mistralai/Mistral-7B-Instruct-v0.1", "/my-directory"), ("myorg/mistral-lora", "/my-lora-directory")])
652
+ @step
653
+ def finetune_model(self):
654
+ path_to_model = current.huggingface_hub.loaded["mistralai/Mistral-7B-Instruct-v0.1"]
655
+ # path_to_model will be /my-directory
656
+ ```
657
+
658
+ ```python
659
+ # Takes all the arguments passed to `snapshot_download`
660
+ # except for `local_dir`
661
+ @huggingface_hub(load=[
662
+ {
663
+ "repo_id": "mistralai/Mistral-7B-Instruct-v0.1",
664
+ },
665
+ {
666
+ "repo_id": "myorg/mistral-lora",
667
+ "repo_type": "model",
668
+ },
669
+ ])
670
+ @step
671
+ def finetune_model(self):
672
+ path_to_model = current.huggingface_hub.loaded["mistralai/Mistral-7B-Instruct-v0.1"]
673
+ # path_to_model will be inside a temporary directory, since no local path was specified
674
+ ```
675
+
676
+
677
+ Parameters
678
+ ----------
679
+ temp_dir_root : str, optional
680
+ The root directory that will hold the temporary directory where objects will be downloaded.
681
+
682
+ load: Union[List[str], List[Tuple[Dict, str]], List[Tuple[str, str]], List[Dict], None]
683
+ The list of repos (models/datasets) to load.
684
+
685
+ Loaded repos can be accessed via `current.huggingface_hub.loaded`. If load is set, then the following happens:
686
+
687
+ - If repo (model/dataset) is not found in the datastore:
688
+ - Downloads the repo from Hugging Face Hub to a temporary directory (or uses specified path) for local access
689
+ - Stores it in Metaflow's datastore (s3/gcs/azure etc.) with a unique name based on repo_type/repo_id
690
+ - All HF models loaded for a `@step` will be cached separately under flow/step/namespace.
691
+
692
+ - If repo is found in the datastore:
693
+ - Loads it directly from datastore to local path (can be temporary directory or specified path)
694
+ """
695
+ ...
696
+
697
+ @typing.overload
698
+ def timeout(*, seconds: int = 0, minutes: int = 0, hours: int = 0) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
699
+ """
700
+ Specifies a timeout for your step.
701
+
702
+ This decorator is useful if this step may hang indefinitely.
703
+
704
+ This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
705
+ A timeout is considered to be an exception thrown by the step. It will cause the step to be
706
+ retried if needed and the exception will be caught by the `@catch` decorator, if present.
707
+
708
+ Note that all the values specified in parameters are added together so if you specify
709
+ 60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.
710
+
711
+
712
+ Parameters
713
+ ----------
714
+ seconds : int, default 0
715
+ Number of seconds to wait prior to timing out.
716
+ minutes : int, default 0
717
+ Number of minutes to wait prior to timing out.
718
+ hours : int, default 0
719
+ Number of hours to wait prior to timing out.
720
+ """
721
+ ...
722
+
723
+ @typing.overload
724
+ def timeout(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
725
+ ...
726
+
727
+ @typing.overload
728
+ def timeout(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
729
+ ...
730
+
731
+ def timeout(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, seconds: int = 0, minutes: int = 0, hours: int = 0):
732
+ """
733
+ Specifies a timeout for your step.
734
+
735
+ This decorator is useful if this step may hang indefinitely.
736
+
737
+ This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
738
+ A timeout is considered to be an exception thrown by the step. It will cause the step to be
739
+ retried if needed and the exception will be caught by the `@catch` decorator, if present.
740
+
741
+ Note that all the values specified in parameters are added together so if you specify
742
+ 60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.
743
+
744
+
745
+ Parameters
746
+ ----------
747
+ seconds : int, default 0
748
+ Number of seconds to wait prior to timing out.
749
+ minutes : int, default 0
750
+ Number of minutes to wait prior to timing out.
751
+ hours : int, default 0
752
+ Number of hours to wait prior to timing out.
495
753
  """
496
754
  ...
497
755
 
@@ -545,152 +803,111 @@ def card(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typ
545
803
  ...
546
804
 
547
805
  @typing.overload
548
- def pypi(*, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
806
+ def app_deploy(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
549
807
  """
550
- Specifies the PyPI packages for the step.
551
-
552
- Information in this decorator will augment any
553
- attributes set in the `@pypi_base` flow-level decorator. Hence,
554
- you can use `@pypi_base` to set packages required by all
555
- steps and use `@pypi` to specify step-specific overrides.
556
-
557
-
558
- Parameters
559
- ----------
560
- packages : Dict[str, str], default: {}
561
- Packages to use for this step. The key is the name of the package
562
- and the value is the version to use.
563
- python : str, optional, default: None
564
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
565
- that the version used will correspond to the version of the Python interpreter used to start the run.
808
+ Decorator prototype for all step decorators. This function gets specialized
809
+ and imported for all decorator types by _import_plugin_decorators().
566
810
  """
567
811
  ...
568
812
 
569
813
  @typing.overload
570
- def pypi(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
814
+ def app_deploy(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
571
815
  ...
572
816
 
573
- @typing.overload
574
- def pypi(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
817
+ def app_deploy(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
818
+ """
819
+ Decorator prototype for all step decorators. This function gets specialized
820
+ and imported for all decorator types by _import_plugin_decorators().
821
+ """
575
822
  ...
576
823
 
577
- def pypi(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None):
824
+ @typing.overload
825
+ def secrets(*, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = [], role: typing.Optional[str] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
578
826
  """
579
- Specifies the PyPI packages for the step.
580
-
581
- Information in this decorator will augment any
582
- attributes set in the `@pypi_base` flow-level decorator. Hence,
583
- you can use `@pypi_base` to set packages required by all
584
- steps and use `@pypi` to specify step-specific overrides.
827
+ Specifies secrets to be retrieved and injected as environment variables prior to
828
+ the execution of a step.
585
829
 
586
830
 
587
831
  Parameters
588
832
  ----------
589
- packages : Dict[str, str], default: {}
590
- Packages to use for this step. The key is the name of the package
591
- and the value is the version to use.
592
- python : str, optional, default: None
593
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
594
- that the version used will correspond to the version of the Python interpreter used to start the run.
833
+ sources : List[Union[str, Dict[str, Any]]], default: []
834
+ List of secret specs, defining how the secrets are to be retrieved
835
+ role : str, optional, default: None
836
+ Role to use for fetching secrets
595
837
  """
596
838
  ...
597
839
 
598
- def ollama(*, models: list, backend: str, force_pull: bool, cache_update_policy: str, force_cache_update: bool, debug: bool, circuit_breaker_config: dict, timeout_config: dict) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
840
+ @typing.overload
841
+ def secrets(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
842
+ ...
843
+
844
+ @typing.overload
845
+ def secrets(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
846
+ ...
847
+
848
+ def secrets(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = [], role: typing.Optional[str] = None):
599
849
  """
600
- This decorator is used to run Ollama APIs as Metaflow task sidecars.
601
-
602
- User code call
603
- --------------
604
- @ollama(
605
- models=[...],
606
- ...
607
- )
608
-
609
- Valid backend options
610
- ---------------------
611
- - 'local': Run as a separate process on the local task machine.
612
- - (TODO) 'managed': Outerbounds hosts and selects compute provider.
613
- - (TODO) 'remote': Spin up separate instance to serve Ollama models.
614
-
615
- Valid model options
616
- -------------------
617
- Any model here https://ollama.com/search, e.g. 'llama3.2', 'llama3.3'
850
+ Specifies secrets to be retrieved and injected as environment variables prior to
851
+ the execution of a step.
618
852
 
619
853
 
620
854
  Parameters
621
855
  ----------
622
- models: list[str]
623
- List of Ollama containers running models in sidecars.
624
- backend: str
625
- Determines where and how to run the Ollama process.
626
- force_pull: bool
627
- Whether to run `ollama pull` no matter what, or first check the remote cache in Metaflow datastore for this model key.
628
- cache_update_policy: str
629
- Cache update policy: "auto", "force", or "never".
630
- force_cache_update: bool
631
- Simple override for "force" cache update policy.
632
- debug: bool
633
- Whether to turn on verbose debugging logs.
634
- circuit_breaker_config: dict
635
- Configuration for circuit breaker protection. Keys: failure_threshold, recovery_timeout, reset_timeout.
636
- timeout_config: dict
637
- Configuration for various operation timeouts. Keys: pull, stop, health_check, install, server_startup.
856
+ sources : List[Union[str, Dict[str, Any]]], default: []
857
+ List of secret specs, defining how the secrets are to be retrieved
858
+ role : str, optional, default: None
859
+ Role to use for fetching secrets
638
860
  """
639
861
  ...
640
862
 
641
- def nvct(*, gpu: int, gpu_type: str) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
863
+ @typing.overload
864
+ def catch(*, var: typing.Optional[str] = None, print_exception: bool = True) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
642
865
  """
643
- Specifies that this step should execute on DGX cloud.
866
+ Specifies that the step will succeed under all circumstances.
867
+
868
+ The decorator will create an optional artifact, specified by `var`, which
869
+ contains the exception raised. You can use it to detect the presence
870
+ of errors, indicating that all happy-path artifacts produced by the step
871
+ are missing.
644
872
 
645
873
 
646
874
  Parameters
647
875
  ----------
648
- gpu : int
649
- Number of GPUs to use.
650
- gpu_type : str
651
- Type of Nvidia GPU to use.
652
- """
653
- ...
654
-
655
- @typing.overload
656
- def nebius_s3_proxy(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
657
- """
658
- Nebius-specific S3 Proxy decorator for routing S3 requests through a local proxy service.
659
- It exists to make it easier for users to know that this decorator should only be used with
660
- a Neo Cloud like Nebius.
661
- """
662
- ...
663
-
664
- @typing.overload
665
- def nebius_s3_proxy(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
666
- ...
667
-
668
- def nebius_s3_proxy(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
669
- """
670
- Nebius-specific S3 Proxy decorator for routing S3 requests through a local proxy service.
671
- It exists to make it easier for users to know that this decorator should only be used with
672
- a Neo Cloud like Nebius.
876
+ var : str, optional, default None
877
+ Name of the artifact in which to store the caught exception.
878
+ If not specified, the exception is not stored.
879
+ print_exception : bool, default True
880
+ Determines whether or not the exception is printed to
881
+ stdout when caught.
673
882
  """
674
883
  ...
675
884
 
676
885
  @typing.overload
677
- def coreweave_s3_proxy(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
678
- """
679
- CoreWeave-specific S3 Proxy decorator for routing S3 requests through a local proxy service.
680
- It exists to make it easier for users to know that this decorator should only be used with
681
- a Neo Cloud like CoreWeave.
682
- """
886
+ def catch(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
683
887
  ...
684
888
 
685
889
  @typing.overload
686
- def coreweave_s3_proxy(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
890
+ def catch(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
687
891
  ...
688
892
 
689
- def coreweave_s3_proxy(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
893
+ def catch(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, var: typing.Optional[str] = None, print_exception: bool = True):
690
894
  """
691
- CoreWeave-specific S3 Proxy decorator for routing S3 requests through a local proxy service.
692
- It exists to make it easier for users to know that this decorator should only be used with
693
- a Neo Cloud like CoreWeave.
895
+ Specifies that the step will succeed under all circumstances.
896
+
897
+ The decorator will create an optional artifact, specified by `var`, which
898
+ contains the exception raised. You can use it to detect the presence
899
+ of errors, indicating that all happy-path artifacts produced by the step
900
+ are missing.
901
+
902
+
903
+ Parameters
904
+ ----------
905
+ var : str, optional, default None
906
+ Name of the artifact in which to store the caught exception.
907
+ If not specified, the exception is not stored.
908
+ print_exception : bool, default True
909
+ Determines whether or not the exception is printed to
910
+ stdout when caught.
694
911
  """
695
912
  ...
696
913
 
@@ -728,119 +945,147 @@ def environment(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], Non
728
945
  ...
729
946
 
730
947
  @typing.overload
731
- def timeout(*, seconds: int = 0, minutes: int = 0, hours: int = 0) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
948
+ def conda(*, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
732
949
  """
733
- Specifies a timeout for your step.
734
-
735
- This decorator is useful if this step may hang indefinitely.
736
-
737
- This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
738
- A timeout is considered to be an exception thrown by the step. It will cause the step to be
739
- retried if needed and the exception will be caught by the `@catch` decorator, if present.
950
+ Specifies the Conda environment for the step.
740
951
 
741
- Note that all the values specified in parameters are added together so if you specify
742
- 60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.
952
+ Information in this decorator will augment any
953
+ attributes set in the `@conda_base` flow-level decorator. Hence,
954
+ you can use `@conda_base` to set packages required by all
955
+ steps and use `@conda` to specify step-specific overrides.
743
956
 
744
957
 
745
958
  Parameters
746
959
  ----------
747
- seconds : int, default 0
748
- Number of seconds to wait prior to timing out.
749
- minutes : int, default 0
750
- Number of minutes to wait prior to timing out.
751
- hours : int, default 0
752
- Number of hours to wait prior to timing out.
960
+ packages : Dict[str, str], default {}
961
+ Packages to use for this step. The key is the name of the package
962
+ and the value is the version to use.
963
+ libraries : Dict[str, str], default {}
964
+ Supported for backward compatibility. When used with packages, packages will take precedence.
965
+ python : str, optional, default None
966
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
967
+ that the version used will correspond to the version of the Python interpreter used to start the run.
968
+ disabled : bool, default False
969
+ If set to True, disables @conda.
753
970
  """
754
971
  ...
755
972
 
756
973
  @typing.overload
757
- def timeout(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
974
+ def conda(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
758
975
  ...
759
976
 
760
977
  @typing.overload
761
- def timeout(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
978
+ def conda(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
762
979
  ...
763
980
 
764
- def timeout(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, seconds: int = 0, minutes: int = 0, hours: int = 0):
981
+ def conda(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False):
765
982
  """
766
- Specifies a timeout for your step.
983
+ Specifies the Conda environment for the step.
767
984
 
768
- This decorator is useful if this step may hang indefinitely.
985
+ Information in this decorator will augment any
986
+ attributes set in the `@conda_base` flow-level decorator. Hence,
987
+ you can use `@conda_base` to set packages required by all
988
+ steps and use `@conda` to specify step-specific overrides.
769
989
 
770
- This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
771
- A timeout is considered to be an exception thrown by the step. It will cause the step to be
772
- retried if needed and the exception will be caught by the `@catch` decorator, if present.
773
990
 
774
- Note that all the values specified in parameters are added together so if you specify
775
- 60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.
991
+ Parameters
992
+ ----------
993
+ packages : Dict[str, str], default {}
994
+ Packages to use for this step. The key is the name of the package
995
+ and the value is the version to use.
996
+ libraries : Dict[str, str], default {}
997
+ Supported for backward compatibility. When used with packages, packages will take precedence.
998
+ python : str, optional, default None
999
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
1000
+ that the version used will correspond to the version of the Python interpreter used to start the run.
1001
+ disabled : bool, default False
1002
+ If set to True, disables @conda.
1003
+ """
1004
+ ...
1005
+
1006
+ def nvct(*, gpu: int, gpu_type: str) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1007
+ """
1008
+ Specifies that this step should execute on DGX cloud.
776
1009
 
777
1010
 
778
1011
  Parameters
779
1012
  ----------
780
- seconds : int, default 0
781
- Number of seconds to wait prior to timing out.
782
- minutes : int, default 0
783
- Number of minutes to wait prior to timing out.
784
- hours : int, default 0
785
- Number of hours to wait prior to timing out.
1013
+ gpu : int
1014
+ Number of GPUs to use.
1015
+ gpu_type : str
1016
+ Type of Nvidia GPU to use.
786
1017
  """
787
1018
  ...
788
1019
 
789
1020
  @typing.overload
790
- def app_deploy(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1021
+ def fast_bakery_internal(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
791
1022
  """
792
- Decorator prototype for all step decorators. This function gets specialized
793
- and imported for all decorator types by _import_plugin_decorators().
1023
+ Internal decorator to support Fast bakery
794
1024
  """
795
1025
  ...
796
1026
 
797
1027
  @typing.overload
798
- def app_deploy(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1028
+ def fast_bakery_internal(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
799
1029
  ...
800
1030
 
801
- def app_deploy(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
1031
+ def fast_bakery_internal(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
802
1032
  """
803
- Decorator prototype for all step decorators. This function gets specialized
804
- and imported for all decorator types by _import_plugin_decorators().
1033
+ Internal decorator to support Fast bakery
805
1034
  """
806
1035
  ...
807
1036
 
808
1037
  @typing.overload
809
- def secrets(*, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = [], role: typing.Optional[str] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1038
+ def retry(*, times: int = 3, minutes_between_retries: int = 2) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
810
1039
  """
811
- Specifies secrets to be retrieved and injected as environment variables prior to
812
- the execution of a step.
1040
+ Specifies the number of times the task corresponding
1041
+ to a step needs to be retried.
1042
+
1043
+ This decorator is useful for handling transient errors, such as networking issues.
1044
+ If your task contains operations that can't be retried safely, e.g. database updates,
1045
+ it is advisable to annotate it with `@retry(times=0)`.
1046
+
1047
+ This can be used in conjunction with the `@catch` decorator. The `@catch`
1048
+ decorator will execute a no-op task after all retries have been exhausted,
1049
+ ensuring that the flow execution can continue.
813
1050
 
814
1051
 
815
1052
  Parameters
816
1053
  ----------
817
- sources : List[Union[str, Dict[str, Any]]], default: []
818
- List of secret specs, defining how the secrets are to be retrieved
819
- role : str, optional, default: None
820
- Role to use for fetching secrets
1054
+ times : int, default 3
1055
+ Number of times to retry this task.
1056
+ minutes_between_retries : int, default 2
1057
+ Number of minutes between retries.
821
1058
  """
822
1059
  ...
823
1060
 
824
1061
  @typing.overload
825
- def secrets(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1062
+ def retry(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
826
1063
  ...
827
1064
 
828
1065
  @typing.overload
829
- def secrets(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1066
+ def retry(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
830
1067
  ...
831
1068
 
832
- def secrets(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = [], role: typing.Optional[str] = None):
1069
+ def retry(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, times: int = 3, minutes_between_retries: int = 2):
833
1070
  """
834
- Specifies secrets to be retrieved and injected as environment variables prior to
835
- the execution of a step.
1071
+ Specifies the number of times the task corresponding
1072
+ to a step needs to be retried.
1073
+
1074
+ This decorator is useful for handling transient errors, such as networking issues.
1075
+ If your task contains operations that can't be retried safely, e.g. database updates,
1076
+ it is advisable to annotate it with `@retry(times=0)`.
1077
+
1078
+ This can be used in conjunction with the `@catch` decorator. The `@catch`
1079
+ decorator will execute a no-op task after all retries have been exhausted,
1080
+ ensuring that the flow execution can continue.
836
1081
 
837
1082
 
838
1083
  Parameters
839
1084
  ----------
840
- sources : List[Union[str, Dict[str, Any]]], default: []
841
- List of secret specs, defining how the secrets are to be retrieved
842
- role : str, optional, default: None
843
- Role to use for fetching secrets
1085
+ times : int, default 3
1086
+ Number of times to retry this task.
1087
+ minutes_between_retries : int, default 2
1088
+ Number of minutes between retries.
844
1089
  """
845
1090
  ...
846
1091
 
@@ -866,119 +1111,142 @@ def s3_proxy(*, integration_name: typing.Optional[str] = None, write_mode: typin
866
1111
  """
867
1112
  ...
868
1113
 
869
- @typing.overload
870
- def fast_bakery_internal(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
871
- """
872
- Internal decorator to support Fast bakery
873
- """
874
- ...
875
-
876
- @typing.overload
877
- def fast_bakery_internal(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
878
- ...
879
-
880
- def fast_bakery_internal(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
881
- """
882
- Internal decorator to support Fast bakery
883
- """
884
- ...
885
-
886
- @typing.overload
887
- def test_append_card(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
888
- """
889
- A simple decorator that demonstrates using CardDecoratorInjector
890
- to inject a card and render simple markdown content.
891
- """
892
- ...
893
-
894
- @typing.overload
895
- def test_append_card(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
896
- ...
897
-
898
- def test_append_card(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
1114
+ def kubernetes(*, cpu: int = 1, memory: int = 4096, disk: int = 10240, image: typing.Optional[str] = None, image_pull_policy: str = 'KUBERNETES_IMAGE_PULL_POLICY', image_pull_secrets: typing.List[str] = [], service_account: str = 'METAFLOW_KUBERNETES_SERVICE_ACCOUNT', secrets: typing.Optional[typing.List[str]] = None, node_selector: typing.Union[typing.Dict[str, str], str, None] = None, namespace: str = 'METAFLOW_KUBERNETES_NAMESPACE', gpu: typing.Optional[int] = None, gpu_vendor: str = 'KUBERNETES_GPU_VENDOR', tolerations: typing.List[typing.Dict[str, str]] = [], labels: typing.Dict[str, str] = 'METAFLOW_KUBERNETES_LABELS', annotations: typing.Dict[str, str] = 'METAFLOW_KUBERNETES_ANNOTATIONS', use_tmpfs: bool = False, tmpfs_tempdir: bool = True, tmpfs_size: typing.Optional[int] = None, tmpfs_path: typing.Optional[str] = '/metaflow_temp', persistent_volume_claims: typing.Optional[typing.Dict[str, str]] = None, shared_memory: typing.Optional[int] = None, port: typing.Optional[int] = None, compute_pool: typing.Optional[str] = None, hostname_resolution_timeout: int = 600, qos: str = 'Burstable', security_context: typing.Optional[typing.Dict[str, typing.Any]] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
899
1115
  """
900
- A simple decorator that demonstrates using CardDecoratorInjector
901
- to inject a card and render simple markdown content.
1116
+ Specifies that this step should execute on Kubernetes.
1117
+
1118
+
1119
+ Parameters
1120
+ ----------
1121
+ cpu : int, default 1
1122
+ Number of CPUs required for this step. If `@resources` is
1123
+ also present, the maximum value from all decorators is used.
1124
+ memory : int, default 4096
1125
+ Memory size (in MB) required for this step. If
1126
+ `@resources` is also present, the maximum value from all decorators is
1127
+ used.
1128
+ disk : int, default 10240
1129
+ Disk size (in MB) required for this step. If
1130
+ `@resources` is also present, the maximum value from all decorators is
1131
+ used.
1132
+ image : str, optional, default None
1133
+ Docker image to use when launching on Kubernetes. If not specified, and
1134
+ METAFLOW_KUBERNETES_CONTAINER_IMAGE is specified, that image is used. If
1135
+ not, a default Docker image mapping to the current version of Python is used.
1136
+ image_pull_policy: str, default KUBERNETES_IMAGE_PULL_POLICY
1137
+ If given, the imagePullPolicy to be applied to the Docker image of the step.
1138
+ image_pull_secrets: List[str], default []
1139
+ The default is extracted from METAFLOW_KUBERNETES_IMAGE_PULL_SECRETS.
1140
+ Kubernetes image pull secrets to use when pulling container images
1141
+ in Kubernetes.
1142
+ service_account : str, default METAFLOW_KUBERNETES_SERVICE_ACCOUNT
1143
+ Kubernetes service account to use when launching pod in Kubernetes.
1144
+ secrets : List[str], optional, default None
1145
+ Kubernetes secrets to use when launching pod in Kubernetes. These
1146
+ secrets are in addition to the ones defined in `METAFLOW_KUBERNETES_SECRETS`
1147
+ in Metaflow configuration.
1148
+ node_selector: Union[Dict[str,str], str], optional, default None
1149
+ Kubernetes node selector(s) to apply to the pod running the task.
1150
+ Can be passed in as a comma separated string of values e.g.
1151
+ 'kubernetes.io/os=linux,kubernetes.io/arch=amd64' or as a dictionary
1152
+ {'kubernetes.io/os': 'linux', 'kubernetes.io/arch': 'amd64'}
1153
+ namespace : str, default METAFLOW_KUBERNETES_NAMESPACE
1154
+ Kubernetes namespace to use when launching pod in Kubernetes.
1155
+ gpu : int, optional, default None
1156
+ Number of GPUs required for this step. A value of zero implies that
1157
+ the scheduled node should not have GPUs.
1158
+ gpu_vendor : str, default KUBERNETES_GPU_VENDOR
1159
+ The vendor of the GPUs to be used for this step.
1160
+ tolerations : List[Dict[str,str]], default []
1161
+ The default is extracted from METAFLOW_KUBERNETES_TOLERATIONS.
1162
+ Kubernetes tolerations to use when launching pod in Kubernetes.
1163
+ labels: Dict[str, str], default: METAFLOW_KUBERNETES_LABELS
1164
+ Kubernetes labels to use when launching pod in Kubernetes.
1165
+ annotations: Dict[str, str], default: METAFLOW_KUBERNETES_ANNOTATIONS
1166
+ Kubernetes annotations to use when launching pod in Kubernetes.
1167
+ use_tmpfs : bool, default False
1168
+ This enables an explicit tmpfs mount for this step.
1169
+ tmpfs_tempdir : bool, default True
1170
+ sets METAFLOW_TEMPDIR to tmpfs_path if set for this step.
1171
+ tmpfs_size : int, optional, default: None
1172
+ The value for the size (in MiB) of the tmpfs mount for this step.
1173
+ This parameter maps to the `--tmpfs` option in Docker. Defaults to 50% of the
1174
+ memory allocated for this step.
1175
+ tmpfs_path : str, optional, default /metaflow_temp
1176
+ Path to tmpfs mount for this step.
1177
+ persistent_volume_claims : Dict[str, str], optional, default None
1178
+ A map (dictionary) of persistent volumes to be mounted to the pod for this step. The map is from persistent
1179
+ volumes to the path to which the volume is to be mounted, e.g., `{'pvc-name': '/path/to/mount/on'}`.
1180
+ shared_memory: int, optional
1181
+ Shared memory size (in MiB) required for this step
1182
+ port: int, optional
1183
+ Port number to specify in the Kubernetes job object
1184
+ compute_pool : str, optional, default None
1185
+ Compute pool to be used for this step.
1186
+ If not specified, any accessible compute pool within the perimeter is used.
1187
+ hostname_resolution_timeout: int, default 10 * 60
1188
+ Timeout in seconds for the worker tasks in the gang-scheduled cluster to resolve the hostname of the control task.
1189
+ Only applicable when @parallel is used.
1190
+ qos: str, default: Burstable
1191
+ Quality of Service class to assign to the pod. Supported values are: Guaranteed, Burstable, BestEffort
1192
+
1193
+ security_context: Dict[str, Any], optional, default None
1194
+ Container security context. Applies to the task container. Allows the following keys:
1195
+ - privileged: bool, optional, default None
1196
+ - allow_privilege_escalation: bool, optional, default None
1197
+ - run_as_user: int, optional, default None
1198
+ - run_as_group: int, optional, default None
1199
+ - run_as_non_root: bool, optional, default None
902
1200
  """
903
1201
  ...
904
1202
 
905
- def huggingface_hub(*, temp_dir_root: typing.Optional[str] = None, load: typing.Union[typing.List[str], typing.List[typing.Tuple[typing.Dict, str]], typing.List[typing.Tuple[str, str]], typing.List[typing.Dict], None]) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1203
+ def vllm(*, model: str, backend: str, openai_api_server: bool, debug: bool, card_refresh_interval: int, max_retries: int, retry_alert_frequency: int, engine_args: dict) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
906
1204
  """
907
- Decorator that helps cache, version and store models/datasets from huggingface hub.
908
-
909
- > Examples
910
-
911
- **Usage: creating references of models from huggingface that may be loaded in downstream steps**
912
- ```python
913
- @huggingface_hub
914
- @step
915
- def pull_model_from_huggingface(self):
916
- # `current.huggingface_hub.snapshot_download` downloads the model from the Hugging Face Hub
917
- # and saves it in the backend storage based on the model's `repo_id`. If there exists a model
918
- # with the same `repo_id` in the backend storage, it will not download the model again. The return
919
- # value of the function is a reference to the model in the backend storage.
920
- # This reference can be used to load the model in the subsequent steps via `@model(load=["llama_model"])`
1205
+ This decorator is used to run vllm APIs as Metaflow task sidecars.
921
1206
 
922
- self.model_id = "mistralai/Mistral-7B-Instruct-v0.1"
923
- self.llama_model = current.huggingface_hub.snapshot_download(
924
- repo_id=self.model_id,
925
- allow_patterns=["*.safetensors", "*.json", "tokenizer.*"],
926
- )
927
- self.next(self.train)
928
- ```
1207
+ User code call
1208
+ --------------
1209
+ @vllm(
1210
+ model="...",
1211
+ ...
1212
+ )
929
1213
 
930
- **Usage: loading models directly from huggingface hub or from cache (from metaflow's datastore)**
931
- ```python
932
- @huggingface_hub(load=["mistralai/Mistral-7B-Instruct-v0.1"])
933
- @step
934
- def pull_model_from_huggingface(self):
935
- path_to_model = current.huggingface_hub.loaded["mistralai/Mistral-7B-Instruct-v0.1"]
936
- ```
1214
+ Valid backend options
1215
+ ---------------------
1216
+ - 'local': Run as a separate process on the local task machine.
937
1217
 
938
- ```python
939
- @huggingface_hub(load=[("mistralai/Mistral-7B-Instruct-v0.1", "/my-directory"), ("myorg/mistral-lora", "/my-lora-directory")])
940
- @step
941
- def finetune_model(self):
942
- path_to_model = current.huggingface_hub.loaded["mistralai/Mistral-7B-Instruct-v0.1"]
943
- # path_to_model will be /my-directory
944
- ```
1218
+ Valid model options
1219
+ -------------------
1220
+ Any HuggingFace model identifier, e.g. 'meta-llama/Llama-3.2-1B'
945
1221
 
946
- ```python
947
- # Takes all the arguments passed to `snapshot_download`
948
- # except for `local_dir`
949
- @huggingface_hub(load=[
950
- {
951
- "repo_id": "mistralai/Mistral-7B-Instruct-v0.1",
952
- },
953
- {
954
- "repo_id": "myorg/mistral-lora",
955
- "repo_type": "model",
956
- },
957
- ])
958
- @step
959
- def finetune_model(self):
960
- path_to_model = current.huggingface_hub.loaded["mistralai/Mistral-7B-Instruct-v0.1"]
961
- # path_to_model will be /my-directory
962
- ```
1222
+ NOTE: vLLM's OpenAI-compatible server serves ONE model per server instance.
1223
+ If you need multiple models, you must create multiple @vllm decorators.
963
1224
 
964
1225
 
965
1226
  Parameters
966
1227
  ----------
967
- temp_dir_root : str, optional
968
- The root directory that will hold the temporary directory where objects will be downloaded.
969
-
970
- load: Union[List[str], List[Tuple[Dict, str]], List[Tuple[str, str]], List[Dict], None]
971
- The list of repos (models/datasets) to load.
972
-
973
- Loaded repos can be accessed via `current.huggingface_hub.loaded`. If load is set, then the following happens:
974
-
975
- - If repo (model/dataset) is not found in the datastore:
976
- - Downloads the repo from Hugging Face Hub to a temporary directory (or uses specified path) for local access
977
- - Stores it in Metaflow's datastore (s3/gcs/azure etc.) with a unique name based on repo_type/repo_id
978
- - All HF models loaded for a `@step` will be cached separately under flow/step/namespace.
979
-
980
- - If repo is found in the datastore:
981
- - Loads it directly from datastore to local path (can be temporary directory or specified path)
1228
+ model: str
1229
+ HuggingFace model identifier to be served by vLLM.
1230
+ backend: str
1231
+ Determines where and how to run the vLLM process.
1232
+ openai_api_server: bool
1233
+ Whether to use OpenAI-compatible API server mode (subprocess) instead of native engine.
1234
+ Default is False (uses native engine).
1235
+ Set to True for backward compatibility with existing code.
1236
+ debug: bool
1237
+ Whether to turn on verbose debugging logs.
1238
+ card_refresh_interval: int
1239
+ Interval in seconds for refreshing the vLLM status card.
1240
+ Only used when openai_api_server=True.
1241
+ max_retries: int
1242
+ Maximum number of retries checking for vLLM server startup.
1243
+ Only used when openai_api_server=True.
1244
+ retry_alert_frequency: int
1245
+ Frequency of alert logs for vLLM server startup retries.
1246
+ Only used when openai_api_server=True.
1247
+ engine_args : dict
1248
+ Additional keyword arguments to pass to the vLLM engine.
1249
+ For example, `tensor_parallel_size=2`.
982
1250
  """
983
1251
  ...
984
1252
 
@@ -1062,383 +1330,236 @@ def resources(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None]
1062
1330
  ...
1063
1331
 
1064
1332
  @typing.overload
1065
- def catch(*, var: typing.Optional[str] = None, print_exception: bool = True) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1333
+ def parallel(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1066
1334
  """
1067
- Specifies that the step will succeed under all circumstances.
1068
-
1069
- The decorator will create an optional artifact, specified by `var`, which
1070
- contains the exception raised. You can use it to detect the presence
1071
- of errors, indicating that all happy-path artifacts produced by the step
1072
- are missing.
1073
-
1074
-
1075
- Parameters
1076
- ----------
1077
- var : str, optional, default None
1078
- Name of the artifact in which to store the caught exception.
1079
- If not specified, the exception is not stored.
1080
- print_exception : bool, default True
1081
- Determines whether or not the exception is printed to
1082
- stdout when caught.
1335
+ Decorator prototype for all step decorators. This function gets specialized
1336
+ and imported for all decorator types by _import_plugin_decorators().
1083
1337
  """
1084
1338
  ...
1085
1339
 
1086
1340
  @typing.overload
1087
- def catch(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1088
- ...
1089
-
1090
- @typing.overload
1091
- def catch(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1092
- ...
1093
-
1094
- def catch(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, var: typing.Optional[str] = None, print_exception: bool = True):
1095
- """
1096
- Specifies that the step will succeed under all circumstances.
1097
-
1098
- The decorator will create an optional artifact, specified by `var`, which
1099
- contains the exception raised. You can use it to detect the presence
1100
- of errors, indicating that all happy-path artifacts produced by the step
1101
- are missing.
1102
-
1103
-
1104
- Parameters
1105
- ----------
1106
- var : str, optional, default None
1107
- Name of the artifact in which to store the caught exception.
1108
- If not specified, the exception is not stored.
1109
- print_exception : bool, default True
1110
- Determines whether or not the exception is printed to
1111
- stdout when caught.
1112
- """
1341
+ def parallel(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1113
1342
  ...
1114
1343
 
1115
- def kubernetes(*, cpu: int = 1, memory: int = 4096, disk: int = 10240, image: typing.Optional[str] = None, image_pull_policy: str = 'KUBERNETES_IMAGE_PULL_POLICY', image_pull_secrets: typing.List[str] = [], service_account: str = 'METAFLOW_KUBERNETES_SERVICE_ACCOUNT', secrets: typing.Optional[typing.List[str]] = None, node_selector: typing.Union[typing.Dict[str, str], str, None] = None, namespace: str = 'METAFLOW_KUBERNETES_NAMESPACE', gpu: typing.Optional[int] = None, gpu_vendor: str = 'KUBERNETES_GPU_VENDOR', tolerations: typing.List[typing.Dict[str, str]] = [], labels: typing.Dict[str, str] = 'METAFLOW_KUBERNETES_LABELS', annotations: typing.Dict[str, str] = 'METAFLOW_KUBERNETES_ANNOTATIONS', use_tmpfs: bool = False, tmpfs_tempdir: bool = True, tmpfs_size: typing.Optional[int] = None, tmpfs_path: typing.Optional[str] = '/metaflow_temp', persistent_volume_claims: typing.Optional[typing.Dict[str, str]] = None, shared_memory: typing.Optional[int] = None, port: typing.Optional[int] = None, compute_pool: typing.Optional[str] = None, hostname_resolution_timeout: int = 600, qos: str = 'Burstable', security_context: typing.Optional[typing.Dict[str, typing.Any]] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1344
+ def parallel(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
1116
1345
  """
1117
- Specifies that this step should execute on Kubernetes.
1118
-
1119
-
1120
- Parameters
1121
- ----------
1122
- cpu : int, default 1
1123
- Number of CPUs required for this step. If `@resources` is
1124
- also present, the maximum value from all decorators is used.
1125
- memory : int, default 4096
1126
- Memory size (in MB) required for this step. If
1127
- `@resources` is also present, the maximum value from all decorators is
1128
- used.
1129
- disk : int, default 10240
1130
- Disk size (in MB) required for this step. If
1131
- `@resources` is also present, the maximum value from all decorators is
1132
- used.
1133
- image : str, optional, default None
1134
- Docker image to use when launching on Kubernetes. If not specified, and
1135
- METAFLOW_KUBERNETES_CONTAINER_IMAGE is specified, that image is used. If
1136
- not, a default Docker image mapping to the current version of Python is used.
1137
- image_pull_policy: str, default KUBERNETES_IMAGE_PULL_POLICY
1138
- If given, the imagePullPolicy to be applied to the Docker image of the step.
1139
- image_pull_secrets: List[str], default []
1140
- The default is extracted from METAFLOW_KUBERNETES_IMAGE_PULL_SECRETS.
1141
- Kubernetes image pull secrets to use when pulling container images
1142
- in Kubernetes.
1143
- service_account : str, default METAFLOW_KUBERNETES_SERVICE_ACCOUNT
1144
- Kubernetes service account to use when launching pod in Kubernetes.
1145
- secrets : List[str], optional, default None
1146
- Kubernetes secrets to use when launching pod in Kubernetes. These
1147
- secrets are in addition to the ones defined in `METAFLOW_KUBERNETES_SECRETS`
1148
- in Metaflow configuration.
1149
- node_selector: Union[Dict[str,str], str], optional, default None
1150
- Kubernetes node selector(s) to apply to the pod running the task.
1151
- Can be passed in as a comma separated string of values e.g.
1152
- 'kubernetes.io/os=linux,kubernetes.io/arch=amd64' or as a dictionary
1153
- {'kubernetes.io/os': 'linux', 'kubernetes.io/arch': 'amd64'}
1154
- namespace : str, default METAFLOW_KUBERNETES_NAMESPACE
1155
- Kubernetes namespace to use when launching pod in Kubernetes.
1156
- gpu : int, optional, default None
1157
- Number of GPUs required for this step. A value of zero implies that
1158
- the scheduled node should not have GPUs.
1159
- gpu_vendor : str, default KUBERNETES_GPU_VENDOR
1160
- The vendor of the GPUs to be used for this step.
1161
- tolerations : List[Dict[str,str]], default []
1162
- The default is extracted from METAFLOW_KUBERNETES_TOLERATIONS.
1163
- Kubernetes tolerations to use when launching pod in Kubernetes.
1164
- labels: Dict[str, str], default: METAFLOW_KUBERNETES_LABELS
1165
- Kubernetes labels to use when launching pod in Kubernetes.
1166
- annotations: Dict[str, str], default: METAFLOW_KUBERNETES_ANNOTATIONS
1167
- Kubernetes annotations to use when launching pod in Kubernetes.
1168
- use_tmpfs : bool, default False
1169
- This enables an explicit tmpfs mount for this step.
1170
- tmpfs_tempdir : bool, default True
1171
- sets METAFLOW_TEMPDIR to tmpfs_path if set for this step.
1172
- tmpfs_size : int, optional, default: None
1173
- The value for the size (in MiB) of the tmpfs mount for this step.
1174
- This parameter maps to the `--tmpfs` option in Docker. Defaults to 50% of the
1175
- memory allocated for this step.
1176
- tmpfs_path : str, optional, default /metaflow_temp
1177
- Path to tmpfs mount for this step.
1178
- persistent_volume_claims : Dict[str, str], optional, default None
1179
- A map (dictionary) of persistent volumes to be mounted to the pod for this step. The map is from persistent
1180
- volumes to the path to which the volume is to be mounted, e.g., `{'pvc-name': '/path/to/mount/on'}`.
1181
- shared_memory: int, optional
1182
- Shared memory size (in MiB) required for this step
1183
- port: int, optional
1184
- Port number to specify in the Kubernetes job object
1185
- compute_pool : str, optional, default None
1186
- Compute pool to be used for this step.
1187
- If not specified, any accessible compute pool within the perimeter is used.
1188
- hostname_resolution_timeout: int, default 10 * 60
1189
- Timeout in seconds for the worker tasks in the gang-scheduled cluster to resolve the hostname of the control task.
1190
- Only applicable when @parallel is used.
1191
- qos: str, default: Burstable
1192
- Quality of Service class to assign to the pod. Supported values are: Guaranteed, Burstable, BestEffort
1193
-
1194
- security_context: Dict[str, Any], optional, default None
1195
- Container security context. Applies to the task container. Allows the following keys:
1196
- - privileged: bool, optional, default None
1197
- - allow_privilege_escalation: bool, optional, default None
1198
- - run_as_user: int, optional, default None
1199
- - run_as_group: int, optional, default None
1200
- - run_as_non_root: bool, optional, default None
1346
+ Decorator prototype for all step decorators. This function gets specialized
1347
+ and imported for all decorator types by _import_plugin_decorators().
1201
1348
  """
1202
1349
  ...
1203
1350
 
1204
1351
  @typing.overload
1205
- def checkpoint(*, load_policy: str = 'fresh', temp_dir_root: str = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1352
+ def trigger_on_finish(*, flow: typing.Union[typing.Dict[str, str], str, None] = None, flows: typing.List[typing.Union[str, typing.Dict[str, str]]] = [], options: typing.Dict[str, typing.Any] = {}) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1206
1353
  """
1207
- Enables checkpointing for a step.
1208
-
1209
- > Examples
1210
-
1211
- - Saving Checkpoints
1354
+ Specifies the flow(s) that this flow depends on.
1212
1355
 
1213
- ```python
1214
- @checkpoint
1215
- @step
1216
- def train(self):
1217
- model = create_model(self.parameters, checkpoint_path = None)
1218
- for i in range(self.epochs):
1219
- # some training logic
1220
- loss = model.train(self.dataset)
1221
- if i % 10 == 0:
1222
- model.save(
1223
- current.checkpoint.directory,
1224
- )
1225
- # saves the contents of the `current.checkpoint.directory` as a checkpoint
1226
- # and returns a reference dictionary to the checkpoint saved in the datastore
1227
- self.latest_checkpoint = current.checkpoint.save(
1228
- name="epoch_checkpoint",
1229
- metadata={
1230
- "epoch": i,
1231
- "loss": loss,
1232
- }
1233
- )
1234
1356
  ```
1357
+ @trigger_on_finish(flow='FooFlow')
1358
+ ```
1359
+ or
1360
+ ```
1361
+ @trigger_on_finish(flows=['FooFlow', 'BarFlow'])
1362
+ ```
1363
+ This decorator respects the @project decorator and triggers the flow
1364
+ when upstream runs within the same namespace complete successfully
1235
1365
 
1236
- - Using Loaded Checkpoints
1237
-
1238
- ```python
1239
- @retry(times=3)
1240
- @checkpoint
1241
- @step
1242
- def train(self):
1243
- # Assume that the task has restarted and the previous attempt of the task
1244
- # saved a checkpoint
1245
- checkpoint_path = None
1246
- if current.checkpoint.is_loaded: # Check if a checkpoint is loaded
1247
- print("Loaded checkpoint from the previous attempt")
1248
- checkpoint_path = current.checkpoint.directory
1366
+ Additionally, you can specify project aware upstream flow dependencies
1367
+ by specifying the fully qualified project_flow_name.
1368
+ ```
1369
+ @trigger_on_finish(flow='my_project.branch.my_branch.FooFlow')
1370
+ ```
1371
+ or
1372
+ ```
1373
+ @trigger_on_finish(flows=['my_project.branch.my_branch.FooFlow', 'BarFlow'])
1374
+ ```
1249
1375
 
1250
- model = create_model(self.parameters, checkpoint_path = checkpoint_path)
1251
- for i in range(self.epochs):
1252
- ...
1376
+ You can also specify just the project or project branch (other values will be
1377
+ inferred from the current project or project branch):
1378
+ ```
1379
+ @trigger_on_finish(flow={"name": "FooFlow", "project": "my_project", "project_branch": "branch"})
1253
1380
  ```
1254
1381
 
1382
+ Note that `branch` is typically one of:
1383
+ - `prod`
1384
+ - `user.bob`
1385
+ - `test.my_experiment`
1386
+ - `prod.staging`
1387
+
1255
1388
 
1256
1389
  Parameters
1257
1390
  ----------
1258
- load_policy : str, default: "fresh"
1259
- The policy for loading the checkpoint. The following policies are supported:
1260
- - "eager": Loads the latest available checkpoint within the namespace.
1261
- With this mode, the latest checkpoint written by any previous task (can be even a different run) of the step
1262
- will be loaded at the start of the task.
1263
- - "none": Do not load any checkpoint
1264
- - "fresh": Loads the latest checkpoint created within the running Task.
1265
- This mode helps loading checkpoints across various retry attempts of the same task.
1266
- With this mode, no checkpoint will be loaded at the start of a task but any checkpoints
1267
- created within the task will be loaded when the task retries execution on failure.
1268
-
1269
- temp_dir_root : str, default: None
1270
- The root directory under which `current.checkpoint.directory` will be created.
1391
+ flow : Union[str, Dict[str, str]], optional, default None
1392
+ Upstream flow dependency for this flow.
1393
+ flows : List[Union[str, Dict[str, str]]], default []
1394
+ Upstream flow dependencies for this flow.
1395
+ options : Dict[str, Any], default {}
1396
+ Backend-specific configuration for tuning eventing behavior.
1271
1397
  """
1272
1398
  ...
1273
1399
 
1274
1400
  @typing.overload
1275
- def checkpoint(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1276
- ...
1277
-
1278
- @typing.overload
1279
- def checkpoint(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1401
+ def trigger_on_finish(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1280
1402
  ...
1281
1403
 
1282
- def checkpoint(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, load_policy: str = 'fresh', temp_dir_root: str = None):
1404
+ def trigger_on_finish(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, flow: typing.Union[typing.Dict[str, str], str, None] = None, flows: typing.List[typing.Union[str, typing.Dict[str, str]]] = [], options: typing.Dict[str, typing.Any] = {}):
1283
1405
  """
1284
- Enables checkpointing for a step.
1285
-
1286
- > Examples
1287
-
1288
- - Saving Checkpoints
1406
+ Specifies the flow(s) that this flow depends on.
1289
1407
 
1290
- ```python
1291
- @checkpoint
1292
- @step
1293
- def train(self):
1294
- model = create_model(self.parameters, checkpoint_path = None)
1295
- for i in range(self.epochs):
1296
- # some training logic
1297
- loss = model.train(self.dataset)
1298
- if i % 10 == 0:
1299
- model.save(
1300
- current.checkpoint.directory,
1301
- )
1302
- # saves the contents of the `current.checkpoint.directory` as a checkpoint
1303
- # and returns a reference dictionary to the checkpoint saved in the datastore
1304
- self.latest_checkpoint = current.checkpoint.save(
1305
- name="epoch_checkpoint",
1306
- metadata={
1307
- "epoch": i,
1308
- "loss": loss,
1309
- }
1310
- )
1311
1408
  ```
1409
+ @trigger_on_finish(flow='FooFlow')
1410
+ ```
1411
+ or
1412
+ ```
1413
+ @trigger_on_finish(flows=['FooFlow', 'BarFlow'])
1414
+ ```
1415
+ This decorator respects the @project decorator and triggers the flow
1416
+ when upstream runs within the same namespace complete successfully
1312
1417
 
1313
- - Using Loaded Checkpoints
1314
-
1315
- ```python
1316
- @retry(times=3)
1317
- @checkpoint
1318
- @step
1319
- def train(self):
1320
- # Assume that the task has restarted and the previous attempt of the task
1321
- # saved a checkpoint
1322
- checkpoint_path = None
1323
- if current.checkpoint.is_loaded: # Check if a checkpoint is loaded
1324
- print("Loaded checkpoint from the previous attempt")
1325
- checkpoint_path = current.checkpoint.directory
1418
+ Additionally, you can specify project aware upstream flow dependencies
1419
+ by specifying the fully qualified project_flow_name.
1420
+ ```
1421
+ @trigger_on_finish(flow='my_project.branch.my_branch.FooFlow')
1422
+ ```
1423
+ or
1424
+ ```
1425
+ @trigger_on_finish(flows=['my_project.branch.my_branch.FooFlow', 'BarFlow'])
1426
+ ```
1326
1427
 
1327
- model = create_model(self.parameters, checkpoint_path = checkpoint_path)
1328
- for i in range(self.epochs):
1329
- ...
1428
+ You can also specify just the project or project branch (other values will be
1429
+ inferred from the current project or project branch):
1430
+ ```
1431
+ @trigger_on_finish(flow={"name": "FooFlow", "project": "my_project", "project_branch": "branch"})
1330
1432
  ```
1331
1433
 
1434
+ Note that `branch` is typically one of:
1435
+ - `prod`
1436
+ - `user.bob`
1437
+ - `test.my_experiment`
1438
+ - `prod.staging`
1439
+
1332
1440
 
1333
1441
  Parameters
1334
1442
  ----------
1335
- load_policy : str, default: "fresh"
1336
- The policy for loading the checkpoint. The following policies are supported:
1337
- "eager": Loads the latest available checkpoint within the namespace.
1338
- With this mode, the latest checkpoint written by any previous task (can be even a different run) of the step
1339
- will be loaded at the start of the task.
1340
- - "none": Do not load any checkpoint
1341
- "fresh": Loads the latest checkpoint created within the running Task.
1342
- This mode helps loading checkpoints across various retry attempts of the same task.
1343
- With this mode, no checkpoint will be loaded at the start of a task but any checkpoints
1344
- created within the task will be loaded when the task retries execution on failure.
1345
-
1346
- temp_dir_root : str, default: None
1347
- The root directory under which `current.checkpoint.directory` will be created.
1443
+ flow : Union[str, Dict[str, str]], optional, default None
1444
+ Upstream flow dependency for this flow.
1445
+ flows : List[Union[str, Dict[str, str]]], default []
1446
+ Upstream flow dependencies for this flow.
1447
+ options : Dict[str, Any], default {}
1448
+ Backend-specific configuration for tuning eventing behavior.
1348
1449
  """
1349
1450
  ...
1350
1451
 
1351
- @typing.overload
1352
- def conda_base(*, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1452
+ def with_artifact_store(f: typing.Optional[typing.Type[FlowSpecDerived]] = None):
1353
1453
  """
1354
- Specifies the Conda environment for all steps of the flow.
1454
+ Allows setting external datastores to save data for the
1455
+ `@checkpoint`/`@model`/`@huggingface_hub` decorators.
1355
1456
 
1356
- Use `@conda_base` to set common libraries required by all
1357
- steps and use `@conda` to specify step-specific additions.
1457
+ This decorator is useful when users wish to save data to a different datastore
1458
+ than what is configured in Metaflow. This can be for a variety of reasons:
1358
1459
 
1460
+ 1. Data security: The objects need to be stored in a bucket (object storage) that is not accessible by other flows.
1461
+ 2. Data Locality: The location where the task is executing is not located in the same region as the datastore.
1462
+ - Example: Metaflow datastore lives in US East, but the task is executing in Finland datacenters.
1463
+ 3. Data Lifecycle Policies: The objects need to be archived / managed separately from the Metaflow managed objects.
1464
+ - Example: Flow is training very large models that need to be stored separately and will be deleted more aggressively than the Metaflow managed objects.
1359
1465
 
1360
- Parameters
1466
+ Usage:
1361
1467
  ----------
1362
- packages : Dict[str, str], default {}
1363
- Packages to use for this flow. The key is the name of the package
1364
- and the value is the version to use.
1365
- libraries : Dict[str, str], default {}
1366
- Supported for backward compatibility. When used with packages, packages will take precedence.
1367
- python : str, optional, default None
1368
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
1369
- that the version used will correspond to the version of the Python interpreter used to start the run.
1370
- disabled : bool, default False
1371
- If set to True, disables Conda.
1372
- """
1373
- ...
1374
-
1375
- @typing.overload
1376
- def conda_base(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1377
- ...
1378
-
1379
- def conda_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False):
1380
- """
1381
- Specifies the Conda environment for all steps of the flow.
1382
1468
 
1383
- Use `@conda_base` to set common libraries required by all
1384
- steps and use `@conda` to specify step-specific additions.
1469
+ - Using a custom IAM role to access the datastore.
1470
+
1471
+ ```python
1472
+ @with_artifact_store(
1473
+ type="s3",
1474
+ config=lambda: {
1475
+ "root": "s3://my-bucket-foo/path/to/root",
1476
+ "role_arn": ROLE,
1477
+ },
1478
+ )
1479
+ class MyFlow(FlowSpec):
1480
+
1481
+ @checkpoint
1482
+ @step
1483
+ def start(self):
1484
+ with open("my_file.txt", "w") as f:
1485
+ f.write("Hello, World!")
1486
+ self.external_bucket_checkpoint = current.checkpoint.save("my_file.txt")
1487
+ self.next(self.end)
1488
+
1489
+ ```
1490
+
1491
+ - Using credentials to access the s3-compatible datastore.
1492
+
1493
+ ```python
1494
+ @with_artifact_store(
1495
+ type="s3",
1496
+ config=lambda: {
1497
+ "root": "s3://my-bucket-foo/path/to/root",
1498
+ "client_params": {
1499
+ "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1500
+ "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1501
+ },
1502
+ },
1503
+ )
1504
+ class MyFlow(FlowSpec):
1505
+
1506
+ @checkpoint
1507
+ @step
1508
+ def start(self):
1509
+ with open("my_file.txt", "w") as f:
1510
+ f.write("Hello, World!")
1511
+ self.external_bucket_checkpoint = current.checkpoint.save("my_file.txt")
1512
+ self.next(self.end)
1513
+
1514
+ ```
1385
1515
 
1516
+ - Accessing objects stored in external datastores after task execution.
1386
1517
 
1387
- Parameters
1518
+ ```python
1519
+ run = Run("CheckpointsTestsFlow/8992")
1520
+ with artifact_store_from(run=run, config={
1521
+ "client_params": {
1522
+ "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1523
+ "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1524
+ },
1525
+ }):
1526
+ with Checkpoint() as cp:
1527
+ latest = cp.list(
1528
+ task=run["start"].task
1529
+ )[0]
1530
+ print(latest)
1531
+ cp.load(
1532
+ latest,
1533
+ "test-checkpoints"
1534
+ )
1535
+
1536
+ task = Task("TorchTuneFlow/8484/train/53673")
1537
+ with artifact_store_from(run=run, config={
1538
+ "client_params": {
1539
+ "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1540
+ "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1541
+ },
1542
+ }):
1543
+ load_model(
1544
+ task.data.model_ref,
1545
+ "test-models"
1546
+ )
1547
+ ```
1548
+ Parameters:
1388
1549
  ----------
1389
- packages : Dict[str, str], default {}
1390
- Packages to use for this flow. The key is the name of the package
1391
- and the value is the version to use.
1392
- libraries : Dict[str, str], default {}
1393
- Supported for backward compatibility. When used with packages, packages will take precedence.
1394
- python : str, optional, default None
1395
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
1396
- that the version used will correspond to the version of the Python interpreter used to start the run.
1397
- disabled : bool, default False
1398
- If set to True, disables Conda.
1399
- """
1400
- ...
1401
-
1402
- def airflow_external_task_sensor(*, timeout: int, poke_interval: int, mode: str, exponential_backoff: bool, pool: str, soft_fail: bool, name: str, description: str, external_dag_id: str, external_task_ids: typing.List[str], allowed_states: typing.List[str], failed_states: typing.List[str], execution_delta: "datetime.timedelta", check_existence: bool) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1403
- """
1404
- The `@airflow_external_task_sensor` decorator attaches a Airflow [ExternalTaskSensor](https://airflow.apache.org/docs/apache-airflow/stable/_api/airflow/sensors/external_task/index.html#airflow.sensors.external_task.ExternalTaskSensor) before the start step of the flow.
1405
- This decorator only works when a flow is scheduled on Airflow and is compiled using `airflow create`. More than one `@airflow_external_task_sensor` can be added as a flow decorators. Adding more than one decorator will ensure that `start` step starts only after all sensors finish.
1406
1550
 
1551
+ type: str
1552
+ The type of the datastore. Can be one of 's3', 'gcs', 'azure' or any other supported metaflow Datastore.
1407
1553
 
1408
- Parameters
1409
- ----------
1410
- timeout : int
1411
- Time, in seconds before the task times out and fails. (Default: 3600)
1412
- poke_interval : int
1413
- Time in seconds that the job should wait in between each try. (Default: 60)
1414
- mode : str
1415
- How the sensor operates. Options are: { poke | reschedule }. (Default: "poke")
1416
- exponential_backoff : bool
1417
- allow progressive longer waits between pokes by using exponential backoff algorithm. (Default: True)
1418
- pool : str
1419
- the slot pool this task should run in,
1420
- slot pools are a way to limit concurrency for certain tasks. (Default:None)
1421
- soft_fail : bool
1422
- Set to true to mark the task as SKIPPED on failure. (Default: False)
1423
- name : str
1424
- Name of the sensor on Airflow
1425
- description : str
1426
- Description of sensor in the Airflow UI
1427
- external_dag_id : str
1428
- The dag_id that contains the task you want to wait for.
1429
- external_task_ids : List[str]
1430
- The list of task_ids that you want to wait for.
1431
- If None (default value) the sensor waits for the DAG. (Default: None)
1432
- allowed_states : List[str]
1433
- Iterable of allowed states, (Default: ['success'])
1434
- failed_states : List[str]
1435
- Iterable of failed or dis-allowed states. (Default: None)
1436
- execution_delta : datetime.timedelta
1437
- time difference with the previous execution to look at,
1438
- the default is the same logical date as the current task or DAG. (Default: None)
1439
- check_existence: bool
1440
- Set to True to check if the external task exists or check if
1441
- the DAG to wait for exists. (Default: True)
1554
+ config: dict or Callable
1555
+ Dictionary of configuration options for the datastore. The following keys are required:
1556
+ - root: The root path in the datastore where the data will be saved. (needs to be in the format expected by the datastore)
1557
+ - example: 's3://bucket-name/path/to/root'
1558
+ - example: 'gs://bucket-name/path/to/root'
1559
+ - example: 'https://myblockacc.blob.core.windows.net/metaflow/'
1560
+ - role_arn (optional): AWS IAM role to access s3 bucket (only when `type` is 's3')
1561
+ - session_vars (optional): AWS session variables to access s3 bucket (only when `type` is 's3')
1562
+ - client_params (optional): AWS client parameters to access s3 bucket (only when `type` is 's3')
1442
1563
  """
1443
1564
  ...
1444
1565
 
@@ -1493,90 +1614,6 @@ def schedule(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, hourly:
1493
1614
  """
1494
1615
  ...
1495
1616
 
1496
- def airflow_s3_key_sensor(*, timeout: int, poke_interval: int, mode: str, exponential_backoff: bool, pool: str, soft_fail: bool, name: str, description: str, bucket_key: typing.Union[str, typing.List[str]], bucket_name: str, wildcard_match: bool, aws_conn_id: str, verify: bool) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1497
- """
1498
- The `@airflow_s3_key_sensor` decorator attaches a Airflow [S3KeySensor](https://airflow.apache.org/docs/apache-airflow-providers-amazon/stable/_api/airflow/providers/amazon/aws/sensors/s3/index.html#airflow.providers.amazon.aws.sensors.s3.S3KeySensor)
1499
- before the start step of the flow. This decorator only works when a flow is scheduled on Airflow
1500
- and is compiled using `airflow create`. More than one `@airflow_s3_key_sensor` can be
1501
- added as a flow decorators. Adding more than one decorator will ensure that `start` step
1502
- starts only after all sensors finish.
1503
-
1504
-
1505
- Parameters
1506
- ----------
1507
- timeout : int
1508
- Time, in seconds before the task times out and fails. (Default: 3600)
1509
- poke_interval : int
1510
- Time in seconds that the job should wait in between each try. (Default: 60)
1511
- mode : str
1512
- How the sensor operates. Options are: { poke | reschedule }. (Default: "poke")
1513
- exponential_backoff : bool
1514
- allow progressive longer waits between pokes by using exponential backoff algorithm. (Default: True)
1515
- pool : str
1516
- the slot pool this task should run in,
1517
- slot pools are a way to limit concurrency for certain tasks. (Default:None)
1518
- soft_fail : bool
1519
- Set to true to mark the task as SKIPPED on failure. (Default: False)
1520
- name : str
1521
- Name of the sensor on Airflow
1522
- description : str
1523
- Description of sensor in the Airflow UI
1524
- bucket_key : Union[str, List[str]]
1525
- The key(s) being waited on. Supports full s3:// style url or relative path from root level.
1526
- When it's specified as a full s3:// url, please leave `bucket_name` as None
1527
- bucket_name : str
1528
- Name of the S3 bucket. Only needed when bucket_key is not provided as a full s3:// url.
1529
- When specified, all the keys passed to bucket_key refers to this bucket. (Default:None)
1530
- wildcard_match : bool
1531
- whether the bucket_key should be interpreted as a Unix wildcard pattern. (Default: False)
1532
- aws_conn_id : str
1533
- a reference to the s3 connection on Airflow. (Default: None)
1534
- verify : bool
1535
- Whether or not to verify SSL certificates for S3 connection. (Default: None)
1536
- """
1537
- ...
1538
-
1539
- @typing.overload
1540
- def pypi_base(*, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1541
- """
1542
- Specifies the PyPI packages for all steps of the flow.
1543
-
1544
- Use `@pypi_base` to set common packages required by all
1545
- steps and use `@pypi` to specify step-specific overrides.
1546
-
1547
- Parameters
1548
- ----------
1549
- packages : Dict[str, str], default: {}
1550
- Packages to use for this flow. The key is the name of the package
1551
- and the value is the version to use.
1552
- python : str, optional, default: None
1553
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
1554
- that the version used will correspond to the version of the Python interpreter used to start the run.
1555
- """
1556
- ...
1557
-
1558
- @typing.overload
1559
- def pypi_base(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1560
- ...
1561
-
1562
- def pypi_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None):
1563
- """
1564
- Specifies the PyPI packages for all steps of the flow.
1565
-
1566
- Use `@pypi_base` to set common packages required by all
1567
- steps and use `@pypi` to specify step-specific overrides.
1568
-
1569
- Parameters
1570
- ----------
1571
- packages : Dict[str, str], default: {}
1572
- Packages to use for this flow. The key is the name of the package
1573
- and the value is the version to use.
1574
- python : str, optional, default: None
1575
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
1576
- that the version used will correspond to the version of the Python interpreter used to start the run.
1577
- """
1578
- ...
1579
-
1580
1617
  @typing.overload
1581
1618
  def trigger(*, event: typing.Union[str, typing.Dict[str, typing.Any], None] = None, events: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = [], options: typing.Dict[str, typing.Any] = {}) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1582
1619
  """
@@ -1671,103 +1708,86 @@ def trigger(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, event: t
1671
1708
  ...
1672
1709
 
1673
1710
  @typing.overload
1674
- def trigger_on_finish(*, flow: typing.Union[typing.Dict[str, str], str, None] = None, flows: typing.List[typing.Union[str, typing.Dict[str, str]]] = [], options: typing.Dict[str, typing.Any] = {}) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1711
+ def pypi_base(*, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1675
1712
  """
1676
- Specifies the flow(s) that this flow depends on.
1677
-
1678
- ```
1679
- @trigger_on_finish(flow='FooFlow')
1680
- ```
1681
- or
1682
- ```
1683
- @trigger_on_finish(flows=['FooFlow', 'BarFlow'])
1684
- ```
1685
- This decorator respects the @project decorator and triggers the flow
1686
- when upstream runs within the same namespace complete successfully
1687
-
1688
- Additionally, you can specify project aware upstream flow dependencies
1689
- by specifying the fully qualified project_flow_name.
1690
- ```
1691
- @trigger_on_finish(flow='my_project.branch.my_branch.FooFlow')
1692
- ```
1693
- or
1694
- ```
1695
- @trigger_on_finish(flows=['my_project.branch.my_branch.FooFlow', 'BarFlow'])
1696
- ```
1697
-
1698
- You can also specify just the project or project branch (other values will be
1699
- inferred from the current project or project branch):
1700
- ```
1701
- @trigger_on_finish(flow={"name": "FooFlow", "project": "my_project", "project_branch": "branch"})
1702
- ```
1703
-
1704
- Note that `branch` is typically one of:
1705
- - `prod`
1706
- - `user.bob`
1707
- - `test.my_experiment`
1708
- - `prod.staging`
1713
+ Specifies the PyPI packages for all steps of the flow.
1709
1714
 
1715
+ Use `@pypi_base` to set common packages required by all
1716
+ steps and use `@pypi` to specify step-specific overrides.
1710
1717
 
1711
1718
  Parameters
1712
1719
  ----------
1713
- flow : Union[str, Dict[str, str]], optional, default None
1714
- Upstream flow dependency for this flow.
1715
- flows : List[Union[str, Dict[str, str]]], default []
1716
- Upstream flow dependencies for this flow.
1717
- options : Dict[str, Any], default {}
1718
- Backend-specific configuration for tuning eventing behavior.
1720
+ packages : Dict[str, str], default: {}
1721
+ Packages to use for this flow. The key is the name of the package
1722
+ and the value is the version to use.
1723
+ python : str, optional, default: None
1724
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
1725
+ that the version used will correspond to the version of the Python interpreter used to start the run.
1719
1726
  """
1720
1727
  ...
1721
1728
 
1722
1729
  @typing.overload
1723
- def trigger_on_finish(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1730
+ def pypi_base(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1731
+ ...
1732
+
1733
+ def pypi_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None):
1734
+ """
1735
+ Specifies the PyPI packages for all steps of the flow.
1736
+
1737
+ Use `@pypi_base` to set common packages required by all
1738
+ steps and use `@pypi` to specify step-specific overrides.
1739
+
1740
+ Parameters
1741
+ ----------
1742
+ packages : Dict[str, str], default: {}
1743
+ Packages to use for this flow. The key is the name of the package
1744
+ and the value is the version to use.
1745
+ python : str, optional, default: None
1746
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
1747
+ that the version used will correspond to the version of the Python interpreter used to start the run.
1748
+ """
1724
1749
  ...
1725
1750
 
1726
- def trigger_on_finish(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, flow: typing.Union[typing.Dict[str, str], str, None] = None, flows: typing.List[typing.Union[str, typing.Dict[str, str]]] = [], options: typing.Dict[str, typing.Any] = {}):
1751
+ def airflow_external_task_sensor(*, timeout: int, poke_interval: int, mode: str, exponential_backoff: bool, pool: str, soft_fail: bool, name: str, description: str, external_dag_id: str, external_task_ids: typing.List[str], allowed_states: typing.List[str], failed_states: typing.List[str], execution_delta: "datetime.timedelta", check_existence: bool) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1727
1752
  """
1728
- Specifies the flow(s) that this flow depends on.
1729
-
1730
- ```
1731
- @trigger_on_finish(flow='FooFlow')
1732
- ```
1733
- or
1734
- ```
1735
- @trigger_on_finish(flows=['FooFlow', 'BarFlow'])
1736
- ```
1737
- This decorator respects the @project decorator and triggers the flow
1738
- when upstream runs within the same namespace complete successfully
1739
-
1740
- Additionally, you can specify project aware upstream flow dependencies
1741
- by specifying the fully qualified project_flow_name.
1742
- ```
1743
- @trigger_on_finish(flow='my_project.branch.my_branch.FooFlow')
1744
- ```
1745
- or
1746
- ```
1747
- @trigger_on_finish(flows=['my_project.branch.my_branch.FooFlow', 'BarFlow'])
1748
- ```
1749
-
1750
- You can also specify just the project or project branch (other values will be
1751
- inferred from the current project or project branch):
1752
- ```
1753
- @trigger_on_finish(flow={"name": "FooFlow", "project": "my_project", "project_branch": "branch"})
1754
- ```
1755
-
1756
- Note that `branch` is typically one of:
1757
- - `prod`
1758
- - `user.bob`
1759
- - `test.my_experiment`
1760
- - `prod.staging`
1753
+ The `@airflow_external_task_sensor` decorator attaches an Airflow [ExternalTaskSensor](https://airflow.apache.org/docs/apache-airflow/stable/_api/airflow/sensors/external_task/index.html#airflow.sensors.external_task.ExternalTaskSensor) before the start step of the flow.
1754
+ This decorator only works when a flow is scheduled on Airflow and is compiled using `airflow create`. More than one `@airflow_external_task_sensor` can be added as flow decorators. Adding more than one decorator will ensure that the `start` step starts only after all sensors finish.
1761
1755
 
1762
1756
 
1763
1757
  Parameters
1764
1758
  ----------
1765
- flow : Union[str, Dict[str, str]], optional, default None
1766
- Upstream flow dependency for this flow.
1767
- flows : List[Union[str, Dict[str, str]]], default []
1768
- Upstream flow dependencies for this flow.
1769
- options : Dict[str, Any], default {}
1770
- Backend-specific configuration for tuning eventing behavior.
1759
+ timeout : int
1760
+ Time, in seconds before the task times out and fails. (Default: 3600)
1761
+ poke_interval : int
1762
+ Time in seconds that the job should wait in between each try. (Default: 60)
1763
+ mode : str
1764
+ How the sensor operates. Options are: { poke | reschedule }. (Default: "poke")
1765
+ exponential_backoff : bool
1766
+ allow progressively longer waits between pokes by using exponential backoff algorithm. (Default: True)
1767
+ pool : str
1768
+ the slot pool this task should run in,
1769
+ slot pools are a way to limit concurrency for certain tasks. (Default:None)
1770
+ soft_fail : bool
1771
+ Set to true to mark the task as SKIPPED on failure. (Default: False)
1772
+ name : str
1773
+ Name of the sensor on Airflow
1774
+ description : str
1775
+ Description of sensor in the Airflow UI
1776
+ external_dag_id : str
1777
+ The dag_id that contains the task you want to wait for.
1778
+ external_task_ids : List[str]
1779
+ The list of task_ids that you want to wait for.
1780
+ If None (default value) the sensor waits for the DAG. (Default: None)
1781
+ allowed_states : List[str]
1782
+ Iterable of allowed states, (Default: ['success'])
1783
+ failed_states : List[str]
1784
+ Iterable of failed or dis-allowed states. (Default: None)
1785
+ execution_delta : datetime.timedelta
1786
+ time difference with the previous execution to look at,
1787
+ the default is the same logical date as the current task or DAG. (Default: None)
1788
+ check_existence: bool
1789
+ Set to True to check if the external task exists or check if
1790
+ the DAG to wait for exists. (Default: True)
1771
1791
  """
1772
1792
  ...
1773
1793
 
@@ -1806,117 +1826,97 @@ def project(*, name: str, branch: typing.Optional[str] = None, production: bool
1806
1826
  """
1807
1827
  ...
1808
1828
 
1809
- def with_artifact_store(f: typing.Optional[typing.Type[FlowSpecDerived]] = None):
1829
+ def airflow_s3_key_sensor(*, timeout: int, poke_interval: int, mode: str, exponential_backoff: bool, pool: str, soft_fail: bool, name: str, description: str, bucket_key: typing.Union[str, typing.List[str]], bucket_name: str, wildcard_match: bool, aws_conn_id: str, verify: bool) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1810
1830
  """
1811
- Allows setting external datastores to save data for the
1812
- `@checkpoint`/`@model`/`@huggingface_hub` decorators.
1813
-
1814
- This decorator is useful when users wish to save data to a different datastore
1815
- than what is configured in Metaflow. This can be for variety of reasons:
1831
+ The `@airflow_s3_key_sensor` decorator attaches an Airflow [S3KeySensor](https://airflow.apache.org/docs/apache-airflow-providers-amazon/stable/_api/airflow/providers/amazon/aws/sensors/s3/index.html#airflow.providers.amazon.aws.sensors.s3.S3KeySensor)
1832
+ before the start step of the flow. This decorator only works when a flow is scheduled on Airflow
1833
+ and is compiled using `airflow create`. More than one `@airflow_s3_key_sensor` can be
1834
+ added as flow decorators. Adding more than one decorator will ensure that the `start` step
1835
+ starts only after all sensors finish.
1816
1836
 
1817
- 1. Data security: The objects needs to be stored in a bucket (object storage) that is not accessible by other flows.
1818
- 2. Data Locality: The location where the task is executing is not located in the same region as the datastore.
1819
- - Example: Metaflow datastore lives in US East, but the task is executing in Finland datacenters.
1820
- 3. Data Lifecycle Policies: The objects need to be archived / managed separately from the Metaflow managed objects.
1821
- - Example: Flow is training very large models that need to be stored separately and will be deleted more aggressively than the Metaflow managed objects.
1822
1837
 
1823
- Usage:
1838
+ Parameters
1824
1839
  ----------
1840
+ timeout : int
1841
+ Time, in seconds before the task times out and fails. (Default: 3600)
1842
+ poke_interval : int
1843
+ Time in seconds that the job should wait in between each try. (Default: 60)
1844
+ mode : str
1845
+ How the sensor operates. Options are: { poke | reschedule }. (Default: "poke")
1846
+ exponential_backoff : bool
1847
+ allow progressive longer waits between pokes by using exponential backoff algorithm. (Default: True)
1848
+ pool : str
1849
+ the slot pool this task should run in,
1850
+ slot pools are a way to limit concurrency for certain tasks. (Default:None)
1851
+ soft_fail : bool
1852
+ Set to true to mark the task as SKIPPED on failure. (Default: False)
1853
+ name : str
1854
+ Name of the sensor on Airflow
1855
+ description : str
1856
+ Description of sensor in the Airflow UI
1857
+ bucket_key : Union[str, List[str]]
1858
+ The key(s) being waited on. Supports full s3:// style url or relative path from root level.
1859
+ When it's specified as a full s3:// url, please leave `bucket_name` as None
1860
+ bucket_name : str
1861
+ Name of the S3 bucket. Only needed when bucket_key is not provided as a full s3:// url.
1862
+ When specified, all the keys passed to bucket_key refer to this bucket. (Default:None)
1863
+ wildcard_match : bool
1864
+ whether the bucket_key should be interpreted as a Unix wildcard pattern. (Default: False)
1865
+ aws_conn_id : str
1866
+ a reference to the s3 connection on Airflow. (Default: None)
1867
+ verify : bool
1868
+ Whether or not to verify SSL certificates for S3 connection. (Default: None)
1869
+ """
1870
+ ...
1871
+
1872
+ @typing.overload
1873
+ def conda_base(*, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1874
+ """
1875
+ Specifies the Conda environment for all steps of the flow.
1825
1876
 
1826
- - Using a custom IAM role to access the datastore.
1827
-
1828
- ```python
1829
- @with_artifact_store(
1830
- type="s3",
1831
- config=lambda: {
1832
- "root": "s3://my-bucket-foo/path/to/root",
1833
- "role_arn": ROLE,
1834
- },
1835
- )
1836
- class MyFlow(FlowSpec):
1837
-
1838
- @checkpoint
1839
- @step
1840
- def start(self):
1841
- with open("my_file.txt", "w") as f:
1842
- f.write("Hello, World!")
1843
- self.external_bucket_checkpoint = current.checkpoint.save("my_file.txt")
1844
- self.next(self.end)
1845
-
1846
- ```
1847
-
1848
- - Using credentials to access the s3-compatible datastore.
1849
-
1850
- ```python
1851
- @with_artifact_store(
1852
- type="s3",
1853
- config=lambda: {
1854
- "root": "s3://my-bucket-foo/path/to/root",
1855
- "client_params": {
1856
- "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1857
- "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1858
- },
1859
- },
1860
- )
1861
- class MyFlow(FlowSpec):
1877
+ Use `@conda_base` to set common libraries required by all
1878
+ steps and use `@conda` to specify step-specific additions.
1862
1879
 
1863
- @checkpoint
1864
- @step
1865
- def start(self):
1866
- with open("my_file.txt", "w") as f:
1867
- f.write("Hello, World!")
1868
- self.external_bucket_checkpoint = current.checkpoint.save("my_file.txt")
1869
- self.next(self.end)
1870
1880
 
1871
- ```
1881
+ Parameters
1882
+ ----------
1883
+ packages : Dict[str, str], default {}
1884
+ Packages to use for this flow. The key is the name of the package
1885
+ and the value is the version to use.
1886
+ libraries : Dict[str, str], default {}
1887
+ Supported for backward compatibility. When used with packages, packages will take precedence.
1888
+ python : str, optional, default None
1889
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
1890
+ that the version used will correspond to the version of the Python interpreter used to start the run.
1891
+ disabled : bool, default False
1892
+ If set to True, disables Conda.
1893
+ """
1894
+ ...
1895
+
1896
+ @typing.overload
1897
+ def conda_base(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1898
+ ...
1899
+
1900
+ def conda_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False):
1901
+ """
1902
+ Specifies the Conda environment for all steps of the flow.
1872
1903
 
1873
- - Accessing objects stored in external datastores after task execution.
1904
+ Use `@conda_base` to set common libraries required by all
1905
+ steps and use `@conda` to specify step-specific additions.
1874
1906
 
1875
- ```python
1876
- run = Run("CheckpointsTestsFlow/8992")
1877
- with artifact_store_from(run=run, config={
1878
- "client_params": {
1879
- "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1880
- "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1881
- },
1882
- }):
1883
- with Checkpoint() as cp:
1884
- latest = cp.list(
1885
- task=run["start"].task
1886
- )[0]
1887
- print(latest)
1888
- cp.load(
1889
- latest,
1890
- "test-checkpoints"
1891
- )
1892
1907
 
1893
- task = Task("TorchTuneFlow/8484/train/53673")
1894
- with artifact_store_from(run=run, config={
1895
- "client_params": {
1896
- "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1897
- "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1898
- },
1899
- }):
1900
- load_model(
1901
- task.data.model_ref,
1902
- "test-models"
1903
- )
1904
- ```
1905
- Parameters:
1908
+ Parameters
1906
1909
  ----------
1907
-
1908
- type: str
1909
- The type of the datastore. Can be one of 's3', 'gcs', 'azure' or any other supported metaflow Datastore.
1910
-
1911
- config: dict or Callable
1912
- Dictionary of configuration options for the datastore. The following keys are required:
1913
- - root: The root path in the datastore where the data will be saved. (needs to be in the format expected by the datastore)
1914
- - example: 's3://bucket-name/path/to/root'
1915
- - example: 'gs://bucket-name/path/to/root'
1916
- - example: 'https://myblockacc.blob.core.windows.net/metaflow/'
1917
- - role_arn (optional): AWS IAM role to access s3 bucket (only when `type` is 's3')
1918
- - session_vars (optional): AWS session variables to access s3 bucket (only when `type` is 's3')
1919
- - client_params (optional): AWS client parameters to access s3 bucket (only when `type` is 's3')
1910
+ packages : Dict[str, str], default {}
1911
+ Packages to use for this flow. The key is the name of the package
1912
+ and the value is the version to use.
1913
+ libraries : Dict[str, str], default {}
1914
+ Supported for backward compatibility. When used with packages, packages will take precedence.
1915
+ python : str, optional, default None
1916
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
1917
+ that the version used will correspond to the version of the Python interpreter used to start the run.
1918
+ disabled : bool, default False
1919
+ If set to True, disables Conda.
1920
1920
  """
1921
1921
  ...
1922
1922