ob-metaflow-stubs 6.0.10.2rc0__py2.py3-none-any.whl → 6.0.10.3__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ob-metaflow-stubs might be problematic. More details are available on the original diff page.

Files changed (262)
  1. metaflow-stubs/__init__.pyi +1126 -1096
  2. metaflow-stubs/cards.pyi +2 -2
  3. metaflow-stubs/cli.pyi +2 -2
  4. metaflow-stubs/cli_components/__init__.pyi +2 -2
  5. metaflow-stubs/cli_components/utils.pyi +2 -2
  6. metaflow-stubs/client/__init__.pyi +2 -2
  7. metaflow-stubs/client/core.pyi +6 -6
  8. metaflow-stubs/client/filecache.pyi +2 -2
  9. metaflow-stubs/events.pyi +2 -2
  10. metaflow-stubs/exception.pyi +2 -2
  11. metaflow-stubs/flowspec.pyi +5 -5
  12. metaflow-stubs/generated_for.txt +1 -1
  13. metaflow-stubs/includefile.pyi +4 -4
  14. metaflow-stubs/meta_files.pyi +2 -2
  15. metaflow-stubs/metadata_provider/__init__.pyi +2 -2
  16. metaflow-stubs/metadata_provider/heartbeat.pyi +2 -2
  17. metaflow-stubs/metadata_provider/metadata.pyi +3 -3
  18. metaflow-stubs/metadata_provider/util.pyi +2 -2
  19. metaflow-stubs/metaflow_config.pyi +2 -2
  20. metaflow-stubs/metaflow_current.pyi +51 -51
  21. metaflow-stubs/metaflow_git.pyi +2 -2
  22. metaflow-stubs/mf_extensions/__init__.pyi +2 -2
  23. metaflow-stubs/mf_extensions/obcheckpoint/__init__.pyi +2 -2
  24. metaflow-stubs/mf_extensions/obcheckpoint/plugins/__init__.pyi +2 -2
  25. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/__init__.pyi +2 -2
  26. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/__init__.pyi +2 -2
  27. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/async_cards.pyi +2 -2
  28. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/deco_injection_mixin.pyi +2 -2
  29. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/extra_components.pyi +4 -4
  30. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/__init__.pyi +2 -2
  31. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/cards/__init__.pyi +2 -2
  32. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/cards/checkpoint_lister.pyi +3 -3
  33. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/cards/lineage_card.pyi +2 -2
  34. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/checkpoint_storage.pyi +11 -5
  35. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/constructors.pyi +2 -2
  36. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/core.pyi +5 -3
  37. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/decorator.pyi +4 -4
  38. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/exceptions.pyi +2 -2
  39. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/final_api.pyi +2 -2
  40. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/lineage.pyi +2 -2
  41. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/__init__.pyi +2 -2
  42. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/context.pyi +2 -2
  43. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/core.pyi +3 -3
  44. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/decorator.pyi +2 -2
  45. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/exceptions.pyi +2 -2
  46. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/task_utils.pyi +3 -3
  47. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/utils.pyi +2 -2
  48. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastructures.pyi +3 -3
  49. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/exceptions.pyi +2 -2
  50. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/hf_hub/__init__.pyi +2 -2
  51. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/hf_hub/decorator.pyi +42 -11
  52. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/__init__.pyi +2 -2
  53. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/core.pyi +3 -3
  54. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/exceptions.pyi +2 -2
  55. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/model_storage.pyi +5 -5
  56. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/__init__.pyi +2 -2
  57. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/flowspec_utils.pyi +2 -2
  58. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/general.pyi +2 -2
  59. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/identity_utils.pyi +2 -2
  60. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/serialization_handler/__init__.pyi +2 -2
  61. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/serialization_handler/base.pyi +2 -2
  62. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/serialization_handler/tar.pyi +3 -3
  63. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/tar_utils.pyi +3 -3
  64. metaflow-stubs/mf_extensions/outerbounds/__init__.pyi +2 -2
  65. metaflow-stubs/mf_extensions/outerbounds/plugins/__init__.pyi +2 -2
  66. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/__init__.pyi +2 -2
  67. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/__init__.pyi +2 -3
  68. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/_state_machine.pyi +2 -2
  69. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/_vendor/__init__.pyi +2 -2
  70. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/_vendor/spinner/__init__.pyi +2 -2
  71. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/_vendor/spinner/spinners.pyi +2 -2
  72. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/app_cli.pyi +2 -2
  73. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/app_config.pyi +3 -3
  74. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/capsule.pyi +4 -4
  75. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/click_importer.pyi +2 -2
  76. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/code_package/__init__.pyi +2 -2
  77. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/code_package/code_packager.pyi +3 -3
  78. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/__init__.pyi +2 -2
  79. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/cli_generator.pyi +2 -2
  80. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/config_utils.pyi +4 -4
  81. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/schema_export.pyi +2 -2
  82. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/typed_configs.pyi +3 -3
  83. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/unified_config.pyi +4 -4
  84. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/dependencies.pyi +3 -3
  85. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/deployer.pyi +6 -12
  86. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/experimental/__init__.pyi +2 -2
  87. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/perimeters.pyi +2 -2
  88. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/utils.pyi +4 -4
  89. metaflow-stubs/mf_extensions/outerbounds/plugins/aws/__init__.pyi +2 -2
  90. metaflow-stubs/mf_extensions/outerbounds/plugins/aws/assume_role_decorator.pyi +2 -2
  91. metaflow-stubs/mf_extensions/outerbounds/plugins/card_utilities/__init__.pyi +2 -2
  92. metaflow-stubs/mf_extensions/outerbounds/plugins/card_utilities/async_cards.pyi +3 -3
  93. metaflow-stubs/mf_extensions/outerbounds/plugins/card_utilities/injector.pyi +2 -2
  94. metaflow-stubs/mf_extensions/outerbounds/plugins/checkpoint_datastores/__init__.pyi +2 -2
  95. metaflow-stubs/mf_extensions/outerbounds/plugins/checkpoint_datastores/coreweave.pyi +3 -3
  96. metaflow-stubs/mf_extensions/outerbounds/plugins/checkpoint_datastores/nebius.pyi +3 -3
  97. metaflow-stubs/mf_extensions/outerbounds/plugins/fast_bakery/__init__.pyi +2 -2
  98. metaflow-stubs/mf_extensions/outerbounds/plugins/fast_bakery/baker.pyi +4 -4
  99. metaflow-stubs/mf_extensions/outerbounds/plugins/fast_bakery/docker_environment.pyi +2 -2
  100. metaflow-stubs/mf_extensions/outerbounds/plugins/fast_bakery/fast_bakery.pyi +2 -2
  101. metaflow-stubs/mf_extensions/outerbounds/plugins/kubernetes/__init__.pyi +2 -2
  102. metaflow-stubs/mf_extensions/outerbounds/plugins/kubernetes/pod_killer.pyi +2 -2
  103. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/__init__.pyi +2 -2
  104. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/constants.pyi +2 -2
  105. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/exceptions.pyi +2 -2
  106. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/ollama.pyi +2 -2
  107. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/status_card.pyi +2 -2
  108. metaflow-stubs/mf_extensions/outerbounds/plugins/snowflake/__init__.pyi +2 -2
  109. metaflow-stubs/mf_extensions/outerbounds/plugins/snowflake/snowflake.pyi +2 -2
  110. metaflow-stubs/mf_extensions/outerbounds/profilers/__init__.pyi +2 -2
  111. metaflow-stubs/mf_extensions/outerbounds/profilers/gpu.pyi +2 -2
  112. metaflow-stubs/mf_extensions/outerbounds/remote_config.pyi +3 -3
  113. metaflow-stubs/mf_extensions/outerbounds/toplevel/__init__.pyi +2 -2
  114. metaflow-stubs/mf_extensions/outerbounds/toplevel/global_aliases_for_metaflow_package.pyi +2 -3
  115. metaflow-stubs/mf_extensions/outerbounds/toplevel/s3_proxy.pyi +2 -2
  116. metaflow-stubs/multicore_utils.pyi +2 -2
  117. metaflow-stubs/ob_internal.pyi +2 -3
  118. metaflow-stubs/packaging_sys/__init__.pyi +6 -6
  119. metaflow-stubs/packaging_sys/backend.pyi +4 -4
  120. metaflow-stubs/packaging_sys/distribution_support.pyi +4 -4
  121. metaflow-stubs/packaging_sys/tar_backend.pyi +6 -6
  122. metaflow-stubs/packaging_sys/utils.pyi +2 -2
  123. metaflow-stubs/packaging_sys/v1.pyi +4 -4
  124. metaflow-stubs/parameters.pyi +4 -4
  125. metaflow-stubs/plugins/__init__.pyi +14 -14
  126. metaflow-stubs/plugins/airflow/__init__.pyi +2 -2
  127. metaflow-stubs/plugins/airflow/airflow_utils.pyi +2 -2
  128. metaflow-stubs/plugins/airflow/exception.pyi +2 -2
  129. metaflow-stubs/plugins/airflow/sensors/__init__.pyi +2 -2
  130. metaflow-stubs/plugins/airflow/sensors/base_sensor.pyi +2 -2
  131. metaflow-stubs/plugins/airflow/sensors/external_task_sensor.pyi +2 -2
  132. metaflow-stubs/plugins/airflow/sensors/s3_sensor.pyi +2 -2
  133. metaflow-stubs/plugins/argo/__init__.pyi +2 -2
  134. metaflow-stubs/plugins/argo/argo_client.pyi +2 -2
  135. metaflow-stubs/plugins/argo/argo_events.pyi +2 -2
  136. metaflow-stubs/plugins/argo/argo_workflows.pyi +3 -3
  137. metaflow-stubs/plugins/argo/argo_workflows_decorator.pyi +3 -3
  138. metaflow-stubs/plugins/argo/argo_workflows_deployer.pyi +5 -5
  139. metaflow-stubs/plugins/argo/argo_workflows_deployer_objects.pyi +4 -4
  140. metaflow-stubs/plugins/argo/exit_hooks.pyi +3 -3
  141. metaflow-stubs/plugins/aws/__init__.pyi +2 -2
  142. metaflow-stubs/plugins/aws/aws_client.pyi +2 -2
  143. metaflow-stubs/plugins/aws/aws_utils.pyi +2 -2
  144. metaflow-stubs/plugins/aws/batch/__init__.pyi +2 -2
  145. metaflow-stubs/plugins/aws/batch/batch.pyi +2 -2
  146. metaflow-stubs/plugins/aws/batch/batch_client.pyi +2 -2
  147. metaflow-stubs/plugins/aws/batch/batch_decorator.pyi +2 -2
  148. metaflow-stubs/plugins/aws/secrets_manager/__init__.pyi +2 -2
  149. metaflow-stubs/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.pyi +3 -3
  150. metaflow-stubs/plugins/aws/step_functions/__init__.pyi +2 -2
  151. metaflow-stubs/plugins/aws/step_functions/event_bridge_client.pyi +2 -2
  152. metaflow-stubs/plugins/aws/step_functions/schedule_decorator.pyi +2 -2
  153. metaflow-stubs/plugins/aws/step_functions/step_functions.pyi +2 -2
  154. metaflow-stubs/plugins/aws/step_functions/step_functions_client.pyi +2 -2
  155. metaflow-stubs/plugins/aws/step_functions/step_functions_deployer.pyi +4 -4
  156. metaflow-stubs/plugins/aws/step_functions/step_functions_deployer_objects.pyi +4 -4
  157. metaflow-stubs/plugins/azure/__init__.pyi +2 -2
  158. metaflow-stubs/plugins/azure/azure_credential.pyi +2 -2
  159. metaflow-stubs/plugins/azure/azure_exceptions.pyi +2 -2
  160. metaflow-stubs/plugins/azure/azure_secret_manager_secrets_provider.pyi +3 -3
  161. metaflow-stubs/plugins/azure/azure_utils.pyi +2 -2
  162. metaflow-stubs/plugins/azure/blob_service_client_factory.pyi +2 -2
  163. metaflow-stubs/plugins/azure/includefile_support.pyi +2 -2
  164. metaflow-stubs/plugins/cards/__init__.pyi +2 -2
  165. metaflow-stubs/plugins/cards/card_client.pyi +3 -3
  166. metaflow-stubs/plugins/cards/card_creator.pyi +2 -2
  167. metaflow-stubs/plugins/cards/card_datastore.pyi +2 -2
  168. metaflow-stubs/plugins/cards/card_decorator.pyi +3 -3
  169. metaflow-stubs/plugins/cards/card_modules/__init__.pyi +2 -2
  170. metaflow-stubs/plugins/cards/card_modules/basic.pyi +2 -2
  171. metaflow-stubs/plugins/cards/card_modules/card.pyi +2 -2
  172. metaflow-stubs/plugins/cards/card_modules/components.pyi +4 -4
  173. metaflow-stubs/plugins/cards/card_modules/convert_to_native_type.pyi +2 -2
  174. metaflow-stubs/plugins/cards/card_modules/renderer_tools.pyi +2 -2
  175. metaflow-stubs/plugins/cards/card_modules/test_cards.pyi +2 -2
  176. metaflow-stubs/plugins/cards/card_resolver.pyi +2 -2
  177. metaflow-stubs/plugins/cards/component_serializer.pyi +2 -2
  178. metaflow-stubs/plugins/cards/exception.pyi +2 -2
  179. metaflow-stubs/plugins/catch_decorator.pyi +2 -2
  180. metaflow-stubs/plugins/datatools/__init__.pyi +2 -2
  181. metaflow-stubs/plugins/datatools/local.pyi +2 -2
  182. metaflow-stubs/plugins/datatools/s3/__init__.pyi +2 -2
  183. metaflow-stubs/plugins/datatools/s3/s3.pyi +5 -5
  184. metaflow-stubs/plugins/datatools/s3/s3tail.pyi +2 -2
  185. metaflow-stubs/plugins/datatools/s3/s3util.pyi +2 -2
  186. metaflow-stubs/plugins/debug_logger.pyi +2 -2
  187. metaflow-stubs/plugins/debug_monitor.pyi +2 -2
  188. metaflow-stubs/plugins/environment_decorator.pyi +2 -2
  189. metaflow-stubs/plugins/events_decorator.pyi +2 -2
  190. metaflow-stubs/plugins/exit_hook/__init__.pyi +2 -2
  191. metaflow-stubs/plugins/exit_hook/exit_hook_decorator.pyi +2 -2
  192. metaflow-stubs/plugins/frameworks/__init__.pyi +2 -2
  193. metaflow-stubs/plugins/frameworks/pytorch.pyi +2 -2
  194. metaflow-stubs/plugins/gcp/__init__.pyi +2 -2
  195. metaflow-stubs/plugins/gcp/gcp_secret_manager_secrets_provider.pyi +3 -3
  196. metaflow-stubs/plugins/gcp/gs_exceptions.pyi +2 -2
  197. metaflow-stubs/plugins/gcp/gs_storage_client_factory.pyi +2 -2
  198. metaflow-stubs/plugins/gcp/gs_utils.pyi +2 -2
  199. metaflow-stubs/plugins/gcp/includefile_support.pyi +2 -2
  200. metaflow-stubs/plugins/kubernetes/__init__.pyi +2 -2
  201. metaflow-stubs/plugins/kubernetes/kube_utils.pyi +3 -3
  202. metaflow-stubs/plugins/kubernetes/kubernetes.pyi +2 -2
  203. metaflow-stubs/plugins/kubernetes/kubernetes_client.pyi +2 -2
  204. metaflow-stubs/plugins/kubernetes/kubernetes_decorator.pyi +2 -2
  205. metaflow-stubs/plugins/kubernetes/kubernetes_jobsets.pyi +2 -2
  206. metaflow-stubs/plugins/kubernetes/spot_monitor_sidecar.pyi +2 -2
  207. metaflow-stubs/plugins/ollama/__init__.pyi +3 -3
  208. metaflow-stubs/plugins/optuna/__init__.pyi +2 -2
  209. metaflow-stubs/plugins/parallel_decorator.pyi +2 -2
  210. metaflow-stubs/plugins/perimeters.pyi +2 -2
  211. metaflow-stubs/plugins/project_decorator.pyi +2 -2
  212. metaflow-stubs/plugins/pypi/__init__.pyi +3 -3
  213. metaflow-stubs/plugins/pypi/conda_decorator.pyi +2 -2
  214. metaflow-stubs/plugins/pypi/conda_environment.pyi +4 -4
  215. metaflow-stubs/plugins/pypi/parsers.pyi +2 -2
  216. metaflow-stubs/plugins/pypi/pypi_decorator.pyi +2 -2
  217. metaflow-stubs/plugins/pypi/pypi_environment.pyi +2 -2
  218. metaflow-stubs/plugins/pypi/utils.pyi +2 -2
  219. metaflow-stubs/plugins/resources_decorator.pyi +2 -2
  220. metaflow-stubs/plugins/retry_decorator.pyi +2 -2
  221. metaflow-stubs/plugins/secrets/__init__.pyi +3 -3
  222. metaflow-stubs/plugins/secrets/inline_secrets_provider.pyi +2 -2
  223. metaflow-stubs/plugins/secrets/secrets_decorator.pyi +2 -2
  224. metaflow-stubs/plugins/secrets/secrets_func.pyi +2 -2
  225. metaflow-stubs/plugins/secrets/secrets_spec.pyi +2 -2
  226. metaflow-stubs/plugins/secrets/utils.pyi +2 -2
  227. metaflow-stubs/plugins/snowflake/__init__.pyi +2 -2
  228. metaflow-stubs/plugins/storage_executor.pyi +2 -2
  229. metaflow-stubs/plugins/test_unbounded_foreach_decorator.pyi +3 -3
  230. metaflow-stubs/plugins/timeout_decorator.pyi +2 -2
  231. metaflow-stubs/plugins/torchtune/__init__.pyi +2 -2
  232. metaflow-stubs/plugins/uv/__init__.pyi +2 -2
  233. metaflow-stubs/plugins/uv/uv_environment.pyi +2 -2
  234. metaflow-stubs/profilers/__init__.pyi +2 -2
  235. metaflow-stubs/pylint_wrapper.pyi +2 -2
  236. metaflow-stubs/runner/__init__.pyi +2 -2
  237. metaflow-stubs/runner/deployer.pyi +34 -34
  238. metaflow-stubs/runner/deployer_impl.pyi +3 -3
  239. metaflow-stubs/runner/metaflow_runner.pyi +4 -4
  240. metaflow-stubs/runner/nbdeploy.pyi +2 -2
  241. metaflow-stubs/runner/nbrun.pyi +2 -2
  242. metaflow-stubs/runner/subprocess_manager.pyi +2 -2
  243. metaflow-stubs/runner/utils.pyi +2 -2
  244. metaflow-stubs/system/__init__.pyi +2 -2
  245. metaflow-stubs/system/system_logger.pyi +3 -3
  246. metaflow-stubs/system/system_monitor.pyi +2 -2
  247. metaflow-stubs/tagging_util.pyi +2 -2
  248. metaflow-stubs/tuple_util.pyi +2 -2
  249. metaflow-stubs/user_configs/__init__.pyi +2 -2
  250. metaflow-stubs/user_configs/config_options.pyi +4 -4
  251. metaflow-stubs/user_configs/config_parameters.pyi +5 -5
  252. metaflow-stubs/user_decorators/__init__.pyi +2 -2
  253. metaflow-stubs/user_decorators/common.pyi +2 -2
  254. metaflow-stubs/user_decorators/mutable_flow.pyi +5 -5
  255. metaflow-stubs/user_decorators/mutable_step.pyi +5 -5
  256. metaflow-stubs/user_decorators/user_flow_decorator.pyi +5 -5
  257. metaflow-stubs/user_decorators/user_step_decorator.pyi +7 -7
  258. {ob_metaflow_stubs-6.0.10.2rc0.dist-info → ob_metaflow_stubs-6.0.10.3.dist-info}/METADATA +1 -1
  259. ob_metaflow_stubs-6.0.10.3.dist-info/RECORD +262 -0
  260. ob_metaflow_stubs-6.0.10.2rc0.dist-info/RECORD +0 -262
  261. {ob_metaflow_stubs-6.0.10.2rc0.dist-info → ob_metaflow_stubs-6.0.10.3.dist-info}/WHEEL +0 -0
  262. {ob_metaflow_stubs-6.0.10.2rc0.dist-info → ob_metaflow_stubs-6.0.10.3.dist-info}/top_level.txt +0 -0
@@ -1,15 +1,15 @@
1
1
  ######################################################################################################
2
2
  # Auto-generated Metaflow stub file #
3
- # MF version: 2.18.3.2+obcheckpoint(0.2.4);ob(v1) #
4
- # Generated on 2025-09-09T23:55:12.839647 #
3
+ # MF version: 2.18.5.1+obcheckpoint(0.2.5);ob(v1) #
4
+ # Generated on 2025-09-16T18:01:26.529291 #
5
5
  ######################################################################################################
6
6
 
7
7
  from __future__ import annotations
8
8
 
9
9
  import typing
10
10
  if typing.TYPE_CHECKING:
11
- import datetime
12
11
  import typing
12
+ import datetime
13
13
  FlowSpecDerived = typing.TypeVar("FlowSpecDerived", bound="FlowSpec", contravariant=False, covariant=False)
14
14
  StepFlag = typing.NewType("StepFlag", bool)
15
15
 
@@ -39,18 +39,18 @@ from .user_decorators.user_step_decorator import UserStepDecorator as UserStepDe
39
39
  from .user_decorators.user_step_decorator import StepMutator as StepMutator
40
40
  from .user_decorators.user_step_decorator import user_step_decorator as user_step_decorator
41
41
  from .user_decorators.user_flow_decorator import FlowMutator as FlowMutator
42
- from . import tuple_util as tuple_util
43
42
  from . import cards as cards
44
- from . import metaflow_git as metaflow_git
45
43
  from . import events as events
44
+ from . import metaflow_git as metaflow_git
45
+ from . import tuple_util as tuple_util
46
46
  from . import runner as runner
47
47
  from . import plugins as plugins
48
48
  from .mf_extensions.outerbounds.toplevel.global_aliases_for_metaflow_package import S3 as S3
49
49
  from . import includefile as includefile
50
50
  from .includefile import IncludeFile as IncludeFile
51
- from .plugins.pypi.parsers import requirements_txt_parser as requirements_txt_parser
52
- from .plugins.pypi.parsers import pyproject_toml_parser as pyproject_toml_parser
53
51
  from .plugins.pypi.parsers import conda_environment_yml_parser as conda_environment_yml_parser
52
+ from .plugins.pypi.parsers import pyproject_toml_parser as pyproject_toml_parser
53
+ from .plugins.pypi.parsers import requirements_txt_parser as requirements_txt_parser
54
54
  from . import client as client
55
55
  from .client.core import namespace as namespace
56
56
  from .client.core import get_namespace as get_namespace
@@ -83,7 +83,6 @@ from .mf_extensions.outerbounds.plugins.checkpoint_datastores.nebius import nebi
83
83
  from .mf_extensions.outerbounds.plugins.checkpoint_datastores.coreweave import coreweave_checkpoints as coreweave_checkpoints
84
84
  from .mf_extensions.outerbounds.plugins.aws.assume_role_decorator import assume_role as assume_role
85
85
  from .mf_extensions.outerbounds.plugins.apps.core.deployer import AppDeployer as AppDeployer
86
- from .mf_extensions.outerbounds.plugins.apps.core.deployer import DeployedApp as DeployedApp
87
86
  from . import system as system
88
87
  from . import cli_components as cli_components
89
88
  from . import pylint_wrapper as pylint_wrapper
@@ -169,104 +168,166 @@ def step(f: typing.Union[typing.Callable[[FlowSpecDerived], None], typing.Callab
169
168
  ...
170
169
 
171
170
  @typing.overload
172
- def environment(*, vars: typing.Dict[str, str] = {}) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
171
+ def retry(*, times: int = 3, minutes_between_retries: int = 2) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
173
172
  """
174
- Specifies environment variables to be set prior to the execution of a step.
173
+ Specifies the number of times the task corresponding
174
+ to a step needs to be retried.
175
+
176
+ This decorator is useful for handling transient errors, such as networking issues.
177
+ If your task contains operations that can't be retried safely, e.g. database updates,
178
+ it is advisable to annotate it with `@retry(times=0)`.
179
+
180
+ This can be used in conjunction with the `@catch` decorator. The `@catch`
181
+ decorator will execute a no-op task after all retries have been exhausted,
182
+ ensuring that the flow execution can continue.
175
183
 
176
184
 
177
185
  Parameters
178
186
  ----------
179
- vars : Dict[str, str], default {}
180
- Dictionary of environment variables to set.
187
+ times : int, default 3
188
+ Number of times to retry this task.
189
+ minutes_between_retries : int, default 2
190
+ Number of minutes between retries.
181
191
  """
182
192
  ...
183
193
 
184
194
  @typing.overload
185
- def environment(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
195
+ def retry(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
186
196
  ...
187
197
 
188
198
  @typing.overload
189
- def environment(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
199
+ def retry(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
190
200
  ...
191
201
 
192
- def environment(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, vars: typing.Dict[str, str] = {}):
202
+ def retry(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, times: int = 3, minutes_between_retries: int = 2):
193
203
  """
194
- Specifies environment variables to be set prior to the execution of a step.
204
+ Specifies the number of times the task corresponding
205
+ to a step needs to be retried.
206
+
207
+ This decorator is useful for handling transient errors, such as networking issues.
208
+ If your task contains operations that can't be retried safely, e.g. database updates,
209
+ it is advisable to annotate it with `@retry(times=0)`.
210
+
211
+ This can be used in conjunction with the `@catch` decorator. The `@catch`
212
+ decorator will execute a no-op task after all retries have been exhausted,
213
+ ensuring that the flow execution can continue.
195
214
 
196
215
 
197
216
  Parameters
198
217
  ----------
199
- vars : Dict[str, str], default {}
200
- Dictionary of environment variables to set.
218
+ times : int, default 3
219
+ Number of times to retry this task.
220
+ minutes_between_retries : int, default 2
221
+ Number of minutes between retries.
201
222
  """
202
223
  ...
203
224
 
204
- def nvidia(*, gpu: int, gpu_type: str, queue_timeout: int) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
225
+ def vllm(*, model: str, backend: str, openai_api_server: bool, debug: bool, card_refresh_interval: int, max_retries: int, retry_alert_frequency: int, engine_args: dict) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
205
226
  """
206
- Specifies that this step should execute on DGX cloud.
227
+ This decorator is used to run vllm APIs as Metaflow task sidecars.
228
+
229
+ User code call
230
+ --------------
231
+ @vllm(
232
+ model="...",
233
+ ...
234
+ )
235
+
236
+ Valid backend options
237
+ ---------------------
238
+ - 'local': Run as a separate process on the local task machine.
239
+
240
+ Valid model options
241
+ -------------------
242
+ Any HuggingFace model identifier, e.g. 'meta-llama/Llama-3.2-1B'
243
+
244
+ NOTE: vLLM's OpenAI-compatible server serves ONE model per server instance.
245
+ If you need multiple models, you must create multiple @vllm decorators.
207
246
 
208
247
 
209
248
  Parameters
210
249
  ----------
211
- gpu : int
212
- Number of GPUs to use.
213
- gpu_type : str
214
- Type of Nvidia GPU to use.
215
- queue_timeout : int
216
- Time to keep the job in NVCF's queue.
250
+ model: str
251
+ HuggingFace model identifier to be served by vLLM.
252
+ backend: str
253
+ Determines where and how to run the vLLM process.
254
+ openai_api_server: bool
255
+ Whether to use OpenAI-compatible API server mode (subprocess) instead of native engine.
256
+ Default is False (uses native engine).
257
+ Set to True for backward compatibility with existing code.
258
+ debug: bool
259
+ Whether to turn on verbose debugging logs.
260
+ card_refresh_interval: int
261
+ Interval in seconds for refreshing the vLLM status card.
262
+ Only used when openai_api_server=True.
263
+ max_retries: int
264
+ Maximum number of retries checking for vLLM server startup.
265
+ Only used when openai_api_server=True.
266
+ retry_alert_frequency: int
267
+ Frequency of alert logs for vLLM server startup retries.
268
+ Only used when openai_api_server=True.
269
+ engine_args : dict
270
+ Additional keyword arguments to pass to the vLLM engine.
271
+ For example, `tensor_parallel_size=2`.
217
272
  """
218
273
  ...
219
274
 
220
- def nvct(*, gpu: int, gpu_type: str) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
275
+ @typing.overload
276
+ def coreweave_s3_proxy(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
221
277
  """
222
- Specifies that this step should execute on DGX cloud.
223
-
224
-
225
- Parameters
226
- ----------
227
- gpu : int
228
- Number of GPUs to use.
229
- gpu_type : str
230
- Type of Nvidia GPU to use.
278
+ CoreWeave-specific S3 Proxy decorator for routing S3 requests through a local proxy service.
279
+ It exists to make it easier for users to know that this decorator should only be used with
280
+ a Neo Cloud like CoreWeave.
231
281
  """
232
282
  ...
233
283
 
234
284
  @typing.overload
235
- def fast_bakery_internal(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
285
+ def coreweave_s3_proxy(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
286
+ ...
287
+
288
+ def coreweave_s3_proxy(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
236
289
  """
237
- Internal decorator to support Fast bakery
290
+ CoreWeave-specific S3 Proxy decorator for routing S3 requests through a local proxy service.
291
+ It exists to make it easier for users to know that this decorator should only be used with
292
+ a Neo Cloud like CoreWeave.
238
293
  """
239
294
  ...
240
295
 
241
296
  @typing.overload
242
- def fast_bakery_internal(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
297
+ def parallel(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
298
+ """
299
+ Decorator prototype for all step decorators. This function gets specialized
300
+ and imported for all decorators types by _import_plugin_decorators().
301
+ """
243
302
  ...
244
303
 
245
- def fast_bakery_internal(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
304
+ @typing.overload
305
+ def parallel(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
306
+ ...
307
+
308
+ def parallel(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
246
309
  """
247
- Internal decorator to support Fast bakery
310
+ Decorator prototype for all step decorators. This function gets specialized
311
+ and imported for all decorators types by _import_plugin_decorators().
248
312
  """
249
313
  ...
250
314
 
251
- def s3_proxy(*, integration_name: typing.Optional[str] = None, write_mode: typing.Optional[str] = None, debug: typing.Optional[bool] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
315
+ @typing.overload
316
+ def test_append_card(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
252
317
  """
253
- S3 Proxy decorator for routing S3 requests through a local proxy service.
254
-
255
-
256
- Parameters
257
- ----------
258
- integration_name : str, optional
259
- Name of the S3 proxy integration. If not specified, will use the only
260
- available S3 proxy integration in the namespace (fails if multiple exist).
261
- write_mode : str, optional
262
- The desired behavior during write operations to target (origin) S3 bucket.
263
- allowed options are:
264
- "origin-and-cache" -> write to both the target S3 bucket and local object
265
- storage
266
- "origin" -> only write to the target S3 bucket
267
- "cache" -> only write to the object storage service used for caching
268
- debug : bool, optional
269
- Enable debug logging for proxy operations.
318
+ A simple decorator that demonstrates using CardDecoratorInjector
319
+ to inject a card and render simple markdown content.
320
+ """
321
+ ...
322
+
323
+ @typing.overload
324
+ def test_append_card(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
325
+ ...
326
+
327
+ def test_append_card(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
328
+ """
329
+ A simple decorator that demonstrates using CardDecoratorInjector
330
+ to inject a card and render simple markdown content.
270
331
  """
271
332
  ...
272
333
 
@@ -329,143 +390,92 @@ def timeout(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None],
329
390
  """
330
391
  ...
331
392
 
332
- @typing.overload
333
- def secrets(*, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = [], role: typing.Optional[str] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
334
- """
335
- Specifies secrets to be retrieved and injected as environment variables prior to
336
- the execution of a step.
337
-
338
-
339
- Parameters
340
- ----------
341
- sources : List[Union[str, Dict[str, Any]]], default: []
342
- List of secret specs, defining how the secrets are to be retrieved
343
- role : str, optional, default: None
344
- Role to use for fetching secrets
345
- """
346
- ...
347
-
348
- @typing.overload
349
- def secrets(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
350
- ...
351
-
352
- @typing.overload
353
- def secrets(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
354
- ...
355
-
356
- def secrets(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = [], role: typing.Optional[str] = None):
393
+ def kubernetes(*, cpu: int = 1, memory: int = 4096, disk: int = 10240, image: typing.Optional[str] = None, image_pull_policy: str = 'KUBERNETES_IMAGE_PULL_POLICY', image_pull_secrets: typing.List[str] = [], service_account: str = 'METAFLOW_KUBERNETES_SERVICE_ACCOUNT', secrets: typing.Optional[typing.List[str]] = None, node_selector: typing.Union[typing.Dict[str, str], str, None] = None, namespace: str = 'METAFLOW_KUBERNETES_NAMESPACE', gpu: typing.Optional[int] = None, gpu_vendor: str = 'KUBERNETES_GPU_VENDOR', tolerations: typing.List[typing.Dict[str, str]] = [], labels: typing.Dict[str, str] = 'METAFLOW_KUBERNETES_LABELS', annotations: typing.Dict[str, str] = 'METAFLOW_KUBERNETES_ANNOTATIONS', use_tmpfs: bool = False, tmpfs_tempdir: bool = True, tmpfs_size: typing.Optional[int] = None, tmpfs_path: typing.Optional[str] = '/metaflow_temp', persistent_volume_claims: typing.Optional[typing.Dict[str, str]] = None, shared_memory: typing.Optional[int] = None, port: typing.Optional[int] = None, compute_pool: typing.Optional[str] = None, hostname_resolution_timeout: int = 600, qos: str = 'Burstable', security_context: typing.Optional[typing.Dict[str, typing.Any]] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
357
394
  """
358
- Specifies secrets to be retrieved and injected as environment variables prior to
359
- the execution of a step.
395
+ Specifies that this step should execute on Kubernetes.
360
396
 
361
397
 
362
398
  Parameters
363
399
  ----------
364
- sources : List[Union[str, Dict[str, Any]]], default: []
365
- List of secret specs, defining how the secrets are to be retrieved
366
- role : str, optional, default: None
367
- Role to use for fetching secrets
368
- """
369
- ...
370
-
371
- def huggingface_hub(*, temp_dir_root: typing.Optional[str] = None, load: typing.Union[typing.List[str], typing.List[typing.Tuple[typing.Dict, str]], typing.List[typing.Tuple[str, str]], typing.List[typing.Dict], None]) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
372
- """
373
- Decorator that helps cache, version and store models/datasets from huggingface hub.
374
-
375
- > Examples
376
-
377
- **Usage: creating references of models from huggingface that may be loaded in downstream steps**
378
- ```python
379
- @huggingface_hub
380
- @step
381
- def pull_model_from_huggingface(self):
382
- # `current.huggingface_hub.snapshot_download` downloads the model from the Hugging Face Hub
383
- # and saves it in the backend storage based on the model's `repo_id`. If there exists a model
384
- # with the same `repo_id` in the backend storage, it will not download the model again. The return
385
- # value of the function is a reference to the model in the backend storage.
386
- # This reference can be used to load the model in the subsequent steps via `@model(load=["llama_model"])`
387
-
388
- self.model_id = "mistralai/Mistral-7B-Instruct-v0.1"
389
- self.llama_model = current.huggingface_hub.snapshot_download(
390
- repo_id=self.model_id,
391
- allow_patterns=["*.safetensors", "*.json", "tokenizer.*"],
392
- )
393
- self.next(self.train)
394
- ```
395
-
396
- **Usage: loading models directly from huggingface hub or from cache (from metaflow's datastore)**
397
- ```python
398
- @huggingface_hub(load=["mistralai/Mistral-7B-Instruct-v0.1"])
399
- @step
400
- def pull_model_from_huggingface(self):
401
- path_to_model = current.huggingface_hub.loaded["mistralai/Mistral-7B-Instruct-v0.1"]
402
- ```
403
-
404
- ```python
405
- @huggingface_hub(load=[("mistralai/Mistral-7B-Instruct-v0.1", "/my-directory"), ("myorg/mistral-lora, "/my-lora-directory")])
406
- @step
407
- def finetune_model(self):
408
- path_to_model = current.huggingface_hub.loaded["mistralai/Mistral-7B-Instruct-v0.1"]
409
- # path_to_model will be /my-directory
410
- ```
411
-
412
- ```python
413
- # Takes all the arguments passed to `snapshot_download`
414
- # except for `local_dir`
415
- @huggingface_hub(load=[
416
- {
417
- "repo_id": "mistralai/Mistral-7B-Instruct-v0.1",
418
- },
419
- {
420
- "repo_id": "myorg/mistral-lora",
421
- "repo_type": "model",
422
- },
423
- ])
424
- @step
425
- def finetune_model(self):
426
- path_to_model = current.huggingface_hub.loaded["mistralai/Mistral-7B-Instruct-v0.1"]
427
- # path_to_model will be /my-directory
428
- ```
429
-
430
-
431
- Parameters
432
- ----------
433
- temp_dir_root : str, optional
434
- The root directory that will hold the temporary directory where objects will be downloaded.
435
-
436
- load: Union[List[str], List[Tuple[Dict, str]], List[Tuple[str, str]], List[Dict], None]
437
- The list of repos (models/datasets) to load.
438
-
439
- Loaded repos can be accessed via `current.huggingface_hub.loaded`. If load is set, then the following happens:
440
-
441
- - If repo (model/dataset) is not found in the datastore:
442
- - Downloads the repo from Hugging Face Hub to a temporary directory (or uses specified path) for local access
443
- - Stores it in Metaflow's datastore (s3/gcs/azure etc.) with a unique name based on repo_type/repo_id
444
- - All HF models loaded for a `@step` will be cached separately under flow/step/namespace.
400
+ cpu : int, default 1
401
+ Number of CPUs required for this step. If `@resources` is
402
+ also present, the maximum value from all decorators is used.
403
+ memory : int, default 4096
404
+ Memory size (in MB) required for this step. If
405
+ `@resources` is also present, the maximum value from all decorators is
406
+ used.
407
+ disk : int, default 10240
408
+ Disk size (in MB) required for this step. If
409
+ `@resources` is also present, the maximum value from all decorators is
410
+ used.
411
+ image : str, optional, default None
412
+ Docker image to use when launching on Kubernetes. If not specified, and
413
+ METAFLOW_KUBERNETES_CONTAINER_IMAGE is specified, that image is used. If
414
+ not, a default Docker image mapping to the current version of Python is used.
415
+ image_pull_policy: str, default KUBERNETES_IMAGE_PULL_POLICY
416
+ If given, the imagePullPolicy to be applied to the Docker image of the step.
417
+ image_pull_secrets: List[str], default []
418
+ The default is extracted from METAFLOW_KUBERNETES_IMAGE_PULL_SECRETS.
419
+ Kubernetes image pull secrets to use when pulling container images
420
+ in Kubernetes.
421
+ service_account : str, default METAFLOW_KUBERNETES_SERVICE_ACCOUNT
422
+ Kubernetes service account to use when launching pod in Kubernetes.
423
+ secrets : List[str], optional, default None
424
+ Kubernetes secrets to use when launching pod in Kubernetes. These
425
+ secrets are in addition to the ones defined in `METAFLOW_KUBERNETES_SECRETS`
426
+ in Metaflow configuration.
427
+ node_selector: Union[Dict[str,str], str], optional, default None
428
+ Kubernetes node selector(s) to apply to the pod running the task.
429
+ Can be passed in as a comma separated string of values e.g.
430
+ 'kubernetes.io/os=linux,kubernetes.io/arch=amd64' or as a dictionary
431
+ {'kubernetes.io/os': 'linux', 'kubernetes.io/arch': 'amd64'}
432
+ namespace : str, default METAFLOW_KUBERNETES_NAMESPACE
433
+ Kubernetes namespace to use when launching pod in Kubernetes.
434
+ gpu : int, optional, default None
435
+ Number of GPUs required for this step. A value of zero implies that
436
+ the scheduled node should not have GPUs.
437
+ gpu_vendor : str, default KUBERNETES_GPU_VENDOR
438
+ The vendor of the GPUs to be used for this step.
439
+ tolerations : List[Dict[str,str]], default []
440
+ The default is extracted from METAFLOW_KUBERNETES_TOLERATIONS.
441
+ Kubernetes tolerations to use when launching pod in Kubernetes.
442
+ labels: Dict[str, str], default: METAFLOW_KUBERNETES_LABELS
443
+ Kubernetes labels to use when launching pod in Kubernetes.
444
+ annotations: Dict[str, str], default: METAFLOW_KUBERNETES_ANNOTATIONS
445
+ Kubernetes annotations to use when launching pod in Kubernetes.
446
+ use_tmpfs : bool, default False
447
+ This enables an explicit tmpfs mount for this step.
448
+ tmpfs_tempdir : bool, default True
449
+ sets METAFLOW_TEMPDIR to tmpfs_path if set for this step.
450
+ tmpfs_size : int, optional, default: None
451
+ The value for the size (in MiB) of the tmpfs mount for this step.
452
+ This parameter maps to the `--tmpfs` option in Docker. Defaults to 50% of the
453
+ memory allocated for this step.
454
+ tmpfs_path : str, optional, default /metaflow_temp
455
+ Path to tmpfs mount for this step.
456
+ persistent_volume_claims : Dict[str, str], optional, default None
457
+ A map (dictionary) of persistent volumes to be mounted to the pod for this step. The map is from persistent
458
+ volumes to the path to which the volume is to be mounted, e.g., `{'pvc-name': '/path/to/mount/on'}`.
459
+ shared_memory: int, optional
460
+ Shared memory size (in MiB) required for this step
461
+ port: int, optional
462
+ Port number to specify in the Kubernetes job object
463
+ compute_pool : str, optional, default None
464
+ Compute pool to be used for for this step.
465
+ If not specified, any accessible compute pool within the perimeter is used.
466
+ hostname_resolution_timeout: int, default 10 * 60
467
+ Timeout in seconds for the workers tasks in the gang scheduled cluster to resolve the hostname of control task.
468
+ Only applicable when @parallel is used.
469
+ qos: str, default: Burstable
470
+ Quality of Service class to assign to the pod. Supported values are: Guaranteed, Burstable, BestEffort
445
471
 
446
- - If repo is found in the datastore:
447
- - Loads it directly from datastore to local path (can be temporary directory or specified path)
448
- """
449
- ...
450
-
451
- @typing.overload
452
- def coreweave_s3_proxy(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
453
- """
454
- CoreWeave-specific S3 Proxy decorator for routing S3 requests through a local proxy service.
455
- It exists to make it easier for users to know that this decorator should only be used with
456
- a Neo Cloud like CoreWeave.
457
- """
458
- ...
459
-
460
- @typing.overload
461
- def coreweave_s3_proxy(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
462
- ...
463
-
464
- def coreweave_s3_proxy(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
465
- """
466
- CoreWeave-specific S3 Proxy decorator for routing S3 requests through a local proxy service.
467
- It exists to make it easier for users to know that this decorator should only be used with
468
- a Neo Cloud like CoreWeave.
472
+ security_context: Dict[str, Any], optional, default None
473
+ Container security context. Applies to the task container. Allows the following keys:
474
+ - privileged: bool, optional, default None
475
+ - allow_privilege_escalation: bool, optional, default None
476
+ - run_as_user: int, optional, default None
477
+ - run_as_group: int, optional, default None
478
+ - run_as_non_root: bool, optional, default None
469
479
  """
470
480
  ...
471
481
 
@@ -519,348 +529,70 @@ def card(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typ
519
529
  ...
520
530
 
521
531
  @typing.overload
522
- def model(*, load: typing.Union[typing.List[str], str, typing.List[typing.Tuple[str, typing.Optional[str]]]] = None, temp_dir_root: str = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
532
+ def fast_bakery_internal(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
523
533
  """
524
- Enables loading / saving of models within a step.
525
-
526
- > Examples
527
- - Saving Models
528
- ```python
529
- @model
530
- @step
531
- def train(self):
532
- # current.model.save returns a dictionary reference to the model saved
533
- self.my_model = current.model.save(
534
- path_to_my_model,
535
- label="my_model",
536
- metadata={
537
- "epochs": 10,
538
- "batch-size": 32,
539
- "learning-rate": 0.001,
540
- }
541
- )
542
- self.next(self.test)
543
-
544
- @model(load="my_model")
545
- @step
546
- def test(self):
547
- # `current.model.loaded` returns a dictionary of the loaded models
548
- # where the key is the name of the artifact and the value is the path to the model
549
- print(os.listdir(current.model.loaded["my_model"]))
550
- self.next(self.end)
551
- ```
534
+ Internal decorator to support Fast bakery
535
+ """
536
+ ...
537
+
538
+ @typing.overload
539
+ def fast_bakery_internal(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
540
+ ...
541
+
542
+ def fast_bakery_internal(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
543
+ """
544
+ Internal decorator to support Fast bakery
545
+ """
546
+ ...
547
+
548
+ @typing.overload
549
+ def catch(*, var: typing.Optional[str] = None, print_exception: bool = True) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
550
+ """
551
+ Specifies that the step will success under all circumstances.
552
552
 
553
- - Loading models
554
- ```python
555
- @step
556
- def train(self):
557
- # current.model.load returns the path to the model loaded
558
- checkpoint_path = current.model.load(
559
- self.checkpoint_key,
560
- )
561
- model_path = current.model.load(
562
- self.model,
563
- )
564
- self.next(self.test)
565
- ```
553
+ The decorator will create an optional artifact, specified by `var`, which
554
+ contains the exception raised. You can use it to detect the presence
555
+ of errors, indicating that all happy-path artifacts produced by the step
556
+ are missing.
566
557
 
567
558
 
568
559
  Parameters
569
560
  ----------
570
- load : Union[List[str],str,List[Tuple[str,Union[str,None]]]], default: None
571
- Artifact name/s referencing the models/checkpoints to load. Artifact names refer to the names of the instance variables set to `self`.
572
- These artifact names give to `load` be reference objects or reference `key` string's from objects created by `current.checkpoint` / `current.model` / `current.huggingface_hub`.
573
- If a list of tuples is provided, the first element is the artifact name and the second element is the path the artifact needs be unpacked on
574
- the local filesystem. If the second element is None, the artifact will be unpacked in the current working directory.
575
- If a string is provided, then the artifact corresponding to that name will be loaded in the current working directory.
576
-
577
- temp_dir_root : str, default: None
578
- The root directory under which `current.model.loaded` will store loaded models
561
+ var : str, optional, default None
562
+ Name of the artifact in which to store the caught exception.
563
+ If not specified, the exception is not stored.
564
+ print_exception : bool, default True
565
+ Determines whether or not the exception is printed to
566
+ stdout when caught.
579
567
  """
580
568
  ...
581
569
 
582
570
  @typing.overload
583
- def model(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
571
+ def catch(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
584
572
  ...
585
573
 
586
574
  @typing.overload
587
- def model(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
575
+ def catch(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
588
576
  ...
589
577
 
590
- def model(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, load: typing.Union[typing.List[str], str, typing.List[typing.Tuple[str, typing.Optional[str]]]] = None, temp_dir_root: str = None):
578
+ def catch(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, var: typing.Optional[str] = None, print_exception: bool = True):
591
579
  """
592
- Enables loading / saving of models within a step.
593
-
594
- > Examples
595
- - Saving Models
596
- ```python
597
- @model
598
- @step
599
- def train(self):
600
- # current.model.save returns a dictionary reference to the model saved
601
- self.my_model = current.model.save(
602
- path_to_my_model,
603
- label="my_model",
604
- metadata={
605
- "epochs": 10,
606
- "batch-size": 32,
607
- "learning-rate": 0.001,
608
- }
609
- )
610
- self.next(self.test)
611
-
612
- @model(load="my_model")
613
- @step
614
- def test(self):
615
- # `current.model.loaded` returns a dictionary of the loaded models
616
- # where the key is the name of the artifact and the value is the path to the model
617
- print(os.listdir(current.model.loaded["my_model"]))
618
- self.next(self.end)
619
- ```
580
+ Specifies that the step will success under all circumstances.
620
581
 
621
- - Loading models
622
- ```python
623
- @step
624
- def train(self):
625
- # current.model.load returns the path to the model loaded
626
- checkpoint_path = current.model.load(
627
- self.checkpoint_key,
628
- )
629
- model_path = current.model.load(
630
- self.model,
631
- )
632
- self.next(self.test)
633
- ```
582
+ The decorator will create an optional artifact, specified by `var`, which
583
+ contains the exception raised. You can use it to detect the presence
584
+ of errors, indicating that all happy-path artifacts produced by the step
585
+ are missing.
634
586
 
635
587
 
636
588
  Parameters
637
589
  ----------
638
- load : Union[List[str],str,List[Tuple[str,Union[str,None]]]], default: None
639
- Artifact name/s referencing the models/checkpoints to load. Artifact names refer to the names of the instance variables set to `self`.
640
- These artifact names give to `load` be reference objects or reference `key` string's from objects created by `current.checkpoint` / `current.model` / `current.huggingface_hub`.
641
- If a list of tuples is provided, the first element is the artifact name and the second element is the path the artifact needs be unpacked on
642
- the local filesystem. If the second element is None, the artifact will be unpacked in the current working directory.
643
- If a string is provided, then the artifact corresponding to that name will be loaded in the current working directory.
644
-
645
- temp_dir_root : str, default: None
646
- The root directory under which `current.model.loaded` will store loaded models
647
- """
648
- ...
649
-
650
- def vllm(*, model: str, backend: str, openai_api_server: bool, debug: bool, card_refresh_interval: int, max_retries: int, retry_alert_frequency: int, engine_args: dict) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
651
- """
652
- This decorator is used to run vllm APIs as Metaflow task sidecars.
653
-
654
- User code call
655
- --------------
656
- @vllm(
657
- model="...",
658
- ...
659
- )
660
-
661
- Valid backend options
662
- ---------------------
663
- - 'local': Run as a separate process on the local task machine.
664
-
665
- Valid model options
666
- -------------------
667
- Any HuggingFace model identifier, e.g. 'meta-llama/Llama-3.2-1B'
668
-
669
- NOTE: vLLM's OpenAI-compatible server serves ONE model per server instance.
670
- If you need multiple models, you must create multiple @vllm decorators.
671
-
672
-
673
- Parameters
674
- ----------
675
- model: str
676
- HuggingFace model identifier to be served by vLLM.
677
- backend: str
678
- Determines where and how to run the vLLM process.
679
- openai_api_server: bool
680
- Whether to use OpenAI-compatible API server mode (subprocess) instead of native engine.
681
- Default is False (uses native engine).
682
- Set to True for backward compatibility with existing code.
683
- debug: bool
684
- Whether to turn on verbose debugging logs.
685
- card_refresh_interval: int
686
- Interval in seconds for refreshing the vLLM status card.
687
- Only used when openai_api_server=True.
688
- max_retries: int
689
- Maximum number of retries checking for vLLM server startup.
690
- Only used when openai_api_server=True.
691
- retry_alert_frequency: int
692
- Frequency of alert logs for vLLM server startup retries.
693
- Only used when openai_api_server=True.
694
- engine_args : dict
695
- Additional keyword arguments to pass to the vLLM engine.
696
- For example, `tensor_parallel_size=2`.
697
- """
698
- ...
699
-
700
- @typing.overload
701
- def app_deploy(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
702
- """
703
- Decorator prototype for all step decorators. This function gets specialized
704
- and imported for all decorators types by _import_plugin_decorators().
705
- """
706
- ...
707
-
708
- @typing.overload
709
- def app_deploy(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
710
- ...
711
-
712
- def app_deploy(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
713
- """
714
- Decorator prototype for all step decorators. This function gets specialized
715
- and imported for all decorators types by _import_plugin_decorators().
716
- """
717
- ...
718
-
719
- def kubernetes(*, cpu: int = 1, memory: int = 4096, disk: int = 10240, image: typing.Optional[str] = None, image_pull_policy: str = 'KUBERNETES_IMAGE_PULL_POLICY', image_pull_secrets: typing.List[str] = [], service_account: str = 'METAFLOW_KUBERNETES_SERVICE_ACCOUNT', secrets: typing.Optional[typing.List[str]] = None, node_selector: typing.Union[typing.Dict[str, str], str, None] = None, namespace: str = 'METAFLOW_KUBERNETES_NAMESPACE', gpu: typing.Optional[int] = None, gpu_vendor: str = 'KUBERNETES_GPU_VENDOR', tolerations: typing.List[typing.Dict[str, str]] = [], labels: typing.Dict[str, str] = 'METAFLOW_KUBERNETES_LABELS', annotations: typing.Dict[str, str] = 'METAFLOW_KUBERNETES_ANNOTATIONS', use_tmpfs: bool = False, tmpfs_tempdir: bool = True, tmpfs_size: typing.Optional[int] = None, tmpfs_path: typing.Optional[str] = '/metaflow_temp', persistent_volume_claims: typing.Optional[typing.Dict[str, str]] = None, shared_memory: typing.Optional[int] = None, port: typing.Optional[int] = None, compute_pool: typing.Optional[str] = None, hostname_resolution_timeout: int = 600, qos: str = 'Burstable', security_context: typing.Optional[typing.Dict[str, typing.Any]] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
720
- """
721
- Specifies that this step should execute on Kubernetes.
722
-
723
-
724
- Parameters
725
- ----------
726
- cpu : int, default 1
727
- Number of CPUs required for this step. If `@resources` is
728
- also present, the maximum value from all decorators is used.
729
- memory : int, default 4096
730
- Memory size (in MB) required for this step. If
731
- `@resources` is also present, the maximum value from all decorators is
732
- used.
733
- disk : int, default 10240
734
- Disk size (in MB) required for this step. If
735
- `@resources` is also present, the maximum value from all decorators is
736
- used.
737
- image : str, optional, default None
738
- Docker image to use when launching on Kubernetes. If not specified, and
739
- METAFLOW_KUBERNETES_CONTAINER_IMAGE is specified, that image is used. If
740
- not, a default Docker image mapping to the current version of Python is used.
741
- image_pull_policy: str, default KUBERNETES_IMAGE_PULL_POLICY
742
- If given, the imagePullPolicy to be applied to the Docker image of the step.
743
- image_pull_secrets: List[str], default []
744
- The default is extracted from METAFLOW_KUBERNETES_IMAGE_PULL_SECRETS.
745
- Kubernetes image pull secrets to use when pulling container images
746
- in Kubernetes.
747
- service_account : str, default METAFLOW_KUBERNETES_SERVICE_ACCOUNT
748
- Kubernetes service account to use when launching pod in Kubernetes.
749
- secrets : List[str], optional, default None
750
- Kubernetes secrets to use when launching pod in Kubernetes. These
751
- secrets are in addition to the ones defined in `METAFLOW_KUBERNETES_SECRETS`
752
- in Metaflow configuration.
753
- node_selector: Union[Dict[str,str], str], optional, default None
754
- Kubernetes node selector(s) to apply to the pod running the task.
755
- Can be passed in as a comma separated string of values e.g.
756
- 'kubernetes.io/os=linux,kubernetes.io/arch=amd64' or as a dictionary
757
- {'kubernetes.io/os': 'linux', 'kubernetes.io/arch': 'amd64'}
758
- namespace : str, default METAFLOW_KUBERNETES_NAMESPACE
759
- Kubernetes namespace to use when launching pod in Kubernetes.
760
- gpu : int, optional, default None
761
- Number of GPUs required for this step. A value of zero implies that
762
- the scheduled node should not have GPUs.
763
- gpu_vendor : str, default KUBERNETES_GPU_VENDOR
764
- The vendor of the GPUs to be used for this step.
765
- tolerations : List[Dict[str,str]], default []
766
- The default is extracted from METAFLOW_KUBERNETES_TOLERATIONS.
767
- Kubernetes tolerations to use when launching pod in Kubernetes.
768
- labels: Dict[str, str], default: METAFLOW_KUBERNETES_LABELS
769
- Kubernetes labels to use when launching pod in Kubernetes.
770
- annotations: Dict[str, str], default: METAFLOW_KUBERNETES_ANNOTATIONS
771
- Kubernetes annotations to use when launching pod in Kubernetes.
772
- use_tmpfs : bool, default False
773
- This enables an explicit tmpfs mount for this step.
774
- tmpfs_tempdir : bool, default True
775
- sets METAFLOW_TEMPDIR to tmpfs_path if set for this step.
776
- tmpfs_size : int, optional, default: None
777
- The value for the size (in MiB) of the tmpfs mount for this step.
778
- This parameter maps to the `--tmpfs` option in Docker. Defaults to 50% of the
779
- memory allocated for this step.
780
- tmpfs_path : str, optional, default /metaflow_temp
781
- Path to tmpfs mount for this step.
782
- persistent_volume_claims : Dict[str, str], optional, default None
783
- A map (dictionary) of persistent volumes to be mounted to the pod for this step. The map is from persistent
784
- volumes to the path to which the volume is to be mounted, e.g., `{'pvc-name': '/path/to/mount/on'}`.
785
- shared_memory: int, optional
786
- Shared memory size (in MiB) required for this step
787
- port: int, optional
788
- Port number to specify in the Kubernetes job object
789
- compute_pool : str, optional, default None
790
- Compute pool to be used for for this step.
791
- If not specified, any accessible compute pool within the perimeter is used.
792
- hostname_resolution_timeout: int, default 10 * 60
793
- Timeout in seconds for the workers tasks in the gang scheduled cluster to resolve the hostname of control task.
794
- Only applicable when @parallel is used.
795
- qos: str, default: Burstable
796
- Quality of Service class to assign to the pod. Supported values are: Guaranteed, Burstable, BestEffort
797
-
798
- security_context: Dict[str, Any], optional, default None
799
- Container security context. Applies to the task container. Allows the following keys:
800
- - privileged: bool, optional, default None
801
- - allow_privilege_escalation: bool, optional, default None
802
- - run_as_user: int, optional, default None
803
- - run_as_group: int, optional, default None
804
- - run_as_non_root: bool, optional, default None
805
- """
806
- ...
807
-
808
- @typing.overload
809
- def conda(*, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
810
- """
811
- Specifies the Conda environment for the step.
812
-
813
- Information in this decorator will augment any
814
- attributes set in the `@conda_base` flow-level decorator. Hence,
815
- you can use `@conda_base` to set packages required by all
816
- steps and use `@conda` to specify step-specific overrides.
817
-
818
-
819
- Parameters
820
- ----------
821
- packages : Dict[str, str], default {}
822
- Packages to use for this step. The key is the name of the package
823
- and the value is the version to use.
824
- libraries : Dict[str, str], default {}
825
- Supported for backward compatibility. When used with packages, packages will take precedence.
826
- python : str, optional, default None
827
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
828
- that the version used will correspond to the version of the Python interpreter used to start the run.
829
- disabled : bool, default False
830
- If set to True, disables @conda.
831
- """
832
- ...
833
-
834
- @typing.overload
835
- def conda(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
836
- ...
837
-
838
- @typing.overload
839
- def conda(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
840
- ...
841
-
842
- def conda(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False):
843
- """
844
- Specifies the Conda environment for the step.
845
-
846
- Information in this decorator will augment any
847
- attributes set in the `@conda_base` flow-level decorator. Hence,
848
- you can use `@conda_base` to set packages required by all
849
- steps and use `@conda` to specify step-specific overrides.
850
-
851
-
852
- Parameters
853
- ----------
854
- packages : Dict[str, str], default {}
855
- Packages to use for this step. The key is the name of the package
856
- and the value is the version to use.
857
- libraries : Dict[str, str], default {}
858
- Supported for backward compatibility. When used with packages, packages will take precedence.
859
- python : str, optional, default None
860
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
861
- that the version used will correspond to the version of the Python interpreter used to start the run.
862
- disabled : bool, default False
863
- If set to True, disables @conda.
590
+ var : str, optional, default None
591
+ Name of the artifact in which to store the caught exception.
592
+ If not specified, the exception is not stored.
593
+ print_exception : bool, default True
594
+ Determines whether or not the exception is printed to
595
+ stdout when caught.
864
596
  """
865
597
  ...
866
598
 
@@ -915,34 +647,518 @@ def pypi(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typ
915
647
  """
916
648
  ...
917
649
 
918
- @typing.overload
919
- def parallel(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
920
- """
921
- Decorator prototype for all step decorators. This function gets specialized
922
- and imported for all decorators types by _import_plugin_decorators().
923
- """
924
- ...
925
-
926
- @typing.overload
927
- def parallel(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
928
- ...
929
-
930
- def parallel(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
931
- """
932
- Decorator prototype for all step decorators. This function gets specialized
933
- and imported for all decorators types by _import_plugin_decorators().
650
+ def huggingface_hub(*, temp_dir_root: typing.Optional[str] = None, cache_scope: typing.Optional[str] = None, load: typing.Union[typing.List[str], typing.List[typing.Tuple[typing.Dict, str]], typing.List[typing.Tuple[str, str]], typing.List[typing.Dict], None]) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
934
651
  """
935
- ...
936
-
937
- @typing.overload
938
- def checkpoint(*, load_policy: str = 'fresh', temp_dir_root: str = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
939
- """
940
- Enables checkpointing for a step.
652
+ Decorator that helps cache, version, and store models/datasets from the Hugging Face Hub.
941
653
 
942
654
  > Examples
943
655
 
944
- - Saving Checkpoints
945
-
656
+ **Usage: creating references to models from the Hugging Face Hub that may be loaded in downstream steps**
657
+ ```python
658
+ @huggingface_hub
659
+ @step
660
+ def pull_model_from_huggingface(self):
661
+ # `current.huggingface_hub.snapshot_download` downloads the model from the Hugging Face Hub
662
+ # and saves it in the backend storage based on the model's `repo_id`. If there exists a model
663
+ # with the same `repo_id` in the backend storage, it will not download the model again. The return
664
+ # value of the function is a reference to the model in the backend storage.
665
+ # This reference can be used to load the model in the subsequent steps via `@model(load=["llama_model"])`
666
+
667
+ self.model_id = "mistralai/Mistral-7B-Instruct-v0.1"
668
+ self.llama_model = current.huggingface_hub.snapshot_download(
669
+ repo_id=self.model_id,
670
+ allow_patterns=["*.safetensors", "*.json", "tokenizer.*"],
671
+ )
672
+ self.next(self.train)
673
+ ```
674
+
675
+ **Usage: loading models directly from the Hugging Face Hub or from cache (from Metaflow's datastore)**
676
+ ```python
677
+ @huggingface_hub(load=["mistralai/Mistral-7B-Instruct-v0.1"])
678
+ @step
679
+ def pull_model_from_huggingface(self):
680
+ path_to_model = current.huggingface_hub.loaded["mistralai/Mistral-7B-Instruct-v0.1"]
681
+ ```
682
+
683
+ ```python
684
+ @huggingface_hub(load=[("mistralai/Mistral-7B-Instruct-v0.1", "/my-directory"), ("myorg/mistral-lora", "/my-lora-directory")])
685
+ @step
686
+ def finetune_model(self):
687
+ path_to_model = current.huggingface_hub.loaded["mistralai/Mistral-7B-Instruct-v0.1"]
688
+ # path_to_model will be /my-directory
689
+ ```
690
+
691
+ ```python
692
+ # Takes all the arguments passed to `snapshot_download`
693
+ # except for `local_dir`
694
+ @huggingface_hub(load=[
695
+ {
696
+ "repo_id": "mistralai/Mistral-7B-Instruct-v0.1",
697
+ },
698
+ {
699
+ "repo_id": "myorg/mistral-lora",
700
+ "repo_type": "model",
701
+ },
702
+ ])
703
+ @step
704
+ def finetune_model(self):
705
+ path_to_model = current.huggingface_hub.loaded["mistralai/Mistral-7B-Instruct-v0.1"]
706
+ # path_to_model will be /my-directory
707
+ ```
708
+
709
+
710
+ Parameters
711
+ ----------
712
+ temp_dir_root : str, optional
713
+ The root directory that will hold the temporary directory where objects will be downloaded.
714
+
715
+ cache_scope : str, optional
716
+ The scope of the cache. Can be `checkpoint` / `flow` / `global`.
717
+
718
+ - `checkpoint` (default): All repos are stored like objects saved by `@checkpoint`.
719
+ i.e., the cached path is derived from the namespace, flow, step, and Metaflow foreach iteration.
720
+ Any repo downloaded under this scope will only be retrieved from the cache when the step runs under the same namespace in the same flow (at the same foreach index).
721
+
722
+ - `flow`: All repos are cached under the flow, regardless of namespace.
723
+ i.e., the cached path is derived solely from the flow name.
724
+ When to use this mode:
725
+ - Multiple users are executing the same flow and want shared access to the repos cached by the decorator.
726
+ - Multiple versions of a flow are deployed, all needing access to the same repos cached by the decorator.
727
+
728
+ - `global`: All repos are cached under a globally static path.
729
+ i.e., the base path of the cache is static and all repos are stored under it.
730
+ When to use this mode:
731
+ - All repos from the Hugging Face Hub need to be shared by users across all flow executions.
732
+
733
+ Each caching scope comes with its own trade-offs:
734
+ - `checkpoint`:
735
+ - Has explicit control over when caches are populated (controlled by the same flow that has the `@huggingface_hub` decorator) but ends up hitting the Hugging Face Hub more often if there are many users/namespaces/steps.
736
+ - Since objects are written on a `namespace/flow/step` basis, the blast radius of a bad checkpoint is limited to a particular flow in a namespace.
737
+ - `flow`:
738
+ - Has less control over when caches are populated (can be written by any execution instance of a flow from any namespace) but results in more cache hits.
739
+ - The blast radius of a bad checkpoint is limited to all runs of a particular flow.
740
+ - It doesn't promote cache reuse across flows.
741
+ - `global`:
742
+ - Has no control over when caches are populated (can be written by any flow execution) but has the highest cache hit rate.
743
+ - It promotes cache reuse across flows.
744
+ - The blast radius of a bad checkpoint spans every flow that could be using a particular repo.
745
+
746
+ load: Union[List[str], List[Tuple[Dict, str]], List[Tuple[str, str]], List[Dict], None]
747
+ The list of repos (models/datasets) to load.
748
+
749
+ Loaded repos can be accessed via `current.huggingface_hub.loaded`. If load is set, then the following happens:
750
+
751
+ - If repo (model/dataset) is not found in the datastore:
752
+ - Downloads the repo from Hugging Face Hub to a temporary directory (or uses specified path) for local access
753
+ - Stores it in Metaflow's datastore (s3/gcs/azure etc.) with a unique name based on repo_type/repo_id
754
+ - All HF models loaded for a `@step` will be cached separately under flow/step/namespace.
755
+
756
+ - If repo is found in the datastore:
757
+ - Loads it directly from datastore to local path (can be temporary directory or specified path)
758
+ """
759
+ ...
760
+
761
+ def nvidia(*, gpu: int, gpu_type: str, queue_timeout: int) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
762
+ """
763
+ Specifies that this step should execute on DGX cloud.
764
+
765
+
766
+ Parameters
767
+ ----------
768
+ gpu : int
769
+ Number of GPUs to use.
770
+ gpu_type : str
771
+ Type of Nvidia GPU to use.
772
+ queue_timeout : int
773
+ Time to keep the job in NVCF's queue.
774
+ """
775
+ ...
776
+
777
+ @typing.overload
778
+ def environment(*, vars: typing.Dict[str, str] = {}) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
779
+ """
780
+ Specifies environment variables to be set prior to the execution of a step.
781
+
782
+
783
+ Parameters
784
+ ----------
785
+ vars : Dict[str, str], default {}
786
+ Dictionary of environment variables to set.
787
+ """
788
+ ...
789
+
790
+ @typing.overload
791
+ def environment(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
792
+ ...
793
+
794
+ @typing.overload
795
+ def environment(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
796
+ ...
797
+
798
+ def environment(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, vars: typing.Dict[str, str] = {}):
799
+ """
800
+ Specifies environment variables to be set prior to the execution of a step.
801
+
802
+
803
+ Parameters
804
+ ----------
805
+ vars : Dict[str, str], default {}
806
+ Dictionary of environment variables to set.
807
+ """
808
+ ...
809
+
810
+ @typing.overload
811
+ def model(*, load: typing.Union[typing.List[str], str, typing.List[typing.Tuple[str, typing.Optional[str]]]] = None, temp_dir_root: str = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
812
+ """
813
+ Enables loading / saving of models within a step.
814
+
815
+ > Examples
816
+ - Saving Models
817
+ ```python
818
+ @model
819
+ @step
820
+ def train(self):
821
+ # current.model.save returns a dictionary reference to the model saved
822
+ self.my_model = current.model.save(
823
+ path_to_my_model,
824
+ label="my_model",
825
+ metadata={
826
+ "epochs": 10,
827
+ "batch-size": 32,
828
+ "learning-rate": 0.001,
829
+ }
830
+ )
831
+ self.next(self.test)
832
+
833
+ @model(load="my_model")
834
+ @step
835
+ def test(self):
836
+ # `current.model.loaded` returns a dictionary of the loaded models
837
+ # where the key is the name of the artifact and the value is the path to the model
838
+ print(os.listdir(current.model.loaded["my_model"]))
839
+ self.next(self.end)
840
+ ```
841
+
842
+ - Loading models
843
+ ```python
844
+ @step
845
+ def train(self):
846
+ # current.model.load returns the path to the model loaded
847
+ checkpoint_path = current.model.load(
848
+ self.checkpoint_key,
849
+ )
850
+ model_path = current.model.load(
851
+ self.model,
852
+ )
853
+ self.next(self.test)
854
+ ```
855
+
856
+
857
+ Parameters
858
+ ----------
859
+ load : Union[List[str],str,List[Tuple[str,Union[str,None]]]], default: None
860
+ Artifact name/s referencing the models/checkpoints to load. Artifact names refer to the names of the instance variables set to `self`.
861
+ These artifact names given to `load` can be reference objects or reference `key` strings from objects created by `current.checkpoint` / `current.model` / `current.huggingface_hub`.
862
+ If a list of tuples is provided, the first element is the artifact name and the second element is the path the artifact needs to be unpacked on
863
+ the local filesystem. If the second element is None, the artifact will be unpacked in the current working directory.
864
+ If a string is provided, then the artifact corresponding to that name will be loaded in the current working directory.
865
+
866
+ temp_dir_root : str, default: None
867
+ The root directory under which `current.model.loaded` will store loaded models
868
+ """
869
+ ...
870
+
871
+ @typing.overload
872
+ def model(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
873
+ ...
874
+
875
+ @typing.overload
876
+ def model(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
877
+ ...
878
+
879
+ def model(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, load: typing.Union[typing.List[str], str, typing.List[typing.Tuple[str, typing.Optional[str]]]] = None, temp_dir_root: str = None):
880
+ """
881
+ Enables loading / saving of models within a step.
882
+
883
+ > Examples
884
+ - Saving Models
885
+ ```python
886
+ @model
887
+ @step
888
+ def train(self):
889
+ # current.model.save returns a dictionary reference to the model saved
890
+ self.my_model = current.model.save(
891
+ path_to_my_model,
892
+ label="my_model",
893
+ metadata={
894
+ "epochs": 10,
895
+ "batch-size": 32,
896
+ "learning-rate": 0.001,
897
+ }
898
+ )
899
+ self.next(self.test)
900
+
901
+ @model(load="my_model")
902
+ @step
903
+ def test(self):
904
+ # `current.model.loaded` returns a dictionary of the loaded models
905
+ # where the key is the name of the artifact and the value is the path to the model
906
+ print(os.listdir(current.model.loaded["my_model"]))
907
+ self.next(self.end)
908
+ ```
909
+
910
+ - Loading models
911
+ ```python
912
+ @step
913
+ def train(self):
914
+ # current.model.load returns the path to the model loaded
915
+ checkpoint_path = current.model.load(
916
+ self.checkpoint_key,
917
+ )
918
+ model_path = current.model.load(
919
+ self.model,
920
+ )
921
+ self.next(self.test)
922
+ ```
923
+
924
+
925
+ Parameters
926
+ ----------
927
+ load : Union[List[str],str,List[Tuple[str,Union[str,None]]]], default: None
928
+ Artifact name/s referencing the models/checkpoints to load. Artifact names refer to the names of the instance variables set to `self`.
929
+ These artifact names given to `load` can be reference objects or reference `key` strings from objects created by `current.checkpoint` / `current.model` / `current.huggingface_hub`.
930
+ If a list of tuples is provided, the first element is the artifact name and the second element is the path the artifact needs to be unpacked on
931
+ the local filesystem. If the second element is None, the artifact will be unpacked in the current working directory.
932
+ If a string is provided, then the artifact corresponding to that name will be loaded in the current working directory.
933
+
934
+ temp_dir_root : str, default: None
935
+ The root directory under which `current.model.loaded` will store loaded models
936
+ """
937
+ ...
938
+
939
+ def s3_proxy(*, integration_name: typing.Optional[str] = None, write_mode: typing.Optional[str] = None, debug: typing.Optional[bool] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
940
+ """
941
+ S3 Proxy decorator for routing S3 requests through a local proxy service.
942
+
943
+
944
+ Parameters
945
+ ----------
946
+ integration_name : str, optional
947
+ Name of the S3 proxy integration. If not specified, will use the only
948
+ available S3 proxy integration in the namespace (fails if multiple exist).
949
+ write_mode : str, optional
950
+ The desired behavior during write operations to target (origin) S3 bucket.
951
+ allowed options are:
952
+ "origin-and-cache" -> write to both the target S3 bucket and local object
953
+ storage
954
+ "origin" -> only write to the target S3 bucket
955
+ "cache" -> only write to the object storage service used for caching
956
+ debug : bool, optional
957
+ Enable debug logging for proxy operations.
958
+ """
959
+ ...
960
+
961
+ @typing.overload
962
+ def app_deploy(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
963
+ """
964
+ Decorator prototype for all step decorators. This function gets specialized
965
+ and imported for all decorators types by _import_plugin_decorators().
966
+ """
967
+ ...
968
+
969
+ @typing.overload
970
+ def app_deploy(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
971
+ ...
972
+
973
+ def app_deploy(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
974
+ """
975
+ Decorator prototype for all step decorators. This function gets specialized
976
+ and imported for all decorators types by _import_plugin_decorators().
977
+ """
978
+ ...
979
+
980
+ @typing.overload
981
+ def resources(*, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
982
+ """
983
+ Specifies the resources needed when executing this step.
984
+
985
+ Use `@resources` to specify the resource requirements
986
+ independently of the specific compute layer (`@batch`, `@kubernetes`).
987
+
988
+ You can choose the compute layer on the command line by executing e.g.
989
+ ```
990
+ python myflow.py run --with batch
991
+ ```
992
+ or
993
+ ```
994
+ python myflow.py run --with kubernetes
995
+ ```
996
+ which executes the flow on the desired system using the
997
+ requirements specified in `@resources`.
998
+
999
+
1000
+ Parameters
1001
+ ----------
1002
+ cpu : int, default 1
1003
+ Number of CPUs required for this step.
1004
+ gpu : int, optional, default None
1005
+ Number of GPUs required for this step.
1006
+ disk : int, optional, default None
1007
+ Disk size (in MB) required for this step. Only applies on Kubernetes.
1008
+ memory : int, default 4096
1009
+ Memory size (in MB) required for this step.
1010
+ shared_memory : int, optional, default None
1011
+ The value for the size (in MiB) of the /dev/shm volume for this step.
1012
+ This parameter maps to the `--shm-size` option in Docker.
1013
+ """
1014
+ ...
1015
+
1016
+ @typing.overload
1017
+ def resources(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1018
+ ...
1019
+
1020
+ @typing.overload
1021
+ def resources(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1022
+ ...
1023
+
1024
+ def resources(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None):
1025
+ """
1026
+ Specifies the resources needed when executing this step.
1027
+
1028
+ Use `@resources` to specify the resource requirements
1029
+ independently of the specific compute layer (`@batch`, `@kubernetes`).
1030
+
1031
+ You can choose the compute layer on the command line by executing e.g.
1032
+ ```
1033
+ python myflow.py run --with batch
1034
+ ```
1035
+ or
1036
+ ```
1037
+ python myflow.py run --with kubernetes
1038
+ ```
1039
+ which executes the flow on the desired system using the
1040
+ requirements specified in `@resources`.
1041
+
1042
+
1043
+ Parameters
1044
+ ----------
1045
+ cpu : int, default 1
1046
+ Number of CPUs required for this step.
1047
+ gpu : int, optional, default None
1048
+ Number of GPUs required for this step.
1049
+ disk : int, optional, default None
1050
+ Disk size (in MB) required for this step. Only applies on Kubernetes.
1051
+ memory : int, default 4096
1052
+ Memory size (in MB) required for this step.
1053
+ shared_memory : int, optional, default None
1054
+ The value for the size (in MiB) of the /dev/shm volume for this step.
1055
+ This parameter maps to the `--shm-size` option in Docker.
1056
+ """
1057
+ ...
1058
+
1059
+ def nvct(*, gpu: int, gpu_type: str) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1060
+ """
1061
+ Specifies that this step should execute on DGX cloud.
1062
+
1063
+
1064
+ Parameters
1065
+ ----------
1066
+ gpu : int
1067
+ Number of GPUs to use.
1068
+ gpu_type : str
1069
+ Type of Nvidia GPU to use.
1070
+ """
1071
+ ...
1072
+
1073
+ @typing.overload
1074
+ def conda(*, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1075
+ """
1076
+ Specifies the Conda environment for the step.
1077
+
1078
+ Information in this decorator will augment any
1079
+ attributes set in the `@conda_base` flow-level decorator. Hence,
1080
+ you can use `@conda_base` to set packages required by all
1081
+ steps and use `@conda` to specify step-specific overrides.
1082
+
1083
+
1084
+ Parameters
1085
+ ----------
1086
+ packages : Dict[str, str], default {}
1087
+ Packages to use for this step. The key is the name of the package
1088
+ and the value is the version to use.
1089
+ libraries : Dict[str, str], default {}
1090
+ Supported for backward compatibility. When used with packages, packages will take precedence.
1091
+ python : str, optional, default None
1092
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
1093
+ that the version used will correspond to the version of the Python interpreter used to start the run.
1094
+ disabled : bool, default False
1095
+ If set to True, disables @conda.
1096
+ """
1097
+ ...
1098
+
1099
+ @typing.overload
1100
+ def conda(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1101
+ ...
1102
+
1103
+ @typing.overload
1104
+ def conda(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1105
+ ...
1106
+
1107
+ def conda(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False):
1108
+ """
1109
+ Specifies the Conda environment for the step.
1110
+
1111
+ Information in this decorator will augment any
1112
+ attributes set in the `@conda_base` flow-level decorator. Hence,
1113
+ you can use `@conda_base` to set packages required by all
1114
+ steps and use `@conda` to specify step-specific overrides.
1115
+
1116
+
1117
+ Parameters
1118
+ ----------
1119
+ packages : Dict[str, str], default {}
1120
+ Packages to use for this step. The key is the name of the package
1121
+ and the value is the version to use.
1122
+ libraries : Dict[str, str], default {}
1123
+ Supported for backward compatibility. When used with packages, packages will take precedence.
1124
+ python : str, optional, default None
1125
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
1126
+ that the version used will correspond to the version of the Python interpreter used to start the run.
1127
+ disabled : bool, default False
1128
+ If set to True, disables @conda.
1129
+ """
1130
+ ...
1131
+
1132
+ @typing.overload
1133
+ def nebius_s3_proxy(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1134
+ """
1135
+ Nebius-specific S3 Proxy decorator for routing S3 requests through a local proxy service.
1136
+ It exists to make it easier for users to know that this decorator should only be used with
1137
+ a Neo Cloud like Nebius.
1138
+ """
1139
+ ...
1140
+
1141
+ @typing.overload
1142
+ def nebius_s3_proxy(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1143
+ ...
1144
+
1145
+ def nebius_s3_proxy(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
1146
+ """
1147
+ Nebius-specific S3 Proxy decorator for routing S3 requests through a local proxy service.
1148
+ It exists to make it easier for users to know that this decorator should only be used with
1149
+ a Neo Cloud like Nebius.
1150
+ """
1151
+ ...
1152
+
1153
+ @typing.overload
1154
+ def checkpoint(*, load_policy: str = 'fresh', temp_dir_root: str = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1155
+ """
1156
+ Enables checkpointing for a step.
1157
+
1158
+ > Examples
1159
+
1160
+ - Saving Checkpoints
1161
+
946
1162
  ```python
947
1163
  @checkpoint
948
1164
  @step
@@ -1054,390 +1270,241 @@ def checkpoint(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None
1054
1270
  # saved a checkpoint
1055
1271
  checkpoint_path = None
1056
1272
  if current.checkpoint.is_loaded: # Check if a checkpoint is loaded
1057
- print("Loaded checkpoint from the previous attempt")
1058
- checkpoint_path = current.checkpoint.directory
1059
-
1060
- model = create_model(self.parameters, checkpoint_path = checkpoint_path)
1061
- for i in range(self.epochs):
1062
- ...
1063
- ```
1064
-
1065
-
1066
- Parameters
1067
- ----------
1068
- load_policy : str, default: "fresh"
1069
- The policy for loading the checkpoint. The following policies are supported:
1070
- - "eager": Loads the latest available checkpoint within the namespace.
1071
- With this mode, the latest checkpoint written by any previous task (can be even a different run) of the step
1072
- will be loaded at the start of the task.
1073
- - "none": Do not load any checkpoint
1074
- - "fresh": Loads the latest checkpoint created within the running Task.
1075
- This mode helps loading checkpoints across various retry attempts of the same task.
1076
- With this mode, no checkpoint will be loaded at the start of a task but any checkpoints
1077
- created within the task will be loaded when the task retries execution on failure.
1078
-
1079
- temp_dir_root : str, default: None
1080
- The root directory under which `current.checkpoint.directory` will be created.
1081
- """
1082
- ...
1083
-
1084
- def ollama(*, models: list, backend: str, force_pull: bool, cache_update_policy: str, force_cache_update: bool, debug: bool, circuit_breaker_config: dict, timeout_config: dict) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1085
- """
1086
- This decorator is used to run Ollama APIs as Metaflow task sidecars.
1087
-
1088
- User code call
1089
- --------------
1090
- @ollama(
1091
- models=[...],
1092
- ...
1093
- )
1094
-
1095
- Valid backend options
1096
- ---------------------
1097
- - 'local': Run as a separate process on the local task machine.
1098
- - (TODO) 'managed': Outerbounds hosts and selects compute provider.
1099
- - (TODO) 'remote': Spin up separate instance to serve Ollama models.
1100
-
1101
- Valid model options
1102
- -------------------
1103
- Any model here https://ollama.com/search, e.g. 'llama3.2', 'llama3.3'
1104
-
1105
-
1106
- Parameters
1107
- ----------
1108
- models: list[str]
1109
- List of Ollama containers running models in sidecars.
1110
- backend: str
1111
- Determines where and how to run the Ollama process.
1112
- force_pull: bool
1113
- Whether to run `ollama pull` no matter what, or first check the remote cache in Metaflow datastore for this model key.
1114
- cache_update_policy: str
1115
- Cache update policy: "auto", "force", or "never".
1116
- force_cache_update: bool
1117
- Simple override for "force" cache update policy.
1118
- debug: bool
1119
- Whether to turn on verbose debugging logs.
1120
- circuit_breaker_config: dict
1121
- Configuration for circuit breaker protection. Keys: failure_threshold, recovery_timeout, reset_timeout.
1122
- timeout_config: dict
1123
- Configuration for various operation timeouts. Keys: pull, stop, health_check, install, server_startup.
1124
- """
1125
- ...
1126
-
1127
- @typing.overload
1128
- def test_append_card(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1129
- """
1130
- A simple decorator that demonstrates using CardDecoratorInjector
1131
- to inject a card and render simple markdown content.
1132
- """
1133
- ...
1134
-
1135
- @typing.overload
1136
- def test_append_card(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1137
- ...
1138
-
1139
- def test_append_card(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
1140
- """
1141
- A simple decorator that demonstrates using CardDecoratorInjector
1142
- to inject a card and render simple markdown content.
1143
- """
1144
- ...
1145
-
1146
- @typing.overload
1147
- def nebius_s3_proxy(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1148
- """
1149
- Nebius-specific S3 Proxy decorator for routing S3 requests through a local proxy service.
1150
- It exists to make it easier for users to know that this decorator should only be used with
1151
- a Neo Cloud like Nebius.
1152
- """
1153
- ...
1154
-
1155
- @typing.overload
1156
- def nebius_s3_proxy(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1157
- ...
1158
-
1159
- def nebius_s3_proxy(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
1160
- """
1161
- Nebius-specific S3 Proxy decorator for routing S3 requests through a local proxy service.
1162
- It exists to make it easier for users to know that this decorator should only be used with
1163
- a Neo Cloud like Nebius.
1164
- """
1165
- ...
1166
-
1167
- @typing.overload
1168
- def resources(*, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1169
- """
1170
- Specifies the resources needed when executing this step.
1171
-
1172
- Use `@resources` to specify the resource requirements
1173
- independently of the specific compute layer (`@batch`, `@kubernetes`).
1174
-
1175
- You can choose the compute layer on the command line by executing e.g.
1176
- ```
1177
- python myflow.py run --with batch
1178
- ```
1179
- or
1180
- ```
1181
- python myflow.py run --with kubernetes
1182
- ```
1183
- which executes the flow on the desired system using the
1184
- requirements specified in `@resources`.
1185
-
1186
-
1187
- Parameters
1188
- ----------
1189
- cpu : int, default 1
1190
- Number of CPUs required for this step.
1191
- gpu : int, optional, default None
1192
- Number of GPUs required for this step.
1193
- disk : int, optional, default None
1194
- Disk size (in MB) required for this step. Only applies on Kubernetes.
1195
- memory : int, default 4096
1196
- Memory size (in MB) required for this step.
1197
- shared_memory : int, optional, default None
1198
- The value for the size (in MiB) of the /dev/shm volume for this step.
1199
- This parameter maps to the `--shm-size` option in Docker.
1200
- """
1201
- ...
1202
-
1203
- @typing.overload
1204
- def resources(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1205
- ...
1206
-
1207
- @typing.overload
1208
- def resources(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1209
- ...
1210
-
1211
- def resources(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None):
1212
- """
1213
- Specifies the resources needed when executing this step.
1214
-
1215
- Use `@resources` to specify the resource requirements
1216
- independently of the specific compute layer (`@batch`, `@kubernetes`).
1217
-
1218
- You can choose the compute layer on the command line by executing e.g.
1219
- ```
1220
- python myflow.py run --with batch
1221
- ```
1222
- or
1223
- ```
1224
- python myflow.py run --with kubernetes
1273
+ print("Loaded checkpoint from the previous attempt")
1274
+ checkpoint_path = current.checkpoint.directory
1275
+
1276
+ model = create_model(self.parameters, checkpoint_path = checkpoint_path)
1277
+ for i in range(self.epochs):
1278
+ ...
1225
1279
  ```
1226
- which executes the flow on the desired system using the
1227
- requirements specified in `@resources`.
1228
1280
 
1229
1281
 
1230
1282
  Parameters
1231
1283
  ----------
1232
- cpu : int, default 1
1233
- Number of CPUs required for this step.
1234
- gpu : int, optional, default None
1235
- Number of GPUs required for this step.
1236
- disk : int, optional, default None
1237
- Disk size (in MB) required for this step. Only applies on Kubernetes.
1238
- memory : int, default 4096
1239
- Memory size (in MB) required for this step.
1240
- shared_memory : int, optional, default None
1241
- The value for the size (in MiB) of the /dev/shm volume for this step.
1242
- This parameter maps to the `--shm-size` option in Docker.
1284
+ load_policy : str, default: "fresh"
1285
+ The policy for loading the checkpoint. The following policies are supported:
1286
+ - "eager": Loads the latest available checkpoint within the namespace.
1287
+ With this mode, the latest checkpoint written by any previous task (can be even a different run) of the step
1288
+ will be loaded at the start of the task.
1289
+ - "none": Do not load any checkpoint
1290
+ - "fresh": Loads the latest checkpoint created within the running Task.
1291
+ This mode helps loading checkpoints across various retry attempts of the same task.
1292
+ With this mode, no checkpoint will be loaded at the start of a task but any checkpoints
1293
+ created within the task will be loaded when the task retries execution on failure.
1294
+
1295
+ temp_dir_root : str, default: None
1296
+ The root directory under which `current.checkpoint.directory` will be created.
1243
1297
  """
1244
1298
  ...
1245
1299
 
1246
- @typing.overload
1247
- def catch(*, var: typing.Optional[str] = None, print_exception: bool = True) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1300
+ def ollama(*, models: list, backend: str, force_pull: bool, cache_update_policy: str, force_cache_update: bool, debug: bool, circuit_breaker_config: dict, timeout_config: dict) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1248
1301
  """
1249
- Specifies that the step will succeed under all circumstances.
1250
-
1251
- The decorator will create an optional artifact, specified by `var`, which
1252
- contains the exception raised. You can use it to detect the presence
1253
- of errors, indicating that all happy-path artifacts produced by the step
1254
- are missing.
1302
+ This decorator is used to run Ollama APIs as Metaflow task sidecars.
1255
1303
 
1304
+ User code call
1305
+ --------------
1306
+ @ollama(
1307
+ models=[...],
1308
+ ...
1309
+ )
1256
1310
 
1257
- Parameters
1258
- ----------
1259
- var : str, optional, default None
1260
- Name of the artifact in which to store the caught exception.
1261
- If not specified, the exception is not stored.
1262
- print_exception : bool, default True
1263
- Determines whether or not the exception is printed to
1264
- stdout when caught.
1265
- """
1266
- ...
1267
-
1268
- @typing.overload
1269
- def catch(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1270
- ...
1271
-
1272
- @typing.overload
1273
- def catch(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1274
- ...
1275
-
1276
- def catch(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, var: typing.Optional[str] = None, print_exception: bool = True):
1277
- """
1278
- Specifies that the step will succeed under all circumstances.
1311
+ Valid backend options
1312
+ ---------------------
1313
+ - 'local': Run as a separate process on the local task machine.
1314
+ - (TODO) 'managed': Outerbounds hosts and selects compute provider.
1315
+ - (TODO) 'remote': Spin up separate instance to serve Ollama models.
1279
1316
 
1280
- The decorator will create an optional artifact, specified by `var`, which
1281
- contains the exception raised. You can use it to detect the presence
1282
- of errors, indicating that all happy-path artifacts produced by the step
1283
- are missing.
1317
+ Valid model options
1318
+ -------------------
1319
+ Any model here https://ollama.com/search, e.g. 'llama3.2', 'llama3.3'
1284
1320
 
1285
1321
 
1286
1322
  Parameters
1287
1323
  ----------
1288
- var : str, optional, default None
1289
- Name of the artifact in which to store the caught exception.
1290
- If not specified, the exception is not stored.
1291
- print_exception : bool, default True
1292
- Determines whether or not the exception is printed to
1293
- stdout when caught.
1324
+ models: list[str]
1325
+ List of Ollama containers running models in sidecars.
1326
+ backend: str
1327
+ Determines where and how to run the Ollama process.
1328
+ force_pull: bool
1329
+ Whether to run `ollama pull` no matter what, or first check the remote cache in Metaflow datastore for this model key.
1330
+ cache_update_policy: str
1331
+ Cache update policy: "auto", "force", or "never".
1332
+ force_cache_update: bool
1333
+ Simple override for "force" cache update policy.
1334
+ debug: bool
1335
+ Whether to turn on verbose debugging logs.
1336
+ circuit_breaker_config: dict
1337
+ Configuration for circuit breaker protection. Keys: failure_threshold, recovery_timeout, reset_timeout.
1338
+ timeout_config: dict
1339
+ Configuration for various operation timeouts. Keys: pull, stop, health_check, install, server_startup.
1294
1340
  """
1295
1341
  ...
1296
1342
 
1297
1343
  @typing.overload
1298
- def retry(*, times: int = 3, minutes_between_retries: int = 2) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1344
+ def secrets(*, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = [], role: typing.Optional[str] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1299
1345
  """
1300
- Specifies the number of times the task corresponding
1301
- to a step needs to be retried.
1302
-
1303
- This decorator is useful for handling transient errors, such as networking issues.
1304
- If your task contains operations that can't be retried safely, e.g. database updates,
1305
- it is advisable to annotate it with `@retry(times=0)`.
1306
-
1307
- This can be used in conjunction with the `@catch` decorator. The `@catch`
1308
- decorator will execute a no-op task after all retries have been exhausted,
1309
- ensuring that the flow execution can continue.
1346
+ Specifies secrets to be retrieved and injected as environment variables prior to
1347
+ the execution of a step.
1310
1348
 
1311
1349
 
1312
1350
  Parameters
1313
1351
  ----------
1314
- times : int, default 3
1315
- Number of times to retry this task.
1316
- minutes_between_retries : int, default 2
1317
- Number of minutes between retries.
1352
+ sources : List[Union[str, Dict[str, Any]]], default: []
1353
+ List of secret specs, defining how the secrets are to be retrieved
1354
+ role : str, optional, default: None
1355
+ Role to use for fetching secrets
1318
1356
  """
1319
1357
  ...
1320
1358
 
1321
1359
  @typing.overload
1322
- def retry(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1360
+ def secrets(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1323
1361
  ...
1324
1362
 
1325
1363
  @typing.overload
1326
- def retry(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1364
+ def secrets(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1327
1365
  ...
1328
1366
 
1329
- def retry(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, times: int = 3, minutes_between_retries: int = 2):
1367
+ def secrets(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = [], role: typing.Optional[str] = None):
1330
1368
  """
1331
- Specifies the number of times the task corresponding
1332
- to a step needs to be retried.
1333
-
1334
- This decorator is useful for handling transient errors, such as networking issues.
1335
- If your task contains operations that can't be retried safely, e.g. database updates,
1336
- it is advisable to annotate it with `@retry(times=0)`.
1337
-
1338
- This can be used in conjunction with the `@catch` decorator. The `@catch`
1339
- decorator will execute a no-op task after all retries have been exhausted,
1340
- ensuring that the flow execution can continue.
1369
+ Specifies secrets to be retrieved and injected as environment variables prior to
1370
+ the execution of a step.
1341
1371
 
1342
1372
 
1343
1373
  Parameters
1344
1374
  ----------
1345
- times : int, default 3
1346
- Number of times to retry this task.
1347
- minutes_between_retries : int, default 2
1348
- Number of minutes between retries.
1375
+ sources : List[Union[str, Dict[str, Any]]], default: []
1376
+ List of secret specs, defining how the secrets are to be retrieved
1377
+ role : str, optional, default: None
1378
+ Role to use for fetching secrets
1349
1379
  """
1350
1380
  ...
1351
1381
 
1352
- @typing.overload
1353
- def pypi_base(*, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1382
+ def project(*, name: str, branch: typing.Optional[str] = None, production: bool = False) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1354
1383
  """
1355
- Specifies the PyPI packages for all steps of the flow.
1384
+ Specifies what flows belong to the same project.
1385
+
1386
+ A project-specific namespace is created for all flows that
1387
+ use the same `@project(name)`.
1356
1388
 
1357
- Use `@pypi_base` to set common packages required by all
1358
- steps and use `@pypi` to specify step-specific overrides.
1359
1389
 
1360
1390
  Parameters
1361
1391
  ----------
1362
- packages : Dict[str, str], default: {}
1363
- Packages to use for this flow. The key is the name of the package
1364
- and the value is the version to use.
1365
- python : str, optional, default: None
1366
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
1367
- that the version used will correspond to the version of the Python interpreter used to start the run.
1392
+ name : str
1393
+ Project name. Make sure that the name is unique amongst all
1394
+ projects that use the same production scheduler. The name may
1395
+ contain only lowercase alphanumeric characters and underscores.
1396
+
1397
+ branch : Optional[str], default None
1398
+ The branch to use. If not specified, the branch is set to
1399
+ `user.<username>` unless `production` is set to `True`. This can
1400
+ also be set on the command line using `--branch` as a top-level option.
1401
+ It is an error to specify `branch` in the decorator and on the command line.
1402
+
1403
+ production : bool, default False
1404
+ Whether or not the branch is the production branch. This can also be set on the
1405
+ command line using `--production` as a top-level option. It is an error to specify
1406
+ `production` in the decorator and on the command line.
1407
+ The project branch name will be:
1408
+ - if `branch` is specified:
1409
+ - if `production` is True: `prod.<branch>`
1410
+ - if `production` is False: `test.<branch>`
1411
+ - if `branch` is not specified:
1412
+ - if `production` is True: `prod`
1413
+ - if `production` is False: `user.<username>`
1368
1414
  """
1369
1415
  ...
1370
1416
 
1371
1417
  @typing.overload
1372
- def pypi_base(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1373
- ...
1374
-
1375
- def pypi_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None):
1418
+ def schedule(*, hourly: bool = False, daily: bool = True, weekly: bool = False, cron: typing.Optional[str] = None, timezone: typing.Optional[str] = None) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1376
1419
  """
1377
- Specifies the PyPI packages for all steps of the flow.
1420
+ Specifies the times when the flow should be run when running on a
1421
+ production scheduler.
1378
1422
 
1379
- Use `@pypi_base` to set common packages required by all
1380
- steps and use `@pypi` to specify step-specific overrides.
1381
1423
 
1382
1424
  Parameters
1383
1425
  ----------
1384
- packages : Dict[str, str], default: {}
1385
- Packages to use for this flow. The key is the name of the package
1386
- and the value is the version to use.
1387
- python : str, optional, default: None
1388
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
1389
- that the version used will correspond to the version of the Python interpreter used to start the run.
1426
+ hourly : bool, default False
1427
+ Run the workflow hourly.
1428
+ daily : bool, default True
1429
+ Run the workflow daily.
1430
+ weekly : bool, default False
1431
+ Run the workflow weekly.
1432
+ cron : str, optional, default None
1433
+ Run the workflow at [a custom Cron schedule](https://docs.aws.amazon.com/eventbridge/latest/userguide/scheduled-events.html#cron-expressions)
1434
+ specified by this expression.
1435
+ timezone : str, optional, default None
1436
+ Timezone on which the schedule runs (default: None). Currently supported only for Argo workflows,
1437
+ which accepts timezones in [IANA format](https://nodatime.org/TimeZones).
1390
1438
  """
1391
1439
  ...
1392
1440
 
1393
1441
  @typing.overload
1394
- def conda_base(*, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1442
+ def schedule(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1443
+ ...
1444
+
1445
+ def schedule(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, hourly: bool = False, daily: bool = True, weekly: bool = False, cron: typing.Optional[str] = None, timezone: typing.Optional[str] = None):
1395
1446
  """
1396
- Specifies the Conda environment for all steps of the flow.
1397
-
1398
- Use `@conda_base` to set common libraries required by all
1399
- steps and use `@conda` to specify step-specific additions.
1447
+ Specifies the times when the flow should be run when running on a
1448
+ production scheduler.
1400
1449
 
1401
1450
 
1402
1451
  Parameters
1403
1452
  ----------
1404
- packages : Dict[str, str], default {}
1405
- Packages to use for this flow. The key is the name of the package
1406
- and the value is the version to use.
1407
- libraries : Dict[str, str], default {}
1408
- Supported for backward compatibility. When used with packages, packages will take precedence.
1409
- python : str, optional, default None
1410
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
1411
- that the version used will correspond to the version of the Python interpreter used to start the run.
1412
- disabled : bool, default False
1413
- If set to True, disables Conda.
1414
- """
1415
- ...
1416
-
1417
- @typing.overload
1418
- def conda_base(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1453
+ hourly : bool, default False
1454
+ Run the workflow hourly.
1455
+ daily : bool, default True
1456
+ Run the workflow daily.
1457
+ weekly : bool, default False
1458
+ Run the workflow weekly.
1459
+ cron : str, optional, default None
1460
+ Run the workflow at [a custom Cron schedule](https://docs.aws.amazon.com/eventbridge/latest/userguide/scheduled-events.html#cron-expressions)
1461
+ specified by this expression.
1462
+ timezone : str, optional, default None
1463
+ Timezone on which the schedule runs (default: None). Currently supported only for Argo workflows,
1464
+ which accepts timezones in [IANA format](https://nodatime.org/TimeZones).
1465
+ """
1419
1466
  ...
1420
1467
 
1421
- def conda_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False):
1468
+ def airflow_s3_key_sensor(*, timeout: int, poke_interval: int, mode: str, exponential_backoff: bool, pool: str, soft_fail: bool, name: str, description: str, bucket_key: typing.Union[str, typing.List[str]], bucket_name: str, wildcard_match: bool, aws_conn_id: str, verify: bool) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1422
1469
  """
1423
- Specifies the Conda environment for all steps of the flow.
1424
-
1425
- Use `@conda_base` to set common libraries required by all
1426
- steps and use `@conda` to specify step-specific additions.
1470
+ The `@airflow_s3_key_sensor` decorator attaches an Airflow [S3KeySensor](https://airflow.apache.org/docs/apache-airflow-providers-amazon/stable/_api/airflow/providers/amazon/aws/sensors/s3/index.html#airflow.providers.amazon.aws.sensors.s3.S3KeySensor)
1471
+ before the start step of the flow. This decorator only works when a flow is scheduled on Airflow
1472
+ and is compiled using `airflow create`. More than one `@airflow_s3_key_sensor` can be
1473
+ added as flow decorators. Adding more than one decorator will ensure that the `start` step
1474
+ starts only after all sensors finish.
1427
1475
 
1428
1476
 
1429
1477
  Parameters
1430
1478
  ----------
1431
- packages : Dict[str, str], default {}
1432
- Packages to use for this flow. The key is the name of the package
1433
- and the value is the version to use.
1434
- libraries : Dict[str, str], default {}
1435
- Supported for backward compatibility. When used with packages, packages will take precedence.
1436
- python : str, optional, default None
1437
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
1438
- that the version used will correspond to the version of the Python interpreter used to start the run.
1439
- disabled : bool, default False
1440
- If set to True, disables Conda.
1479
+ timeout : int
1480
+ Time, in seconds before the task times out and fails. (Default: 3600)
1481
+ poke_interval : int
1482
+ Time in seconds that the job should wait in between each try. (Default: 60)
1483
+ mode : str
1484
+ How the sensor operates. Options are: { poke | reschedule }. (Default: "poke")
1485
+ exponential_backoff : bool
1486
+ allow progressively longer waits between pokes by using an exponential backoff algorithm. (Default: True)
1487
+ pool : str
1488
+ the slot pool this task should run in,
1489
+ slot pools are a way to limit concurrency for certain tasks. (Default:None)
1490
+ soft_fail : bool
1491
+ Set to true to mark the task as SKIPPED on failure. (Default: False)
1492
+ name : str
1493
+ Name of the sensor on Airflow
1494
+ description : str
1495
+ Description of sensor in the Airflow UI
1496
+ bucket_key : Union[str, List[str]]
1497
+ The key(s) being waited on. Supports full s3:// style url or relative path from root level.
1498
+ When it's specified as a full s3:// url, please leave `bucket_name` as None
1499
+ bucket_name : str
1500
+ Name of the S3 bucket. Only needed when bucket_key is not provided as a full s3:// url.
1501
+ When specified, all the keys passed to bucket_key refer to this bucket. (Default:None)
1502
+ wildcard_match : bool
1503
+ whether the bucket_key should be interpreted as a Unix wildcard pattern. (Default: False)
1504
+ aws_conn_id : str
1505
+ a reference to the s3 connection on Airflow. (Default: None)
1506
+ verify : bool
1507
+ Whether or not to verify SSL certificates for S3 connection. (Default: None)
1441
1508
  """
1442
1509
  ...
1443
1510
 
@@ -1534,6 +1601,171 @@ def trigger(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, event: t
1534
1601
  """
1535
1602
  ...
1536
1603
 
1604
+ def with_artifact_store(f: typing.Optional[typing.Type[FlowSpecDerived]] = None):
1605
+ """
1606
+ Allows setting external datastores to save data for the
1607
+ `@checkpoint`/`@model`/`@huggingface_hub` decorators.
1608
+
1609
+ This decorator is useful when users wish to save data to a different datastore
1610
+ than what is configured in Metaflow. This can be for a variety of reasons:
1611
+
1612
+ 1. Data security: The objects need to be stored in a bucket (object storage) that is not accessible by other flows.
1613
+ 2. Data Locality: The location where the task is executing is not located in the same region as the datastore.
1614
+ - Example: Metaflow datastore lives in US East, but the task is executing in Finland datacenters.
1615
+ 3. Data Lifecycle Policies: The objects need to be archived / managed separately from the Metaflow managed objects.
1616
+ - Example: Flow is training very large models that need to be stored separately and will be deleted more aggressively than the Metaflow managed objects.
1617
+
1618
+ Usage:
1619
+ ----------
1620
+
1621
+ - Using a custom IAM role to access the datastore.
1622
+
1623
+ ```python
1624
+ @with_artifact_store(
1625
+ type="s3",
1626
+ config=lambda: {
1627
+ "root": "s3://my-bucket-foo/path/to/root",
1628
+ "role_arn": ROLE,
1629
+ },
1630
+ )
1631
+ class MyFlow(FlowSpec):
1632
+
1633
+ @checkpoint
1634
+ @step
1635
+ def start(self):
1636
+ with open("my_file.txt", "w") as f:
1637
+ f.write("Hello, World!")
1638
+ self.external_bucket_checkpoint = current.checkpoint.save("my_file.txt")
1639
+ self.next(self.end)
1640
+
1641
+ ```
1642
+
1643
+ - Using credentials to access the s3-compatible datastore.
1644
+
1645
+ ```python
1646
+ @with_artifact_store(
1647
+ type="s3",
1648
+ config=lambda: {
1649
+ "root": "s3://my-bucket-foo/path/to/root",
1650
+ "client_params": {
1651
+ "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1652
+ "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1653
+ },
1654
+ },
1655
+ )
1656
+ class MyFlow(FlowSpec):
1657
+
1658
+ @checkpoint
1659
+ @step
1660
+ def start(self):
1661
+ with open("my_file.txt", "w") as f:
1662
+ f.write("Hello, World!")
1663
+ self.external_bucket_checkpoint = current.checkpoint.save("my_file.txt")
1664
+ self.next(self.end)
1665
+
1666
+ ```
1667
+
1668
+ - Accessing objects stored in external datastores after task execution.
1669
+
1670
+ ```python
1671
+ run = Run("CheckpointsTestsFlow/8992")
1672
+ with artifact_store_from(run=run, config={
1673
+ "client_params": {
1674
+ "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1675
+ "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1676
+ },
1677
+ }):
1678
+ with Checkpoint() as cp:
1679
+ latest = cp.list(
1680
+ task=run["start"].task
1681
+ )[0]
1682
+ print(latest)
1683
+ cp.load(
1684
+ latest,
1685
+ "test-checkpoints"
1686
+ )
1687
+
1688
+ task = Task("TorchTuneFlow/8484/train/53673")
1689
+ with artifact_store_from(run=run, config={
1690
+ "client_params": {
1691
+ "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1692
+ "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1693
+ },
1694
+ }):
1695
+ load_model(
1696
+ task.data.model_ref,
1697
+ "test-models"
1698
+ )
1699
+ ```
1700
+ Parameters:
1701
+ ----------
1702
+
1703
+ type: str
1704
+ The type of the datastore. Can be one of 's3', 'gcs', 'azure' or any other supported metaflow Datastore.
1705
+
1706
+ config: dict or Callable
1707
+ Dictionary of configuration options for the datastore. The following keys are required:
1708
+ - root: The root path in the datastore where the data will be saved. (needs to be in the format expected by the datastore)
1709
+ - example: 's3://bucket-name/path/to/root'
1710
+ - example: 'gs://bucket-name/path/to/root'
1711
+ - example: 'https://myblockacc.blob.core.windows.net/metaflow/'
1712
+ - role_arn (optional): AWS IAM role to access s3 bucket (only when `type` is 's3')
1713
+ - session_vars (optional): AWS session variables to access s3 bucket (only when `type` is 's3')
1714
+ - client_params (optional): AWS client parameters to access s3 bucket (only when `type` is 's3')
1715
+ """
1716
+ ...
1717
+
1718
+ @typing.overload
1719
+ def conda_base(*, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1720
+ """
1721
+ Specifies the Conda environment for all steps of the flow.
1722
+
1723
+ Use `@conda_base` to set common libraries required by all
1724
+ steps and use `@conda` to specify step-specific additions.
1725
+
1726
+
1727
+ Parameters
1728
+ ----------
1729
+ packages : Dict[str, str], default {}
1730
+ Packages to use for this flow. The key is the name of the package
1731
+ and the value is the version to use.
1732
+ libraries : Dict[str, str], default {}
1733
+ Supported for backward compatibility. When used with packages, packages will take precedence.
1734
+ python : str, optional, default None
1735
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
1736
+ that the version used will correspond to the version of the Python interpreter used to start the run.
1737
+ disabled : bool, default False
1738
+ If set to True, disables Conda.
1739
+ """
1740
+ ...
1741
+
1742
+ @typing.overload
1743
+ def conda_base(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1744
+ ...
1745
+
1746
+ def conda_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False):
1747
+ """
1748
+ Specifies the Conda environment for all steps of the flow.
1749
+
1750
+ Use `@conda_base` to set common libraries required by all
1751
+ steps and use `@conda` to specify step-specific additions.
1752
+
1753
+
1754
+ Parameters
1755
+ ----------
1756
+ packages : Dict[str, str], default {}
1757
+ Packages to use for this flow. The key is the name of the package
1758
+ and the value is the version to use.
1759
+ libraries : Dict[str, str], default {}
1760
+ Supported for backward compatibility. When used with packages, packages will take precedence.
1761
+ python : str, optional, default None
1762
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
1763
+ that the version used will correspond to the version of the Python interpreter used to start the run.
1764
+ disabled : bool, default False
1765
+ If set to True, disables Conda.
1766
+ """
1767
+ ...
1768
+
1537
1769
  @typing.overload
1538
1770
  def trigger_on_finish(*, flow: typing.Union[typing.Dict[str, str], str, None] = None, flows: typing.List[typing.Union[str, typing.Dict[str, str]]] = [], options: typing.Dict[str, typing.Any] = {}) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1539
1771
  """
@@ -1626,12 +1858,53 @@ def trigger_on_finish(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *
1626
1858
 
1627
1859
  Parameters
1628
1860
  ----------
1629
- flow : Union[str, Dict[str, str]], optional, default None
1630
- Upstream flow dependency for this flow.
1631
- flows : List[Union[str, Dict[str, str]]], default []
1632
- Upstream flow dependencies for this flow.
1633
- options : Dict[str, Any], default {}
1634
- Backend-specific configuration for tuning eventing behavior.
1861
+ flow : Union[str, Dict[str, str]], optional, default None
1862
+ Upstream flow dependency for this flow.
1863
+ flows : List[Union[str, Dict[str, str]]], default []
1864
+ Upstream flow dependencies for this flow.
1865
+ options : Dict[str, Any], default {}
1866
+ Backend-specific configuration for tuning eventing behavior.
1867
+ """
1868
+ ...
1869
+
1870
+ @typing.overload
1871
+ def pypi_base(*, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1872
+ """
1873
+ Specifies the PyPI packages for all steps of the flow.
1874
+
1875
+ Use `@pypi_base` to set common packages required by all
1876
+ steps and use `@pypi` to specify step-specific overrides.
1877
+
1878
+ Parameters
1879
+ ----------
1880
+ packages : Dict[str, str], default: {}
1881
+ Packages to use for this flow. The key is the name of the package
1882
+ and the value is the version to use.
1883
+ python : str, optional, default: None
1884
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
1885
+ that the version used will correspond to the version of the Python interpreter used to start the run.
1886
+ """
1887
+ ...
1888
+
1889
+ @typing.overload
1890
+ def pypi_base(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1891
+ ...
1892
+
1893
+ def pypi_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None):
1894
+ """
1895
+ Specifies the PyPI packages for all steps of the flow.
1896
+
1897
+ Use `@pypi_base` to set common packages required by all
1898
+ steps and use `@pypi` to specify step-specific overrides.
1899
+
1900
+ Parameters
1901
+ ----------
1902
+ packages : Dict[str, str], default: {}
1903
+ Packages to use for this flow. The key is the name of the package
1904
+ and the value is the version to use.
1905
+ python : str, optional, default: None
1906
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
1907
+ that the version used will correspond to the version of the Python interpreter used to start the run.
1635
1908
  """
1636
1909
  ...
1637
1910
 
@@ -1678,248 +1951,5 @@ def airflow_external_task_sensor(*, timeout: int, poke_interval: int, mode: str,
1678
1951
  """
1679
1952
  ...
1680
1953
 
1681
- @typing.overload
1682
- def schedule(*, hourly: bool = False, daily: bool = True, weekly: bool = False, cron: typing.Optional[str] = None, timezone: typing.Optional[str] = None) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1683
- """
1684
- Specifies the times when the flow should be run when running on a
1685
- production scheduler.
1686
-
1687
-
1688
- Parameters
1689
- ----------
1690
- hourly : bool, default False
1691
- Run the workflow hourly.
1692
- daily : bool, default True
1693
- Run the workflow daily.
1694
- weekly : bool, default False
1695
- Run the workflow weekly.
1696
- cron : str, optional, default None
1697
- Run the workflow at [a custom Cron schedule](https://docs.aws.amazon.com/eventbridge/latest/userguide/scheduled-events.html#cron-expressions)
1698
- specified by this expression.
1699
- timezone : str, optional, default None
1700
- Timezone on which the schedule runs (default: None). Currently supported only for Argo workflows,
1701
- which accepts timezones in [IANA format](https://nodatime.org/TimeZones).
1702
- """
1703
- ...
1704
-
1705
- @typing.overload
1706
- def schedule(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1707
- ...
1708
-
1709
- def schedule(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, hourly: bool = False, daily: bool = True, weekly: bool = False, cron: typing.Optional[str] = None, timezone: typing.Optional[str] = None):
1710
- """
1711
- Specifies the times when the flow should be run when running on a
1712
- production scheduler.
1713
-
1714
-
1715
- Parameters
1716
- ----------
1717
- hourly : bool, default False
1718
- Run the workflow hourly.
1719
- daily : bool, default True
1720
- Run the workflow daily.
1721
- weekly : bool, default False
1722
- Run the workflow weekly.
1723
- cron : str, optional, default None
1724
- Run the workflow at [a custom Cron schedule](https://docs.aws.amazon.com/eventbridge/latest/userguide/scheduled-events.html#cron-expressions)
1725
- specified by this expression.
1726
- timezone : str, optional, default None
1727
- Timezone on which the schedule runs (default: None). Currently supported only for Argo workflows,
1728
- which accepts timezones in [IANA format](https://nodatime.org/TimeZones).
1729
- """
1730
- ...
1731
-
1732
- def with_artifact_store(f: typing.Optional[typing.Type[FlowSpecDerived]] = None):
1733
- """
1734
- Allows setting external datastores to save data for the
1735
- `@checkpoint`/`@model`/`@huggingface_hub` decorators.
1736
-
1737
- This decorator is useful when users wish to save data to a different datastore
1738
- than what is configured in Metaflow. This can be for a variety of reasons:
1739
-
1740
- 1. Data security: The objects need to be stored in a bucket (object storage) that is not accessible by other flows.
1741
- 2. Data Locality: The location where the task is executing is not located in the same region as the datastore.
1742
- - Example: Metaflow datastore lives in US East, but the task is executing in Finland datacenters.
1743
- 3. Data Lifecycle Policies: The objects need to be archived / managed separately from the Metaflow managed objects.
1744
- - Example: Flow is training very large models that need to be stored separately and will be deleted more aggressively than the Metaflow managed objects.
1745
-
1746
- Usage:
1747
- ----------
1748
-
1749
- - Using a custom IAM role to access the datastore.
1750
-
1751
- ```python
1752
- @with_artifact_store(
1753
- type="s3",
1754
- config=lambda: {
1755
- "root": "s3://my-bucket-foo/path/to/root",
1756
- "role_arn": ROLE,
1757
- },
1758
- )
1759
- class MyFlow(FlowSpec):
1760
-
1761
- @checkpoint
1762
- @step
1763
- def start(self):
1764
- with open("my_file.txt", "w") as f:
1765
- f.write("Hello, World!")
1766
- self.external_bucket_checkpoint = current.checkpoint.save("my_file.txt")
1767
- self.next(self.end)
1768
-
1769
- ```
1770
-
1771
- - Using credentials to access the s3-compatible datastore.
1772
-
1773
- ```python
1774
- @with_artifact_store(
1775
- type="s3",
1776
- config=lambda: {
1777
- "root": "s3://my-bucket-foo/path/to/root",
1778
- "client_params": {
1779
- "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1780
- "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1781
- },
1782
- },
1783
- )
1784
- class MyFlow(FlowSpec):
1785
-
1786
- @checkpoint
1787
- @step
1788
- def start(self):
1789
- with open("my_file.txt", "w") as f:
1790
- f.write("Hello, World!")
1791
- self.external_bucket_checkpoint = current.checkpoint.save("my_file.txt")
1792
- self.next(self.end)
1793
-
1794
- ```
1795
-
1796
- - Accessing objects stored in external datastores after task execution.
1797
-
1798
- ```python
1799
- run = Run("CheckpointsTestsFlow/8992")
1800
- with artifact_store_from(run=run, config={
1801
- "client_params": {
1802
- "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1803
- "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1804
- },
1805
- }):
1806
- with Checkpoint() as cp:
1807
- latest = cp.list(
1808
- task=run["start"].task
1809
- )[0]
1810
- print(latest)
1811
- cp.load(
1812
- latest,
1813
- "test-checkpoints"
1814
- )
1815
-
1816
- task = Task("TorchTuneFlow/8484/train/53673")
1817
- with artifact_store_from(run=run, config={
1818
- "client_params": {
1819
- "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1820
- "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1821
- },
1822
- }):
1823
- load_model(
1824
- task.data.model_ref,
1825
- "test-models"
1826
- )
1827
- ```
1828
- Parameters:
1829
- ----------
1830
-
1831
- type: str
1832
- The type of the datastore. Can be one of 's3', 'gcs', 'azure' or any other supported metaflow Datastore.
1833
-
1834
- config: dict or Callable
1835
- Dictionary of configuration options for the datastore. The following keys are required:
1836
- - root: The root path in the datastore where the data will be saved. (needs to be in the format expected by the datastore)
1837
- - example: 's3://bucket-name/path/to/root'
1838
- - example: 'gs://bucket-name/path/to/root'
1839
- - example: 'https://myblockacc.blob.core.windows.net/metaflow/'
1840
- - role_arn (optional): AWS IAM role to access s3 bucket (only when `type` is 's3')
1841
- - session_vars (optional): AWS session variables to access s3 bucket (only when `type` is 's3')
1842
- - client_params (optional): AWS client parameters to access s3 bucket (only when `type` is 's3')
1843
- """
1844
- ...
1845
-
1846
- def airflow_s3_key_sensor(*, timeout: int, poke_interval: int, mode: str, exponential_backoff: bool, pool: str, soft_fail: bool, name: str, description: str, bucket_key: typing.Union[str, typing.List[str]], bucket_name: str, wildcard_match: bool, aws_conn_id: str, verify: bool) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1847
- """
1848
- The `@airflow_s3_key_sensor` decorator attaches an Airflow [S3KeySensor](https://airflow.apache.org/docs/apache-airflow-providers-amazon/stable/_api/airflow/providers/amazon/aws/sensors/s3/index.html#airflow.providers.amazon.aws.sensors.s3.S3KeySensor)
1849
- before the start step of the flow. This decorator only works when a flow is scheduled on Airflow
1850
- and is compiled using `airflow create`. More than one `@airflow_s3_key_sensor` can be
1851
- added as flow decorators. Adding more than one decorator will ensure that the `start` step
1852
- starts only after all sensors finish.
1853
-
1854
-
1855
- Parameters
1856
- ----------
1857
- timeout : int
1858
- Time, in seconds before the task times out and fails. (Default: 3600)
1859
- poke_interval : int
1860
- Time in seconds that the job should wait in between each try. (Default: 60)
1861
- mode : str
1862
- How the sensor operates. Options are: { poke | reschedule }. (Default: "poke")
1863
- exponential_backoff : bool
1864
- Allow progressively longer waits between pokes by using an exponential backoff algorithm. (Default: True)
1865
- pool : str
1866
- the slot pool this task should run in,
1867
- slot pools are a way to limit concurrency for certain tasks. (Default:None)
1868
- soft_fail : bool
1869
- Set to true to mark the task as SKIPPED on failure. (Default: False)
1870
- name : str
1871
- Name of the sensor on Airflow
1872
- description : str
1873
- Description of sensor in the Airflow UI
1874
- bucket_key : Union[str, List[str]]
1875
- The key(s) being waited on. Supports full s3:// style url or relative path from root level.
1876
- When it's specified as a full s3:// url, please leave `bucket_name` as None
1877
- bucket_name : str
1878
- Name of the S3 bucket. Only needed when bucket_key is not provided as a full s3:// url.
1879
- When specified, all the keys passed to bucket_key refer to this bucket. (Default: None)
1880
- wildcard_match : bool
1881
- whether the bucket_key should be interpreted as a Unix wildcard pattern. (Default: False)
1882
- aws_conn_id : str
1883
- a reference to the s3 connection on Airflow. (Default: None)
1884
- verify : bool
1885
- Whether or not to verify SSL certificates for S3 connection. (Default: None)
1886
- """
1887
- ...
1888
-
1889
- def project(*, name: str, branch: typing.Optional[str] = None, production: bool = False) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1890
- """
1891
- Specifies what flows belong to the same project.
1892
-
1893
- A project-specific namespace is created for all flows that
1894
- use the same `@project(name)`.
1895
-
1896
-
1897
- Parameters
1898
- ----------
1899
- name : str
1900
- Project name. Make sure that the name is unique amongst all
1901
- projects that use the same production scheduler. The name may
1902
- contain only lowercase alphanumeric characters and underscores.
1903
-
1904
- branch : Optional[str], default None
1905
- The branch to use. If not specified, the branch is set to
1906
- `user.<username>` unless `production` is set to `True`. This can
1907
- also be set on the command line using `--branch` as a top-level option.
1908
- It is an error to specify `branch` in the decorator and on the command line.
1909
-
1910
- production : bool, default False
1911
- Whether or not the branch is the production branch. This can also be set on the
1912
- command line using `--production` as a top-level option. It is an error to specify
1913
- `production` in the decorator and on the command line.
1914
- The project branch name will be:
1915
- - if `branch` is specified:
1916
- - if `production` is True: `prod.<branch>`
1917
- - if `production` is False: `test.<branch>`
1918
- - if `branch` is not specified:
1919
- - if `production` is True: `prod`
1920
- - if `production` is False: `user.<username>`
1921
- """
1922
- ...
1923
-
1924
1954
  pkg_name: str
1925
1955