ob-metaflow-stubs 6.0.10.16__py2.py3-none-any.whl → 6.0.10.18__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ob-metaflow-stubs might be problematic. Click here for more details.

Files changed (266)
  1. metaflow-stubs/__init__.pyi +1058 -1058
  2. metaflow-stubs/cards.pyi +2 -2
  3. metaflow-stubs/cli.pyi +2 -2
  4. metaflow-stubs/cli_components/__init__.pyi +2 -2
  5. metaflow-stubs/cli_components/utils.pyi +2 -2
  6. metaflow-stubs/client/__init__.pyi +2 -2
  7. metaflow-stubs/client/core.pyi +6 -6
  8. metaflow-stubs/client/filecache.pyi +3 -3
  9. metaflow-stubs/events.pyi +2 -2
  10. metaflow-stubs/exception.pyi +2 -2
  11. metaflow-stubs/flowspec.pyi +5 -5
  12. metaflow-stubs/generated_for.txt +1 -1
  13. metaflow-stubs/includefile.pyi +3 -3
  14. metaflow-stubs/meta_files.pyi +2 -2
  15. metaflow-stubs/metadata_provider/__init__.pyi +2 -2
  16. metaflow-stubs/metadata_provider/heartbeat.pyi +2 -2
  17. metaflow-stubs/metadata_provider/metadata.pyi +2 -2
  18. metaflow-stubs/metadata_provider/util.pyi +2 -2
  19. metaflow-stubs/metaflow_config.pyi +2 -2
  20. metaflow-stubs/metaflow_current.pyi +77 -77
  21. metaflow-stubs/metaflow_git.pyi +2 -2
  22. metaflow-stubs/mf_extensions/__init__.pyi +2 -2
  23. metaflow-stubs/mf_extensions/obcheckpoint/__init__.pyi +2 -2
  24. metaflow-stubs/mf_extensions/obcheckpoint/plugins/__init__.pyi +2 -2
  25. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/__init__.pyi +2 -2
  26. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/__init__.pyi +2 -2
  27. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/async_cards.pyi +3 -3
  28. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/deco_injection_mixin.pyi +2 -2
  29. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/extra_components.pyi +3 -3
  30. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/__init__.pyi +2 -2
  31. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/cards/__init__.pyi +2 -2
  32. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/cards/checkpoint_lister.pyi +3 -3
  33. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/cards/lineage_card.pyi +2 -2
  34. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/checkpoint_storage.pyi +4 -4
  35. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/constructors.pyi +2 -2
  36. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/core.pyi +2 -2
  37. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/decorator.pyi +4 -4
  38. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/exceptions.pyi +2 -2
  39. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/final_api.pyi +2 -2
  40. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/lineage.pyi +2 -2
  41. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/__init__.pyi +2 -2
  42. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/context.pyi +3 -3
  43. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/core.pyi +3 -3
  44. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/decorator.pyi +2 -2
  45. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/exceptions.pyi +2 -2
  46. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/task_utils.pyi +3 -3
  47. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/utils.pyi +2 -2
  48. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastructures.pyi +3 -3
  49. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/exceptions.pyi +2 -2
  50. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/hf_hub/__init__.pyi +2 -2
  51. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/hf_hub/cards/__init__.pyi +2 -2
  52. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/hf_hub/cards/hf_hub_card.pyi +3 -3
  53. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/hf_hub/decorator.pyi +2 -2
  54. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/__init__.pyi +2 -2
  55. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/core.pyi +3 -3
  56. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/exceptions.pyi +2 -2
  57. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/model_storage.pyi +4 -4
  58. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/__init__.pyi +2 -2
  59. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/flowspec_utils.pyi +2 -2
  60. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/general.pyi +2 -2
  61. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/identity_utils.pyi +3 -3
  62. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/serialization_handler/__init__.pyi +2 -2
  63. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/serialization_handler/base.pyi +2 -2
  64. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/serialization_handler/tar.pyi +2 -2
  65. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/tar_utils.pyi +2 -2
  66. metaflow-stubs/mf_extensions/outerbounds/__init__.pyi +2 -2
  67. metaflow-stubs/mf_extensions/outerbounds/plugins/__init__.pyi +2 -2
  68. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/__init__.pyi +2 -2
  69. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/__init__.pyi +2 -2
  70. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/_state_machine.pyi +2 -2
  71. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/_vendor/__init__.pyi +2 -2
  72. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/_vendor/spinner/__init__.pyi +2 -2
  73. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/_vendor/spinner/spinners.pyi +2 -2
  74. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/app_cli.pyi +3 -3
  75. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/app_config.pyi +2 -2
  76. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/capsule.pyi +3 -3
  77. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/click_importer.pyi +2 -2
  78. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/code_package/__init__.pyi +2 -2
  79. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/code_package/code_packager.pyi +2 -2
  80. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/__init__.pyi +2 -2
  81. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/cli_generator.pyi +2 -2
  82. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/config_utils.pyi +3 -3
  83. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/schema_export.pyi +2 -2
  84. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/typed_configs.pyi +3 -3
  85. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/unified_config.pyi +3 -3
  86. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/dependencies.pyi +3 -3
  87. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/deployer.pyi +4 -4
  88. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/experimental/__init__.pyi +2 -2
  89. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/perimeters.pyi +2 -2
  90. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/utils.pyi +3 -3
  91. metaflow-stubs/mf_extensions/outerbounds/plugins/aws/__init__.pyi +2 -2
  92. metaflow-stubs/mf_extensions/outerbounds/plugins/aws/assume_role_decorator.pyi +3 -3
  93. metaflow-stubs/mf_extensions/outerbounds/plugins/card_utilities/__init__.pyi +2 -2
  94. metaflow-stubs/mf_extensions/outerbounds/plugins/card_utilities/async_cards.pyi +2 -2
  95. metaflow-stubs/mf_extensions/outerbounds/plugins/card_utilities/injector.pyi +2 -2
  96. metaflow-stubs/mf_extensions/outerbounds/plugins/checkpoint_datastores/__init__.pyi +2 -2
  97. metaflow-stubs/mf_extensions/outerbounds/plugins/checkpoint_datastores/coreweave.pyi +3 -3
  98. metaflow-stubs/mf_extensions/outerbounds/plugins/checkpoint_datastores/nebius.pyi +3 -3
  99. metaflow-stubs/mf_extensions/outerbounds/plugins/fast_bakery/__init__.pyi +2 -2
  100. metaflow-stubs/mf_extensions/outerbounds/plugins/fast_bakery/baker.pyi +3 -3
  101. metaflow-stubs/mf_extensions/outerbounds/plugins/fast_bakery/docker_environment.pyi +3 -3
  102. metaflow-stubs/mf_extensions/outerbounds/plugins/fast_bakery/fast_bakery.pyi +2 -2
  103. metaflow-stubs/mf_extensions/outerbounds/plugins/kubernetes/__init__.pyi +2 -2
  104. metaflow-stubs/mf_extensions/outerbounds/plugins/kubernetes/pod_killer.pyi +2 -2
  105. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/__init__.pyi +2 -2
  106. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/constants.pyi +2 -2
  107. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/exceptions.pyi +2 -2
  108. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/ollama.pyi +2 -2
  109. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/status_card.pyi +2 -2
  110. metaflow-stubs/mf_extensions/outerbounds/plugins/snowflake/__init__.pyi +2 -2
  111. metaflow-stubs/mf_extensions/outerbounds/plugins/snowflake/snowflake.pyi +2 -2
  112. metaflow-stubs/mf_extensions/outerbounds/profilers/__init__.pyi +2 -2
  113. metaflow-stubs/mf_extensions/outerbounds/profilers/gpu.pyi +2 -2
  114. metaflow-stubs/mf_extensions/outerbounds/remote_config.pyi +2 -2
  115. metaflow-stubs/mf_extensions/outerbounds/toplevel/__init__.pyi +2 -2
  116. metaflow-stubs/mf_extensions/outerbounds/toplevel/global_aliases_for_metaflow_package.pyi +2 -2
  117. metaflow-stubs/mf_extensions/outerbounds/toplevel/s3_proxy.pyi +2 -2
  118. metaflow-stubs/multicore_utils.pyi +2 -2
  119. metaflow-stubs/ob_internal.pyi +2 -2
  120. metaflow-stubs/packaging_sys/__init__.pyi +6 -6
  121. metaflow-stubs/packaging_sys/backend.pyi +2 -2
  122. metaflow-stubs/packaging_sys/distribution_support.pyi +2 -2
  123. metaflow-stubs/packaging_sys/tar_backend.pyi +5 -5
  124. metaflow-stubs/packaging_sys/utils.pyi +2 -2
  125. metaflow-stubs/packaging_sys/v1.pyi +2 -2
  126. metaflow-stubs/parameters.pyi +3 -3
  127. metaflow-stubs/plugins/__init__.pyi +13 -13
  128. metaflow-stubs/plugins/airflow/__init__.pyi +2 -2
  129. metaflow-stubs/plugins/airflow/airflow_utils.pyi +2 -2
  130. metaflow-stubs/plugins/airflow/exception.pyi +2 -2
  131. metaflow-stubs/plugins/airflow/sensors/__init__.pyi +2 -2
  132. metaflow-stubs/plugins/airflow/sensors/base_sensor.pyi +2 -2
  133. metaflow-stubs/plugins/airflow/sensors/external_task_sensor.pyi +2 -2
  134. metaflow-stubs/plugins/airflow/sensors/s3_sensor.pyi +2 -2
  135. metaflow-stubs/plugins/argo/__init__.pyi +2 -2
  136. metaflow-stubs/plugins/argo/argo_client.pyi +2 -2
  137. metaflow-stubs/plugins/argo/argo_events.pyi +2 -2
  138. metaflow-stubs/plugins/argo/argo_workflows.pyi +4 -4
  139. metaflow-stubs/plugins/argo/argo_workflows_decorator.pyi +3 -3
  140. metaflow-stubs/plugins/argo/argo_workflows_deployer.pyi +3 -3
  141. metaflow-stubs/plugins/argo/argo_workflows_deployer_objects.pyi +3 -3
  142. metaflow-stubs/plugins/argo/exit_hooks.pyi +3 -3
  143. metaflow-stubs/plugins/aws/__init__.pyi +2 -2
  144. metaflow-stubs/plugins/aws/aws_client.pyi +2 -2
  145. metaflow-stubs/plugins/aws/aws_utils.pyi +2 -2
  146. metaflow-stubs/plugins/aws/batch/__init__.pyi +2 -2
  147. metaflow-stubs/plugins/aws/batch/batch.pyi +2 -2
  148. metaflow-stubs/plugins/aws/batch/batch_client.pyi +2 -2
  149. metaflow-stubs/plugins/aws/batch/batch_decorator.pyi +2 -2
  150. metaflow-stubs/plugins/aws/secrets_manager/__init__.pyi +2 -2
  151. metaflow-stubs/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.pyi +2 -2
  152. metaflow-stubs/plugins/aws/step_functions/__init__.pyi +2 -2
  153. metaflow-stubs/plugins/aws/step_functions/event_bridge_client.pyi +2 -2
  154. metaflow-stubs/plugins/aws/step_functions/schedule_decorator.pyi +2 -2
  155. metaflow-stubs/plugins/aws/step_functions/step_functions.pyi +2 -2
  156. metaflow-stubs/plugins/aws/step_functions/step_functions_client.pyi +2 -2
  157. metaflow-stubs/plugins/aws/step_functions/step_functions_deployer.pyi +2 -2
  158. metaflow-stubs/plugins/aws/step_functions/step_functions_deployer_objects.pyi +2 -2
  159. metaflow-stubs/plugins/azure/__init__.pyi +2 -2
  160. metaflow-stubs/plugins/azure/azure_credential.pyi +2 -2
  161. metaflow-stubs/plugins/azure/azure_exceptions.pyi +2 -2
  162. metaflow-stubs/plugins/azure/azure_secret_manager_secrets_provider.pyi +2 -2
  163. metaflow-stubs/plugins/azure/azure_utils.pyi +2 -2
  164. metaflow-stubs/plugins/azure/blob_service_client_factory.pyi +2 -2
  165. metaflow-stubs/plugins/azure/includefile_support.pyi +2 -2
  166. metaflow-stubs/plugins/cards/__init__.pyi +2 -2
  167. metaflow-stubs/plugins/cards/card_client.pyi +2 -2
  168. metaflow-stubs/plugins/cards/card_creator.pyi +2 -2
  169. metaflow-stubs/plugins/cards/card_datastore.pyi +2 -2
  170. metaflow-stubs/plugins/cards/card_decorator.pyi +3 -3
  171. metaflow-stubs/plugins/cards/card_modules/__init__.pyi +2 -2
  172. metaflow-stubs/plugins/cards/card_modules/basic.pyi +2 -2
  173. metaflow-stubs/plugins/cards/card_modules/card.pyi +2 -2
  174. metaflow-stubs/plugins/cards/card_modules/components.pyi +16 -4
  175. metaflow-stubs/plugins/cards/card_modules/convert_to_native_type.pyi +2 -2
  176. metaflow-stubs/plugins/cards/card_modules/json_viewer.pyi +2 -2
  177. metaflow-stubs/plugins/cards/card_modules/renderer_tools.pyi +2 -2
  178. metaflow-stubs/plugins/cards/card_modules/test_cards.pyi +2 -2
  179. metaflow-stubs/plugins/cards/card_resolver.pyi +2 -2
  180. metaflow-stubs/plugins/cards/component_serializer.pyi +2 -2
  181. metaflow-stubs/plugins/cards/exception.pyi +2 -2
  182. metaflow-stubs/plugins/catch_decorator.pyi +2 -2
  183. metaflow-stubs/plugins/datatools/__init__.pyi +2 -2
  184. metaflow-stubs/plugins/datatools/local.pyi +2 -2
  185. metaflow-stubs/plugins/datatools/s3/__init__.pyi +2 -2
  186. metaflow-stubs/plugins/datatools/s3/s3.pyi +4 -4
  187. metaflow-stubs/plugins/datatools/s3/s3tail.pyi +2 -2
  188. metaflow-stubs/plugins/datatools/s3/s3util.pyi +2 -2
  189. metaflow-stubs/plugins/debug_logger.pyi +2 -2
  190. metaflow-stubs/plugins/debug_monitor.pyi +2 -2
  191. metaflow-stubs/plugins/environment_decorator.pyi +2 -2
  192. metaflow-stubs/plugins/events_decorator.pyi +2 -2
  193. metaflow-stubs/plugins/exit_hook/__init__.pyi +2 -2
  194. metaflow-stubs/plugins/exit_hook/exit_hook_decorator.pyi +2 -2
  195. metaflow-stubs/plugins/frameworks/__init__.pyi +2 -2
  196. metaflow-stubs/plugins/frameworks/pytorch.pyi +2 -2
  197. metaflow-stubs/plugins/gcp/__init__.pyi +2 -2
  198. metaflow-stubs/plugins/gcp/gcp_secret_manager_secrets_provider.pyi +2 -2
  199. metaflow-stubs/plugins/gcp/gs_exceptions.pyi +2 -2
  200. metaflow-stubs/plugins/gcp/gs_storage_client_factory.pyi +2 -2
  201. metaflow-stubs/plugins/gcp/gs_utils.pyi +2 -2
  202. metaflow-stubs/plugins/gcp/includefile_support.pyi +2 -2
  203. metaflow-stubs/plugins/kubernetes/__init__.pyi +2 -2
  204. metaflow-stubs/plugins/kubernetes/kube_utils.pyi +2 -2
  205. metaflow-stubs/plugins/kubernetes/kubernetes.pyi +2 -2
  206. metaflow-stubs/plugins/kubernetes/kubernetes_client.pyi +2 -2
  207. metaflow-stubs/plugins/kubernetes/kubernetes_decorator.pyi +2 -2
  208. metaflow-stubs/plugins/kubernetes/kubernetes_jobsets.pyi +2 -2
  209. metaflow-stubs/plugins/kubernetes/spot_monitor_sidecar.pyi +2 -2
  210. metaflow-stubs/plugins/ollama/__init__.pyi +2 -2
  211. metaflow-stubs/plugins/optuna/__init__.pyi +2 -2
  212. metaflow-stubs/plugins/parallel_decorator.pyi +2 -2
  213. metaflow-stubs/plugins/parsers.pyi +2 -2
  214. metaflow-stubs/plugins/perimeters.pyi +2 -2
  215. metaflow-stubs/plugins/project_decorator.pyi +2 -2
  216. metaflow-stubs/plugins/pypi/__init__.pyi +2 -2
  217. metaflow-stubs/plugins/pypi/conda_decorator.pyi +2 -2
  218. metaflow-stubs/plugins/pypi/conda_environment.pyi +5 -5
  219. metaflow-stubs/plugins/pypi/parsers.pyi +2 -2
  220. metaflow-stubs/plugins/pypi/pypi_decorator.pyi +2 -2
  221. metaflow-stubs/plugins/pypi/pypi_environment.pyi +2 -2
  222. metaflow-stubs/plugins/pypi/utils.pyi +2 -2
  223. metaflow-stubs/plugins/resources_decorator.pyi +2 -2
  224. metaflow-stubs/plugins/retry_decorator.pyi +2 -2
  225. metaflow-stubs/plugins/secrets/__init__.pyi +2 -2
  226. metaflow-stubs/plugins/secrets/inline_secrets_provider.pyi +2 -2
  227. metaflow-stubs/plugins/secrets/secrets_decorator.pyi +2 -2
  228. metaflow-stubs/plugins/secrets/secrets_func.pyi +2 -2
  229. metaflow-stubs/plugins/secrets/secrets_spec.pyi +2 -2
  230. metaflow-stubs/plugins/secrets/utils.pyi +2 -2
  231. metaflow-stubs/plugins/snowflake/__init__.pyi +2 -2
  232. metaflow-stubs/plugins/storage_executor.pyi +2 -2
  233. metaflow-stubs/plugins/test_unbounded_foreach_decorator.pyi +2 -2
  234. metaflow-stubs/plugins/timeout_decorator.pyi +2 -2
  235. metaflow-stubs/plugins/torchtune/__init__.pyi +2 -2
  236. metaflow-stubs/plugins/uv/__init__.pyi +2 -2
  237. metaflow-stubs/plugins/uv/uv_environment.pyi +3 -3
  238. metaflow-stubs/profilers/__init__.pyi +2 -2
  239. metaflow-stubs/pylint_wrapper.pyi +2 -2
  240. metaflow-stubs/runner/__init__.pyi +2 -2
  241. metaflow-stubs/runner/deployer.pyi +5 -5
  242. metaflow-stubs/runner/deployer_impl.pyi +2 -2
  243. metaflow-stubs/runner/metaflow_runner.pyi +4 -4
  244. metaflow-stubs/runner/nbdeploy.pyi +2 -2
  245. metaflow-stubs/runner/nbrun.pyi +2 -2
  246. metaflow-stubs/runner/subprocess_manager.pyi +2 -2
  247. metaflow-stubs/runner/utils.pyi +3 -3
  248. metaflow-stubs/system/__init__.pyi +2 -2
  249. metaflow-stubs/system/system_logger.pyi +2 -2
  250. metaflow-stubs/system/system_monitor.pyi +2 -2
  251. metaflow-stubs/tagging_util.pyi +2 -2
  252. metaflow-stubs/tuple_util.pyi +2 -2
  253. metaflow-stubs/user_configs/__init__.pyi +2 -2
  254. metaflow-stubs/user_configs/config_options.pyi +2 -2
  255. metaflow-stubs/user_configs/config_parameters.pyi +6 -6
  256. metaflow-stubs/user_decorators/__init__.pyi +2 -2
  257. metaflow-stubs/user_decorators/common.pyi +2 -2
  258. metaflow-stubs/user_decorators/mutable_flow.pyi +6 -6
  259. metaflow-stubs/user_decorators/mutable_step.pyi +6 -6
  260. metaflow-stubs/user_decorators/user_flow_decorator.pyi +4 -4
  261. metaflow-stubs/user_decorators/user_step_decorator.pyi +5 -5
  262. {ob_metaflow_stubs-6.0.10.16.dist-info → ob_metaflow_stubs-6.0.10.18.dist-info}/METADATA +1 -1
  263. ob_metaflow_stubs-6.0.10.18.dist-info/RECORD +266 -0
  264. ob_metaflow_stubs-6.0.10.16.dist-info/RECORD +0 -266
  265. {ob_metaflow_stubs-6.0.10.16.dist-info → ob_metaflow_stubs-6.0.10.18.dist-info}/WHEEL +0 -0
  266. {ob_metaflow_stubs-6.0.10.16.dist-info → ob_metaflow_stubs-6.0.10.18.dist-info}/top_level.txt +0 -0
@@ -1,7 +1,7 @@
1
1
  ######################################################################################################
2
2
  # Auto-generated Metaflow stub file #
3
- # MF version: 2.18.11.1+obcheckpoint(0.2.8);ob(v1) #
4
- # Generated on 2025-10-13T07:07:26.927215 #
3
+ # MF version: 2.18.12.1+obcheckpoint(0.2.8);ob(v1) #
4
+ # Generated on 2025-10-20T19:13:33.388213 #
5
5
  ######################################################################################################
6
6
 
7
7
  from __future__ import annotations
@@ -40,18 +40,18 @@ from .user_decorators.user_step_decorator import StepMutator as StepMutator
40
40
  from .user_decorators.user_step_decorator import user_step_decorator as user_step_decorator
41
41
  from .user_decorators.user_flow_decorator import FlowMutator as FlowMutator
42
42
  from . import cards as cards
43
- from . import tuple_util as tuple_util
44
43
  from . import metaflow_git as metaflow_git
44
+ from . import tuple_util as tuple_util
45
45
  from . import events as events
46
46
  from . import runner as runner
47
47
  from . import plugins as plugins
48
48
  from .mf_extensions.outerbounds.toplevel.global_aliases_for_metaflow_package import S3 as S3
49
49
  from . import includefile as includefile
50
50
  from .includefile import IncludeFile as IncludeFile
51
- from .plugins.parsers import yaml_parser as yaml_parser
51
+ from .plugins.pypi.parsers import pyproject_toml_parser as pyproject_toml_parser
52
52
  from .plugins.pypi.parsers import conda_environment_yml_parser as conda_environment_yml_parser
53
53
  from .plugins.pypi.parsers import requirements_txt_parser as requirements_txt_parser
54
- from .plugins.pypi.parsers import pyproject_toml_parser as pyproject_toml_parser
54
+ from .plugins.parsers import yaml_parser as yaml_parser
55
55
  from . import client as client
56
56
  from .client.core import namespace as namespace
57
57
  from .client.core import get_namespace as get_namespace
@@ -169,101 +169,13 @@ def step(f: typing.Union[typing.Callable[[FlowSpecDerived], None], typing.Callab
169
169
  """
170
170
  ...
171
171
 
172
- def vllm(*, model: str, backend: str, openai_api_server: bool, debug: bool, card_refresh_interval: int, max_retries: int, retry_alert_frequency: int, engine_args: dict) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
173
- """
174
- This decorator is used to run vllm APIs as Metaflow task sidecars.
175
-
176
- User code call
177
- --------------
178
- @vllm(
179
- model="...",
180
- ...
181
- )
182
-
183
- Valid backend options
184
- ---------------------
185
- - 'local': Run as a separate process on the local task machine.
186
-
187
- Valid model options
188
- -------------------
189
- Any HuggingFace model identifier, e.g. 'meta-llama/Llama-3.2-1B'
190
-
191
- NOTE: vLLM's OpenAI-compatible server serves ONE model per server instance.
192
- If you need multiple models, you must create multiple @vllm decorators.
193
-
194
-
195
- Parameters
196
- ----------
197
- model: str
198
- HuggingFace model identifier to be served by vLLM.
199
- backend: str
200
- Determines where and how to run the vLLM process.
201
- openai_api_server: bool
202
- Whether to use OpenAI-compatible API server mode (subprocess) instead of native engine.
203
- Default is False (uses native engine).
204
- Set to True for backward compatibility with existing code.
205
- debug: bool
206
- Whether to turn on verbose debugging logs.
207
- card_refresh_interval: int
208
- Interval in seconds for refreshing the vLLM status card.
209
- Only used when openai_api_server=True.
210
- max_retries: int
211
- Maximum number of retries checking for vLLM server startup.
212
- Only used when openai_api_server=True.
213
- retry_alert_frequency: int
214
- Frequency of alert logs for vLLM server startup retries.
215
- Only used when openai_api_server=True.
216
- engine_args : dict
217
- Additional keyword arguments to pass to the vLLM engine.
218
- For example, `tensor_parallel_size=2`.
219
- """
220
- ...
221
-
222
- def ollama(*, models: list, backend: str, force_pull: bool, cache_update_policy: str, force_cache_update: bool, debug: bool, circuit_breaker_config: dict, timeout_config: dict) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
172
+ def nebius_s3_proxy(*, integration_name: typing.Optional[str] = None, write_mode: typing.Optional[str] = None, debug: typing.Optional[bool] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
223
173
  """
224
- This decorator is used to run Ollama APIs as Metaflow task sidecars.
225
-
226
- User code call
227
- --------------
228
- @ollama(
229
- models=[...],
230
- ...
231
- )
232
-
233
- Valid backend options
234
- ---------------------
235
- - 'local': Run as a separate process on the local task machine.
236
- - (TODO) 'managed': Outerbounds hosts and selects compute provider.
237
- - (TODO) 'remote': Spin up separate instance to serve Ollama models.
238
-
239
- Valid model options
240
- -------------------
241
- Any model here https://ollama.com/search, e.g. 'llama3.2', 'llama3.3'
174
+ `@nebius_s3_proxy` is a Nebius-specific S3 Proxy decorator for routing S3 requests through a local proxy service.
175
+ It exists to make it easier for users to know that this decorator should only be used with
176
+ a Neo Cloud like Nebius. The underlying mechanics of the decorator is the same as the `@s3_proxy`:
242
177
 
243
178
 
244
- Parameters
245
- ----------
246
- models: list[str]
247
- List of Ollama containers running models in sidecars.
248
- backend: str
249
- Determines where and how to run the Ollama process.
250
- force_pull: bool
251
- Whether to run `ollama pull` no matter what, or first check the remote cache in Metaflow datastore for this model key.
252
- cache_update_policy: str
253
- Cache update policy: "auto", "force", or "never".
254
- force_cache_update: bool
255
- Simple override for "force" cache update policy.
256
- debug: bool
257
- Whether to turn on verbose debugging logs.
258
- circuit_breaker_config: dict
259
- Configuration for circuit breaker protection. Keys: failure_threshold, recovery_timeout, reset_timeout.
260
- timeout_config: dict
261
- Configuration for various operation timeouts. Keys: pull, stop, health_check, install, server_startup.
262
- """
263
- ...
264
-
265
- def s3_proxy(*, integration_name: typing.Optional[str] = None, write_mode: typing.Optional[str] = None, debug: typing.Optional[bool] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
266
- """
267
179
  Set up an S3 proxy that caches objects in an external, S3‑compatible bucket
268
180
  for S3 read and write requests.
269
181
 
@@ -321,236 +233,353 @@ def s3_proxy(*, integration_name: typing.Optional[str] = None, write_mode: typin
321
233
  """
322
234
  ...
323
235
 
324
- def kubernetes(*, cpu: int = 1, memory: int = 4096, disk: int = 10240, image: typing.Optional[str] = None, image_pull_policy: str = 'KUBERNETES_IMAGE_PULL_POLICY', image_pull_secrets: typing.List[str] = [], service_account: str = 'METAFLOW_KUBERNETES_SERVICE_ACCOUNT', secrets: typing.Optional[typing.List[str]] = None, node_selector: typing.Union[typing.Dict[str, str], str, None] = None, namespace: str = 'METAFLOW_KUBERNETES_NAMESPACE', gpu: typing.Optional[int] = None, gpu_vendor: str = 'KUBERNETES_GPU_VENDOR', tolerations: typing.List[typing.Dict[str, str]] = [], labels: typing.Dict[str, str] = 'METAFLOW_KUBERNETES_LABELS', annotations: typing.Dict[str, str] = 'METAFLOW_KUBERNETES_ANNOTATIONS', use_tmpfs: bool = False, tmpfs_tempdir: bool = True, tmpfs_size: typing.Optional[int] = None, tmpfs_path: typing.Optional[str] = '/metaflow_temp', persistent_volume_claims: typing.Optional[typing.Dict[str, str]] = None, shared_memory: typing.Optional[int] = None, port: typing.Optional[int] = None, compute_pool: typing.Optional[str] = None, hostname_resolution_timeout: int = 600, qos: str = 'Burstable', security_context: typing.Optional[typing.Dict[str, typing.Any]] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
236
+ @typing.overload
237
+ def conda(*, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
325
238
  """
326
- Specifies that this step should execute on Kubernetes.
239
+ Specifies the Conda environment for the step.
240
+
241
+ Information in this decorator will augment any
242
+ attributes set in the `@conda_base` flow-level decorator. Hence,
243
+ you can use `@conda_base` to set packages required by all
244
+ steps and use `@conda` to specify step-specific overrides.
327
245
 
328
246
 
329
247
  Parameters
330
248
  ----------
331
- cpu : int, default 1
332
- Number of CPUs required for this step. If `@resources` is
333
- also present, the maximum value from all decorators is used.
334
- memory : int, default 4096
335
- Memory size (in MB) required for this step. If
336
- `@resources` is also present, the maximum value from all decorators is
337
- used.
338
- disk : int, default 10240
339
- Disk size (in MB) required for this step. If
340
- `@resources` is also present, the maximum value from all decorators is
341
- used.
342
- image : str, optional, default None
343
- Docker image to use when launching on Kubernetes. If not specified, and
344
- METAFLOW_KUBERNETES_CONTAINER_IMAGE is specified, that image is used. If
345
- not, a default Docker image mapping to the current version of Python is used.
346
- image_pull_policy: str, default KUBERNETES_IMAGE_PULL_POLICY
347
- If given, the imagePullPolicy to be applied to the Docker image of the step.
348
- image_pull_secrets: List[str], default []
349
- The default is extracted from METAFLOW_KUBERNETES_IMAGE_PULL_SECRETS.
350
- Kubernetes image pull secrets to use when pulling container images
351
- in Kubernetes.
352
- service_account : str, default METAFLOW_KUBERNETES_SERVICE_ACCOUNT
353
- Kubernetes service account to use when launching pod in Kubernetes.
354
- secrets : List[str], optional, default None
355
- Kubernetes secrets to use when launching pod in Kubernetes. These
356
- secrets are in addition to the ones defined in `METAFLOW_KUBERNETES_SECRETS`
357
- in Metaflow configuration.
358
- node_selector: Union[Dict[str,str], str], optional, default None
359
- Kubernetes node selector(s) to apply to the pod running the task.
360
- Can be passed in as a comma separated string of values e.g.
361
- 'kubernetes.io/os=linux,kubernetes.io/arch=amd64' or as a dictionary
362
- {'kubernetes.io/os': 'linux', 'kubernetes.io/arch': 'amd64'}
363
- namespace : str, default METAFLOW_KUBERNETES_NAMESPACE
364
- Kubernetes namespace to use when launching pod in Kubernetes.
365
- gpu : int, optional, default None
366
- Number of GPUs required for this step. A value of zero implies that
367
- the scheduled node should not have GPUs.
368
- gpu_vendor : str, default KUBERNETES_GPU_VENDOR
369
- The vendor of the GPUs to be used for this step.
370
- tolerations : List[Dict[str,str]], default []
371
- The default is extracted from METAFLOW_KUBERNETES_TOLERATIONS.
372
- Kubernetes tolerations to use when launching pod in Kubernetes.
373
- labels: Dict[str, str], default: METAFLOW_KUBERNETES_LABELS
374
- Kubernetes labels to use when launching pod in Kubernetes.
375
- annotations: Dict[str, str], default: METAFLOW_KUBERNETES_ANNOTATIONS
376
- Kubernetes annotations to use when launching pod in Kubernetes.
377
- use_tmpfs : bool, default False
378
- This enables an explicit tmpfs mount for this step.
379
- tmpfs_tempdir : bool, default True
380
- sets METAFLOW_TEMPDIR to tmpfs_path if set for this step.
381
- tmpfs_size : int, optional, default: None
382
- The value for the size (in MiB) of the tmpfs mount for this step.
383
- This parameter maps to the `--tmpfs` option in Docker. Defaults to 50% of the
384
- memory allocated for this step.
385
- tmpfs_path : str, optional, default /metaflow_temp
386
- Path to tmpfs mount for this step.
387
- persistent_volume_claims : Dict[str, str], optional, default None
388
- A map (dictionary) of persistent volumes to be mounted to the pod for this step. The map is from persistent
389
- volumes to the path to which the volume is to be mounted, e.g., `{'pvc-name': '/path/to/mount/on'}`.
390
- shared_memory: int, optional
391
- Shared memory size (in MiB) required for this step
392
- port: int, optional
393
- Port number to specify in the Kubernetes job object
394
- compute_pool : str, optional, default None
395
- Compute pool to be used for for this step.
396
- If not specified, any accessible compute pool within the perimeter is used.
397
- hostname_resolution_timeout: int, default 10 * 60
398
- Timeout in seconds for the workers tasks in the gang scheduled cluster to resolve the hostname of control task.
399
- Only applicable when @parallel is used.
400
- qos: str, default: Burstable
401
- Quality of Service class to assign to the pod. Supported values are: Guaranteed, Burstable, BestEffort
402
-
403
- security_context: Dict[str, Any], optional, default None
404
- Container security context. Applies to the task container. Allows the following keys:
405
- - privileged: bool, optional, default None
406
- - allow_privilege_escalation: bool, optional, default None
407
- - run_as_user: int, optional, default None
408
- - run_as_group: int, optional, default None
409
- - run_as_non_root: bool, optional, default None
249
+ packages : Dict[str, str], default {}
250
+ Packages to use for this step. The key is the name of the package
251
+ and the value is the version to use.
252
+ libraries : Dict[str, str], default {}
253
+ Supported for backward compatibility. When used with packages, packages will take precedence.
254
+ python : str, optional, default None
255
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
256
+ that the version used will correspond to the version of the Python interpreter used to start the run.
257
+ disabled : bool, default False
258
+ If set to True, disables @conda.
410
259
  """
411
260
  ...
412
261
 
413
262
  @typing.overload
414
- def app_deploy(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
415
- """
416
- Decorator prototype for all step decorators. This function gets specialized
417
- and imported for all decorators types by _import_plugin_decorators().
418
- """
263
+ def conda(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
419
264
  ...
420
265
 
421
266
  @typing.overload
422
- def app_deploy(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
267
+ def conda(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
423
268
  ...
424
269
 
425
- def app_deploy(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
270
+ def conda(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False):
426
271
  """
427
- Decorator prototype for all step decorators. This function gets specialized
428
- and imported for all decorators types by _import_plugin_decorators().
272
+ Specifies the Conda environment for the step.
273
+
274
+ Information in this decorator will augment any
275
+ attributes set in the `@conda_base` flow-level decorator. Hence,
276
+ you can use `@conda_base` to set packages required by all
277
+ steps and use `@conda` to specify step-specific overrides.
278
+
279
+
280
+ Parameters
281
+ ----------
282
+ packages : Dict[str, str], default {}
283
+ Packages to use for this step. The key is the name of the package
284
+ and the value is the version to use.
285
+ libraries : Dict[str, str], default {}
286
+ Supported for backward compatibility. When used with packages, packages will take precedence.
287
+ python : str, optional, default None
288
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
289
+ that the version used will correspond to the version of the Python interpreter used to start the run.
290
+ disabled : bool, default False
291
+ If set to True, disables @conda.
429
292
  """
430
293
  ...
431
294
 
432
- def huggingface_hub(*, temp_dir_root: typing.Optional[str] = None, cache_scope: typing.Optional[str] = None, load: typing.Union[typing.List[str], typing.List[typing.Tuple[typing.Dict, str]], typing.List[typing.Tuple[str, str]], typing.List[typing.Dict], None]) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
295
+ @typing.overload
296
+ def checkpoint(*, load_policy: str = 'fresh', temp_dir_root: str = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
433
297
  """
434
- Decorator that helps cache, version, and store models/datasets from the Hugging Face Hub.
298
+ Enables checkpointing for a step.
435
299
 
436
- Examples
437
- --------
300
+ > Examples
301
+
302
+ - Saving Checkpoints
438
303
 
439
304
  ```python
440
- # **Usage: creating references to models from the Hugging Face Hub that may be loaded in downstream steps**
441
- @huggingface_hub
305
+ @checkpoint
442
306
  @step
443
- def pull_model_from_huggingface(self):
444
- # `current.huggingface_hub.snapshot_download` downloads the model from the Hugging Face Hub
445
- # and saves it in the backend storage based on the model's `repo_id`. If there exists a model
446
- # with the same `repo_id` in the backend storage, it will not download the model again. The return
447
- # value of the function is a reference to the model in the backend storage.
448
- # This reference can be used to load the model in the subsequent steps via `@model(load=["llama_model"])`
449
-
450
- self.model_id = "mistralai/Mistral-7B-Instruct-v0.1"
451
- self.llama_model = current.huggingface_hub.snapshot_download(
452
- repo_id=self.model_id,
453
- allow_patterns=["*.safetensors", "*.json", "tokenizer.*"],
454
- )
455
- self.next(self.train)
307
+ def train(self):
308
+ model = create_model(self.parameters, checkpoint_path = None)
309
+ for i in range(self.epochs):
310
+ # some training logic
311
+ loss = model.train(self.dataset)
312
+ if i % 10 == 0:
313
+ model.save(
314
+ current.checkpoint.directory,
315
+ )
316
+ # saves the contents of the `current.checkpoint.directory` as a checkpoint
317
+ # and returns a reference dictionary to the checkpoint saved in the datastore
318
+ self.latest_checkpoint = current.checkpoint.save(
319
+ name="epoch_checkpoint",
320
+ metadata={
321
+ "epoch": i,
322
+ "loss": loss,
323
+ }
324
+ )
325
+ ```
456
326
 
457
- # **Usage: explicitly loading models at runtime from the Hugging Face Hub or from cache (from Metaflow's datastore)**
458
- @huggingface_hub
327
+ - Using Loaded Checkpoints
328
+
329
+ ```python
330
+ @retry(times=3)
331
+ @checkpoint
459
332
  @step
460
- def run_training(self):
461
- # Temporary directory (auto-cleaned on exit)
462
- with current.huggingface_hub.load(
463
- repo_id="google-bert/bert-base-uncased",
464
- allow_patterns=["*.bin"],
465
- ) as local_path:
466
- # Use files under local_path
467
- train_model(local_path)
333
+ def train(self):
334
+ # Assume that the task has restarted and the previous attempt of the task
335
+ # saved a checkpoint
336
+ checkpoint_path = None
337
+ if current.checkpoint.is_loaded: # Check if a checkpoint is loaded
338
+ print("Loaded checkpoint from the previous attempt")
339
+ checkpoint_path = current.checkpoint.directory
340
+
341
+ model = create_model(self.parameters, checkpoint_path = checkpoint_path)
342
+ for i in range(self.epochs):
468
343
  ...
344
+ ```
469
345
 
470
- # **Usage: loading models directly from the Hugging Face Hub or from cache (from Metaflow's datastore)**
471
346
 
472
- @huggingface_hub(load=["mistralai/Mistral-7B-Instruct-v0.1"])
473
- @step
474
- def pull_model_from_huggingface(self):
475
- path_to_model = current.huggingface_hub.loaded["mistralai/Mistral-7B-Instruct-v0.1"]
347
+ Parameters
348
+ ----------
349
+ load_policy : str, default: "fresh"
350
+ The policy for loading the checkpoint. The following policies are supported:
351
+ - "eager": Loads the latest available checkpoint within the namespace.
352
+ With this mode, the latest checkpoint written by any previous task (can be even a different run) of the step
353
+ will be loaded at the start of the task.
354
+ - "none": Do not load any checkpoint
355
+ - "fresh": Loads the latest checkpoint created within the running Task.
356
+ This mode helps loading checkpoints across various retry attempts of the same task.
357
+ With this mode, no checkpoint will be loaded at the start of a task but any checkpoints
358
+ created within the task will be loaded when the task retries execution on failure.
476
359
 
477
- @huggingface_hub(load=[("mistralai/Mistral-7B-Instruct-v0.1", "/my-directory"), ("myorg/mistral-lora", "/my-lora-directory")])
360
+ temp_dir_root : str, default: None
361
+ The root directory under which `current.checkpoint.directory` will be created.
362
+ """
363
+ ...
364
+
365
+ @typing.overload
366
+ def checkpoint(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
367
+ ...
368
+
369
+ @typing.overload
370
+ def checkpoint(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
371
+ ...
372
+
373
+ def checkpoint(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, load_policy: str = 'fresh', temp_dir_root: str = None):
374
+ """
375
+ Enables checkpointing for a step.
376
+
377
+ > Examples
378
+
379
+ - Saving Checkpoints
380
+
381
+ ```python
382
+ @checkpoint
478
383
  @step
479
- def finetune_model(self):
480
- path_to_model = current.huggingface_hub.loaded["mistralai/Mistral-7B-Instruct-v0.1"]
481
- # path_to_model will be /my-directory
384
+ def train(self):
385
+ model = create_model(self.parameters, checkpoint_path = None)
386
+ for i in range(self.epochs):
387
+ # some training logic
388
+ loss = model.train(self.dataset)
389
+ if i % 10 == 0:
390
+ model.save(
391
+ current.checkpoint.directory,
392
+ )
393
+ # saves the contents of the `current.checkpoint.directory` as a checkpoint
394
+ # and returns a reference dictionary to the checkpoint saved in the datastore
395
+ self.latest_checkpoint = current.checkpoint.save(
396
+ name="epoch_checkpoint",
397
+ metadata={
398
+ "epoch": i,
399
+ "loss": loss,
400
+ }
401
+ )
402
+ ```
482
403
 
404
+ - Using Loaded Checkpoints
483
405
 
484
- # Takes all the arguments passed to `snapshot_download`
485
- # except for `local_dir`
486
- @huggingface_hub(load=[
487
- {
488
- "repo_id": "mistralai/Mistral-7B-Instruct-v0.1",
489
- },
490
- {
491
- "repo_id": "myorg/mistral-lora",
492
- "repo_type": "model",
493
- },
494
- ])
406
+ ```python
407
+ @retry(times=3)
408
+ @checkpoint
495
409
  @step
496
- def finetune_model(self):
497
- path_to_model = current.huggingface_hub.loaded["mistralai/Mistral-7B-Instruct-v0.1"]
498
- # path_to_model will be /my-directory
410
+ def train(self):
411
+ # Assume that the task has restarted and the previous attempt of the task
412
+ # saved a checkpoint
413
+ checkpoint_path = None
414
+ if current.checkpoint.is_loaded: # Check if a checkpoint is loaded
415
+ print("Loaded checkpoint from the previous attempt")
416
+ checkpoint_path = current.checkpoint.directory
417
+
418
+ model = create_model(self.parameters, checkpoint_path = checkpoint_path)
419
+ for i in range(self.epochs):
420
+ ...
499
421
  ```
500
422
 
501
423
 
502
424
  Parameters
503
425
  ----------
504
- temp_dir_root : str, optional
505
- The root directory that will hold the temporary directory where objects will be downloaded.
426
+ load_policy : str, default: "fresh"
427
+ The policy for loading the checkpoint. The following policies are supported:
428
+ - "eager": Loads the latest available checkpoint within the namespace.
429
+ With this mode, the latest checkpoint written by any previous task (can be even a different run) of the step
430
+ will be loaded at the start of the task.
431
+ - "none": Do not load any checkpoint
432
+ - "fresh": Loads the latest checkpoint created within the running Task.
433
+ This mode helps loading checkpoints across various retry attempts of the same task.
434
+ With this mode, no checkpoint will be loaded at the start of a task but any checkpoints
435
+ created within the task will be loaded when the task retries execution on failure.
506
436
 
507
- cache_scope : str, optional
508
- The scope of the cache. Can be `checkpoint` / `flow` / `global`.
509
- - `checkpoint` (default): All repos are stored like objects saved by `@checkpoint`.
510
- i.e., the cached path is derived from the namespace, flow, step, and Metaflow foreach iteration.
511
- Any repo downloaded under this scope will only be retrieved from the cache when the step runs under the same namespace in the same flow (at the same foreach index).
437
+ temp_dir_root : str, default: None
438
+ The root directory under which `current.checkpoint.directory` will be created.
439
+ """
440
+ ...
441
+
442
+ @typing.overload
443
+ def card(*, type: str = 'default', id: typing.Optional[str] = None, options: typing.Dict[str, typing.Any] = {}, timeout: int = 45) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
444
+ """
445
+ Creates a human-readable report, a Metaflow Card, after this step completes.
512
446
 
513
- - `flow`: All repos are cached under the flow, regardless of namespace.
514
- i.e., the cached path is derived solely from the flow name.
515
- When to use this mode: (1) Multiple users are executing the same flow and want shared access to the repos cached by the decorator. (2) Multiple versions of a flow are deployed, all needing access to the same repos cached by the decorator.
447
+ Note that you may add multiple `@card` decorators in a step with different parameters.
516
448
 
517
- - `global`: All repos are cached under a globally static path.
518
- i.e., the base path of the cache is static and all repos are stored under it.
519
- When to use this mode:
520
- - All repos from the Hugging Face Hub need to be shared by users across all flow executions.
521
- - Each caching scope comes with its own trade-offs:
522
- - `checkpoint`:
523
- - Has explicit control over when caches are populated (controlled by the same flow that has the `@huggingface_hub` decorator) but ends up hitting the Hugging Face Hub more often if there are many users/namespaces/steps.
524
- - Since objects are written on a `namespace/flow/step` basis, the blast radius of a bad checkpoint is limited to a particular flow in a namespace.
525
- - `flow`:
526
- - Has less control over when caches are populated (can be written by any execution instance of a flow from any namespace) but results in more cache hits.
527
- - The blast radius of a bad checkpoint is limited to all runs of a particular flow.
528
- - It doesn't promote cache reuse across flows.
529
- - `global`:
530
- - Has no control over when caches are populated (can be written by any flow execution) but has the highest cache hit rate.
531
- - It promotes cache reuse across flows.
532
- - The blast radius of a bad checkpoint spans every flow that could be using a particular repo.
533
449
 
534
- load: Union[List[str], List[Tuple[Dict, str]], List[Tuple[str, str]], List[Dict], None]
535
- The list of repos (models/datasets) to load.
450
+ Parameters
451
+ ----------
452
+ type : str, default 'default'
453
+ Card type.
454
+ id : str, optional, default None
455
+ If multiple cards are present, use this id to identify this card.
456
+ options : Dict[str, Any], default {}
457
+ Options passed to the card. The contents depend on the card type.
458
+ timeout : int, default 45
459
+ Interrupt reporting if it takes more than this many seconds.
460
+ """
461
+ ...
462
+
463
+ @typing.overload
464
+ def card(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
465
+ ...
466
+
467
+ @typing.overload
468
+ def card(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
469
+ ...
470
+
471
+ def card(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, type: str = 'default', id: typing.Optional[str] = None, options: typing.Dict[str, typing.Any] = {}, timeout: int = 45):
472
+ """
473
+ Creates a human-readable report, a Metaflow Card, after this step completes.
536
474
 
537
- Loaded repos can be accessed via `current.huggingface_hub.loaded`. If load is set, then the following happens:
475
+ Note that you may add multiple `@card` decorators in a step with different parameters.
538
476
 
539
- - If repo (model/dataset) is not found in the datastore:
540
- - Downloads the repo from Hugging Face Hub to a temporary directory (or uses specified path) for local access
541
- - Stores it in Metaflow's datastore (s3/gcs/azure etc.) with a unique name based on repo_type/repo_id
542
- - All HF models loaded for a `@step` will be cached separately under flow/step/namespace.
543
477
 
544
- - If repo is found in the datastore:
545
- - Loads it directly from datastore to local path (can be temporary directory or specified path)
478
+ Parameters
479
+ ----------
480
+ type : str, default 'default'
481
+ Card type.
482
+ id : str, optional, default None
483
+ If multiple cards are present, use this id to identify this card.
484
+ options : Dict[str, Any], default {}
485
+ Options passed to the card. The contents depend on the card type.
486
+ timeout : int, default 45
487
+ Interrupt reporting if it takes more than this many seconds.
546
488
  """
547
489
  ...
548
490
 
549
- def nebius_s3_proxy(*, integration_name: typing.Optional[str] = None, write_mode: typing.Optional[str] = None, debug: typing.Optional[bool] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
491
+ @typing.overload
492
+ def fast_bakery_internal(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
550
493
  """
551
- `@nebius_s3_proxy` is a Nebius-specific S3 Proxy decorator for routing S3 requests through a local proxy service.
494
+ Internal decorator to support Fast bakery
495
+ """
496
+ ...
497
+
498
+ @typing.overload
499
+ def fast_bakery_internal(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
500
+ ...
501
+
502
+ def fast_bakery_internal(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
503
+ """
504
+ Internal decorator to support Fast bakery
505
+ """
506
+ ...
507
+
508
+ @typing.overload
509
+ def app_deploy(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
510
+ """
511
+ Decorator prototype for all step decorators. This function gets specialized
512
+ and imported for all decorators types by _import_plugin_decorators().
513
+ """
514
+ ...
515
+
516
+ @typing.overload
517
+ def app_deploy(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
518
+ ...
519
+
520
+ def app_deploy(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
521
+ """
522
+ Decorator prototype for all step decorators. This function gets specialized
523
+ and imported for all decorators types by _import_plugin_decorators().
524
+ """
525
+ ...
526
+
527
+ @typing.overload
528
+ def pypi(*, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
529
+ """
530
+ Specifies the PyPI packages for the step.
531
+
532
+ Information in this decorator will augment any
533
+ attributes set in the `@pypi_base` flow-level decorator. Hence,
534
+ you can use `@pypi_base` to set packages required by all
535
+ steps and use `@pypi` to specify step-specific overrides.
536
+
537
+
538
+ Parameters
539
+ ----------
540
+ packages : Dict[str, str], default: {}
541
+ Packages to use for this step. The key is the name of the package
542
+ and the value is the version to use.
543
+ python : str, optional, default: None
544
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
545
+ that the version used will correspond to the version of the Python interpreter used to start the run.
546
+ """
547
+ ...
548
+
549
+ @typing.overload
550
+ def pypi(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
551
+ ...
552
+
553
+ @typing.overload
554
+ def pypi(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
555
+ ...
556
+
557
+ def pypi(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None):
558
+ """
559
+ Specifies the PyPI packages for the step.
560
+
561
+ Information in this decorator will augment any
562
+ attributes set in the `@pypi_base` flow-level decorator. Hence,
563
+ you can use `@pypi_base` to set packages required by all
564
+ steps and use `@pypi` to specify step-specific overrides.
565
+
566
+
567
+ Parameters
568
+ ----------
569
+ packages : Dict[str, str], default: {}
570
+ Packages to use for this step. The key is the name of the package
571
+ and the value is the version to use.
572
+ python : str, optional, default: None
573
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
574
+ that the version used will correspond to the version of the Python interpreter used to start the run.
575
+ """
576
+ ...
577
+
578
+ def coreweave_s3_proxy(*, integration_name: typing.Optional[str] = None, write_mode: typing.Optional[str] = None, debug: typing.Optional[bool] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
579
+ """
580
+ `@coreweave_s3_proxy` is a CoreWeave-specific S3 Proxy decorator for routing S3 requests through a local proxy service.
552
581
  It exists to make it easier for users to know that this decorator should only be used with
553
- a Neo Cloud like Nebius. The underlying mechanics of the decorator is the same as the `@s3_proxy`:
582
+ a Neo Cloud like CoreWeave. The underlying mechanics of the decorator are the same as the `@s3_proxy`:
554
583
 
555
584
 
556
585
  Set up an S3 proxy that caches objects in an external, S3‑compatible bucket
@@ -611,162 +640,72 @@ def nebius_s3_proxy(*, integration_name: typing.Optional[str] = None, write_mode
611
640
  ...
612
641
 
613
642
  @typing.overload
614
- def fast_bakery_internal(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
643
+ def test_append_card(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
615
644
  """
616
- Internal decorator to support Fast bakery
645
+ A simple decorator that demonstrates using CardDecoratorInjector
646
+ to inject a card and render simple markdown content.
617
647
  """
618
648
  ...
619
649
 
620
650
  @typing.overload
621
- def fast_bakery_internal(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
622
- ...
623
-
624
- def fast_bakery_internal(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
625
- """
626
- Internal decorator to support Fast bakery
627
- """
651
+ def test_append_card(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
628
652
  ...
629
653
 
630
- def nvct(*, gpu: int, gpu_type: str) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
654
+ def test_append_card(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
631
655
  """
632
- Specifies that this step should execute on DGX cloud.
633
-
634
-
635
- Parameters
636
- ----------
637
- gpu : int
638
- Number of GPUs to use.
639
- gpu_type : str
640
- Type of Nvidia GPU to use.
656
+ A simple decorator that demonstrates using CardDecoratorInjector
657
+ to inject a card and render simple markdown content.
641
658
  """
642
659
  ...
643
660
 
644
661
  @typing.overload
645
- def model(*, load: typing.Union[typing.List[str], str, typing.List[typing.Tuple[str, typing.Optional[str]]]] = None, temp_dir_root: str = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
662
+ def catch(*, var: typing.Optional[str] = None, print_exception: bool = True) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
646
663
  """
647
- Enables loading / saving of models within a step.
648
-
649
- > Examples
650
- - Saving Models
651
- ```python
652
- @model
653
- @step
654
- def train(self):
655
- # current.model.save returns a dictionary reference to the model saved
656
- self.my_model = current.model.save(
657
- path_to_my_model,
658
- label="my_model",
659
- metadata={
660
- "epochs": 10,
661
- "batch-size": 32,
662
- "learning-rate": 0.001,
663
- }
664
- )
665
- self.next(self.test)
666
-
667
- @model(load="my_model")
668
- @step
669
- def test(self):
670
- # `current.model.loaded` returns a dictionary of the loaded models
671
- # where the key is the name of the artifact and the value is the path to the model
672
- print(os.listdir(current.model.loaded["my_model"]))
673
- self.next(self.end)
674
- ```
664
+ Specifies that the step will succeed under all circumstances.
675
665
 
676
- - Loading models
677
- ```python
678
- @step
679
- def train(self):
680
- # current.model.load returns the path to the model loaded
681
- checkpoint_path = current.model.load(
682
- self.checkpoint_key,
683
- )
684
- model_path = current.model.load(
685
- self.model,
686
- )
687
- self.next(self.test)
688
- ```
666
+ The decorator will create an optional artifact, specified by `var`, which
667
+ contains the exception raised. You can use it to detect the presence
668
+ of errors, indicating that all happy-path artifacts produced by the step
669
+ are missing.
689
670
 
690
671
 
691
672
  Parameters
692
673
  ----------
693
- load : Union[List[str],str,List[Tuple[str,Union[str,None]]]], default: None
694
- Artifact name/s referencing the models/checkpoints to load. Artifact names refer to the names of the instance variables set to `self`.
695
- These artifact names give to `load` be reference objects or reference `key` string's from objects created by `current.checkpoint` / `current.model` / `current.huggingface_hub`.
696
- If a list of tuples is provided, the first element is the artifact name and the second element is the path the artifact needs be unpacked on
697
- the local filesystem. If the second element is None, the artifact will be unpacked in the current working directory.
698
- If a string is provided, then the artifact corresponding to that name will be loaded in the current working directory.
699
-
700
- temp_dir_root : str, default: None
701
- The root directory under which `current.model.loaded` will store loaded models
674
+ var : str, optional, default None
675
+ Name of the artifact in which to store the caught exception.
676
+ If not specified, the exception is not stored.
677
+ print_exception : bool, default True
678
+ Determines whether or not the exception is printed to
679
+ stdout when caught.
702
680
  """
703
681
  ...
704
682
 
705
683
  @typing.overload
706
- def model(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
684
+ def catch(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
707
685
  ...
708
686
 
709
687
  @typing.overload
710
- def model(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
688
+ def catch(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
711
689
  ...
712
690
 
713
- def model(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, load: typing.Union[typing.List[str], str, typing.List[typing.Tuple[str, typing.Optional[str]]]] = None, temp_dir_root: str = None):
691
+ def catch(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, var: typing.Optional[str] = None, print_exception: bool = True):
714
692
  """
715
- Enables loading / saving of models within a step.
716
-
717
- > Examples
718
- - Saving Models
719
- ```python
720
- @model
721
- @step
722
- def train(self):
723
- # current.model.save returns a dictionary reference to the model saved
724
- self.my_model = current.model.save(
725
- path_to_my_model,
726
- label="my_model",
727
- metadata={
728
- "epochs": 10,
729
- "batch-size": 32,
730
- "learning-rate": 0.001,
731
- }
732
- )
733
- self.next(self.test)
734
-
735
- @model(load="my_model")
736
- @step
737
- def test(self):
738
- # `current.model.loaded` returns a dictionary of the loaded models
739
- # where the key is the name of the artifact and the value is the path to the model
740
- print(os.listdir(current.model.loaded["my_model"]))
741
- self.next(self.end)
742
- ```
693
+ Specifies that the step will succeed under all circumstances.
743
694
 
744
- - Loading models
745
- ```python
746
- @step
747
- def train(self):
748
- # current.model.load returns the path to the model loaded
749
- checkpoint_path = current.model.load(
750
- self.checkpoint_key,
751
- )
752
- model_path = current.model.load(
753
- self.model,
754
- )
755
- self.next(self.test)
756
- ```
695
+ The decorator will create an optional artifact, specified by `var`, which
696
+ contains the exception raised. You can use it to detect the presence
697
+ of errors, indicating that all happy-path artifacts produced by the step
698
+ are missing.
757
699
 
758
700
 
759
701
  Parameters
760
702
  ----------
761
- load : Union[List[str],str,List[Tuple[str,Union[str,None]]]], default: None
762
- Artifact name/s referencing the models/checkpoints to load. Artifact names refer to the names of the instance variables set to `self`.
763
- These artifact names give to `load` be reference objects or reference `key` string's from objects created by `current.checkpoint` / `current.model` / `current.huggingface_hub`.
764
- If a list of tuples is provided, the first element is the artifact name and the second element is the path the artifact needs be unpacked on
765
- the local filesystem. If the second element is None, the artifact will be unpacked in the current working directory.
766
- If a string is provided, then the artifact corresponding to that name will be loaded in the current working directory.
767
-
768
- temp_dir_root : str, default: None
769
- The root directory under which `current.model.loaded` will store loaded models
703
+ var : str, optional, default None
704
+ Name of the artifact in which to store the caught exception.
705
+ If not specified, the exception is not stored.
706
+ print_exception : bool, default True
707
+ Determines whether or not the exception is printed to
708
+ stdout when caught.
770
709
  """
771
710
  ...
772
711
 
@@ -826,498 +765,409 @@ def retry(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], ty
826
765
  ...
827
766
 
828
767
  @typing.overload
829
- def card(*, type: str = 'default', id: typing.Optional[str] = None, options: typing.Dict[str, typing.Any] = {}, timeout: int = 45) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
830
- """
831
- Creates a human-readable report, a Metaflow Card, after this step completes.
832
-
833
- Note that you may add multiple `@card` decorators in a step with different parameters.
834
-
835
-
836
- Parameters
837
- ----------
838
- type : str, default 'default'
839
- Card type.
840
- id : str, optional, default None
841
- If multiple cards are present, use this id to identify this card.
842
- options : Dict[str, Any], default {}
843
- Options passed to the card. The contents depend on the card type.
844
- timeout : int, default 45
845
- Interrupt reporting if it takes more than this many seconds.
846
- """
847
- ...
848
-
849
- @typing.overload
850
- def card(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
851
- ...
852
-
853
- @typing.overload
854
- def card(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
855
- ...
856
-
857
- def card(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, type: str = 'default', id: typing.Optional[str] = None, options: typing.Dict[str, typing.Any] = {}, timeout: int = 45):
858
- """
859
- Creates a human-readable report, a Metaflow Card, after this step completes.
860
-
861
- Note that you may add multiple `@card` decorators in a step with different parameters.
862
-
863
-
864
- Parameters
865
- ----------
866
- type : str, default 'default'
867
- Card type.
868
- id : str, optional, default None
869
- If multiple cards are present, use this id to identify this card.
870
- options : Dict[str, Any], default {}
871
- Options passed to the card. The contents depend on the card type.
872
- timeout : int, default 45
873
- Interrupt reporting if it takes more than this many seconds.
874
- """
875
- ...
876
-
877
- @typing.overload
878
- def parallel(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
879
- """
880
- Decorator prototype for all step decorators. This function gets specialized
881
- and imported for all decorators types by _import_plugin_decorators().
882
- """
883
- ...
884
-
885
- @typing.overload
886
- def parallel(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
887
- ...
888
-
889
- def parallel(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
890
- """
891
- Decorator prototype for all step decorators. This function gets specialized
892
- and imported for all decorators types by _import_plugin_decorators().
893
- """
894
- ...
895
-
896
- @typing.overload
897
- def checkpoint(*, load_policy: str = 'fresh', temp_dir_root: str = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
768
+ def model(*, load: typing.Union[typing.List[str], str, typing.List[typing.Tuple[str, typing.Optional[str]]]] = None, temp_dir_root: str = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
898
769
  """
899
- Enables checkpointing for a step.
770
+ Enables loading / saving of models within a step.
900
771
 
901
772
  > Examples
902
-
903
- - Saving Checkpoints
904
-
773
+ - Saving Models
905
774
  ```python
906
- @checkpoint
775
+ @model
907
776
  @step
908
777
  def train(self):
909
- model = create_model(self.parameters, checkpoint_path = None)
910
- for i in range(self.epochs):
911
- # some training logic
912
- loss = model.train(self.dataset)
913
- if i % 10 == 0:
914
- model.save(
915
- current.checkpoint.directory,
916
- )
917
- # saves the contents of the `current.checkpoint.directory` as a checkpoint
918
- # and returns a reference dictionary to the checkpoint saved in the datastore
919
- self.latest_checkpoint = current.checkpoint.save(
920
- name="epoch_checkpoint",
921
- metadata={
922
- "epoch": i,
923
- "loss": loss,
924
- }
925
- )
926
- ```
778
+ # current.model.save returns a dictionary reference to the model saved
779
+ self.my_model = current.model.save(
780
+ path_to_my_model,
781
+ label="my_model",
782
+ metadata={
783
+ "epochs": 10,
784
+ "batch-size": 32,
785
+ "learning-rate": 0.001,
786
+ }
787
+ )
788
+ self.next(self.test)
927
789
 
928
- - Using Loaded Checkpoints
790
+ @model(load="my_model")
791
+ @step
792
+ def test(self):
793
+ # `current.model.loaded` returns a dictionary of the loaded models
794
+ # where the key is the name of the artifact and the value is the path to the model
795
+ print(os.listdir(current.model.loaded["my_model"]))
796
+ self.next(self.end)
797
+ ```
929
798
 
799
+ - Loading models
930
800
  ```python
931
- @retry(times=3)
932
- @checkpoint
933
801
  @step
934
802
  def train(self):
935
- # Assume that the task has restarted and the previous attempt of the task
936
- # saved a checkpoint
937
- checkpoint_path = None
938
- if current.checkpoint.is_loaded: # Check if a checkpoint is loaded
939
- print("Loaded checkpoint from the previous attempt")
940
- checkpoint_path = current.checkpoint.directory
941
-
942
- model = create_model(self.parameters, checkpoint_path = checkpoint_path)
943
- for i in range(self.epochs):
944
- ...
803
+ # current.model.load returns the path to the model loaded
804
+ checkpoint_path = current.model.load(
805
+ self.checkpoint_key,
806
+ )
807
+ model_path = current.model.load(
808
+ self.model,
809
+ )
810
+ self.next(self.test)
945
811
  ```
946
812
 
947
813
 
948
814
  Parameters
949
815
  ----------
950
- load_policy : str, default: "fresh"
951
- The policy for loading the checkpoint. The following policies are supported:
952
- - "eager": Loads the the latest available checkpoint within the namespace.
953
- With this mode, the latest checkpoint written by any previous task (can be even a different run) of the step
954
- will be loaded at the start of the task.
955
- - "none": Do not load any checkpoint
956
- - "fresh": Loads the lastest checkpoint created within the running Task.
957
- This mode helps loading checkpoints across various retry attempts of the same task.
958
- With this mode, no checkpoint will be loaded at the start of a task but any checkpoints
959
- created within the task will be loaded when the task is retries execution on failure.
816
+ load : Union[List[str],str,List[Tuple[str,Union[str,None]]]], default: None
817
+ Artifact name/s referencing the models/checkpoints to load. Artifact names refer to the names of the instance variables set to `self`.
818
+ The artifact names given to `load` can be reference objects or reference `key` strings from objects created by `current.checkpoint` / `current.model` / `current.huggingface_hub`.
819
+ If a list of tuples is provided, the first element is the artifact name and the second element is the path the artifact needs to be unpacked on
820
+ the local filesystem. If the second element is None, the artifact will be unpacked in the current working directory.
821
+ If a string is provided, then the artifact corresponding to that name will be loaded in the current working directory.
960
822
 
961
823
  temp_dir_root : str, default: None
962
- The root directory under which `current.checkpoint.directory` will be created.
824
+ The root directory under which `current.model.loaded` will store loaded models
963
825
  """
964
826
  ...
965
827
 
966
828
  @typing.overload
967
- def checkpoint(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
829
+ def model(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
968
830
  ...
969
831
 
970
832
  @typing.overload
971
- def checkpoint(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
833
+ def model(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
972
834
  ...
973
835
 
974
- def checkpoint(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, load_policy: str = 'fresh', temp_dir_root: str = None):
836
+ def model(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, load: typing.Union[typing.List[str], str, typing.List[typing.Tuple[str, typing.Optional[str]]]] = None, temp_dir_root: str = None):
975
837
  """
976
- Enables checkpointing for a step.
838
+ Enables loading / saving of models within a step.
977
839
 
978
840
  > Examples
979
-
980
- - Saving Checkpoints
981
-
841
+ - Saving Models
982
842
  ```python
983
- @checkpoint
843
+ @model
984
844
  @step
985
845
  def train(self):
986
- model = create_model(self.parameters, checkpoint_path = None)
987
- for i in range(self.epochs):
988
- # some training logic
989
- loss = model.train(self.dataset)
990
- if i % 10 == 0:
991
- model.save(
992
- current.checkpoint.directory,
993
- )
994
- # saves the contents of the `current.checkpoint.directory` as a checkpoint
995
- # and returns a reference dictionary to the checkpoint saved in the datastore
996
- self.latest_checkpoint = current.checkpoint.save(
997
- name="epoch_checkpoint",
998
- metadata={
999
- "epoch": i,
1000
- "loss": loss,
1001
- }
1002
- )
1003
- ```
846
+ # current.model.save returns a dictionary reference to the model saved
847
+ self.my_model = current.model.save(
848
+ path_to_my_model,
849
+ label="my_model",
850
+ metadata={
851
+ "epochs": 10,
852
+ "batch-size": 32,
853
+ "learning-rate": 0.001,
854
+ }
855
+ )
856
+ self.next(self.test)
1004
857
 
1005
- - Using Loaded Checkpoints
858
+ @model(load="my_model")
859
+ @step
860
+ def test(self):
861
+ # `current.model.loaded` returns a dictionary of the loaded models
862
+ # where the key is the name of the artifact and the value is the path to the model
863
+ print(os.listdir(current.model.loaded["my_model"]))
864
+ self.next(self.end)
865
+ ```
1006
866
 
867
+ - Loading models
1007
868
  ```python
1008
- @retry(times=3)
1009
- @checkpoint
1010
869
  @step
1011
870
  def train(self):
1012
- # Assume that the task has restarted and the previous attempt of the task
1013
- # saved a checkpoint
1014
- checkpoint_path = None
1015
- if current.checkpoint.is_loaded: # Check if a checkpoint is loaded
1016
- print("Loaded checkpoint from the previous attempt")
1017
- checkpoint_path = current.checkpoint.directory
1018
-
1019
- model = create_model(self.parameters, checkpoint_path = checkpoint_path)
1020
- for i in range(self.epochs):
1021
- ...
871
+ # current.model.load returns the path to the model loaded
872
+ checkpoint_path = current.model.load(
873
+ self.checkpoint_key,
874
+ )
875
+ model_path = current.model.load(
876
+ self.model,
877
+ )
878
+ self.next(self.test)
1022
879
  ```
1023
880
 
1024
881
 
1025
882
  Parameters
1026
883
  ----------
1027
- load_policy : str, default: "fresh"
1028
- The policy for loading the checkpoint. The following policies are supported:
1029
- - "eager": Loads the the latest available checkpoint within the namespace.
1030
- With this mode, the latest checkpoint written by any previous task (can be even a different run) of the step
1031
- will be loaded at the start of the task.
1032
- - "none": Do not load any checkpoint
1033
- - "fresh": Loads the lastest checkpoint created within the running Task.
1034
- This mode helps loading checkpoints across various retry attempts of the same task.
1035
- With this mode, no checkpoint will be loaded at the start of a task but any checkpoints
1036
- created within the task will be loaded when the task is retries execution on failure.
884
+ load : Union[List[str],str,List[Tuple[str,Union[str,None]]]], default: None
885
+ Artifact name/s referencing the models/checkpoints to load. Artifact names refer to the names of the instance variables set to `self`.
886
+ The artifact names given to `load` can be reference objects or reference `key` strings from objects created by `current.checkpoint` / `current.model` / `current.huggingface_hub`.
887
+ If a list of tuples is provided, the first element is the artifact name and the second element is the path the artifact needs to be unpacked on
888
+ the local filesystem. If the second element is None, the artifact will be unpacked in the current working directory.
889
+ If a string is provided, then the artifact corresponding to that name will be loaded in the current working directory.
1037
890
 
1038
891
  temp_dir_root : str, default: None
1039
- The root directory under which `current.checkpoint.directory` will be created.
892
+ The root directory under which `current.model.loaded` will store loaded models
1040
893
  """
1041
894
  ...
1042
895
 
1043
- @typing.overload
1044
- def timeout(*, seconds: int = 0, minutes: int = 0, hours: int = 0) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
896
+ def kubernetes(*, cpu: int = 1, memory: int = 4096, disk: int = 10240, image: typing.Optional[str] = None, image_pull_policy: str = 'KUBERNETES_IMAGE_PULL_POLICY', image_pull_secrets: typing.List[str] = [], service_account: str = 'METAFLOW_KUBERNETES_SERVICE_ACCOUNT', secrets: typing.Optional[typing.List[str]] = None, node_selector: typing.Union[typing.Dict[str, str], str, None] = None, namespace: str = 'METAFLOW_KUBERNETES_NAMESPACE', gpu: typing.Optional[int] = None, gpu_vendor: str = 'KUBERNETES_GPU_VENDOR', tolerations: typing.List[typing.Dict[str, str]] = [], labels: typing.Dict[str, str] = 'METAFLOW_KUBERNETES_LABELS', annotations: typing.Dict[str, str] = 'METAFLOW_KUBERNETES_ANNOTATIONS', use_tmpfs: bool = False, tmpfs_tempdir: bool = True, tmpfs_size: typing.Optional[int] = None, tmpfs_path: typing.Optional[str] = '/metaflow_temp', persistent_volume_claims: typing.Optional[typing.Dict[str, str]] = None, shared_memory: typing.Optional[int] = None, port: typing.Optional[int] = None, compute_pool: typing.Optional[str] = None, hostname_resolution_timeout: int = 600, qos: str = 'Burstable', security_context: typing.Optional[typing.Dict[str, typing.Any]] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1045
897
  """
1046
- Specifies a timeout for your step.
1047
-
1048
- This decorator is useful if this step may hang indefinitely.
1049
-
1050
- This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
1051
- A timeout is considered to be an exception thrown by the step. It will cause the step to be
1052
- retried if needed and the exception will be caught by the `@catch` decorator, if present.
1053
-
1054
- Note that all the values specified in parameters are added together so if you specify
1055
- 60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.
898
+ Specifies that this step should execute on Kubernetes.
1056
899
 
1057
900
 
1058
901
  Parameters
1059
902
  ----------
1060
- seconds : int, default 0
1061
- Number of seconds to wait prior to timing out.
1062
- minutes : int, default 0
1063
- Number of minutes to wait prior to timing out.
1064
- hours : int, default 0
1065
- Number of hours to wait prior to timing out.
903
+ cpu : int, default 1
904
+ Number of CPUs required for this step. If `@resources` is
905
+ also present, the maximum value from all decorators is used.
906
+ memory : int, default 4096
907
+ Memory size (in MB) required for this step. If
908
+ `@resources` is also present, the maximum value from all decorators is
909
+ used.
910
+ disk : int, default 10240
911
+ Disk size (in MB) required for this step. If
912
+ `@resources` is also present, the maximum value from all decorators is
913
+ used.
914
+ image : str, optional, default None
915
+ Docker image to use when launching on Kubernetes. If not specified, and
916
+ METAFLOW_KUBERNETES_CONTAINER_IMAGE is specified, that image is used. If
917
+ not, a default Docker image mapping to the current version of Python is used.
918
+ image_pull_policy: str, default KUBERNETES_IMAGE_PULL_POLICY
919
+ If given, the imagePullPolicy to be applied to the Docker image of the step.
920
+ image_pull_secrets: List[str], default []
921
+ The default is extracted from METAFLOW_KUBERNETES_IMAGE_PULL_SECRETS.
922
+ Kubernetes image pull secrets to use when pulling container images
923
+ in Kubernetes.
924
+ service_account : str, default METAFLOW_KUBERNETES_SERVICE_ACCOUNT
925
+ Kubernetes service account to use when launching pod in Kubernetes.
926
+ secrets : List[str], optional, default None
927
+ Kubernetes secrets to use when launching pod in Kubernetes. These
928
+ secrets are in addition to the ones defined in `METAFLOW_KUBERNETES_SECRETS`
929
+ in Metaflow configuration.
930
+ node_selector: Union[Dict[str,str], str], optional, default None
931
+ Kubernetes node selector(s) to apply to the pod running the task.
932
+ Can be passed in as a comma separated string of values e.g.
933
+ 'kubernetes.io/os=linux,kubernetes.io/arch=amd64' or as a dictionary
934
+ {'kubernetes.io/os': 'linux', 'kubernetes.io/arch': 'amd64'}
935
+ namespace : str, default METAFLOW_KUBERNETES_NAMESPACE
936
+ Kubernetes namespace to use when launching pod in Kubernetes.
937
+ gpu : int, optional, default None
938
+ Number of GPUs required for this step. A value of zero implies that
939
+ the scheduled node should not have GPUs.
940
+ gpu_vendor : str, default KUBERNETES_GPU_VENDOR
941
+ The vendor of the GPUs to be used for this step.
942
+ tolerations : List[Dict[str,str]], default []
943
+ The default is extracted from METAFLOW_KUBERNETES_TOLERATIONS.
944
+ Kubernetes tolerations to use when launching pod in Kubernetes.
945
+ labels: Dict[str, str], default: METAFLOW_KUBERNETES_LABELS
946
+ Kubernetes labels to use when launching pod in Kubernetes.
947
+ annotations: Dict[str, str], default: METAFLOW_KUBERNETES_ANNOTATIONS
948
+ Kubernetes annotations to use when launching pod in Kubernetes.
949
+ use_tmpfs : bool, default False
950
+ This enables an explicit tmpfs mount for this step.
951
+ tmpfs_tempdir : bool, default True
952
+ Sets METAFLOW_TEMPDIR to tmpfs_path if set for this step.
953
+ tmpfs_size : int, optional, default: None
954
+ The value for the size (in MiB) of the tmpfs mount for this step.
955
+ This parameter maps to the `--tmpfs` option in Docker. Defaults to 50% of the
956
+ memory allocated for this step.
957
+ tmpfs_path : str, optional, default /metaflow_temp
958
+ Path to tmpfs mount for this step.
959
+ persistent_volume_claims : Dict[str, str], optional, default None
960
+ A map (dictionary) of persistent volumes to be mounted to the pod for this step. The map is from persistent
961
+ volumes to the path to which the volume is to be mounted, e.g., `{'pvc-name': '/path/to/mount/on'}`.
962
+ shared_memory: int, optional
963
+ Shared memory size (in MiB) required for this step
964
+ port: int, optional
965
+ Port number to specify in the Kubernetes job object
966
+ compute_pool : str, optional, default None
967
+ Compute pool to be used for this step.
968
+ If not specified, any accessible compute pool within the perimeter is used.
969
+ hostname_resolution_timeout: int, default 10 * 60
970
+ Timeout in seconds for the worker tasks in the gang-scheduled cluster to resolve the hostname of the control task.
971
+ Only applicable when @parallel is used.
972
+ qos: str, default: Burstable
973
+ Quality of Service class to assign to the pod. Supported values are: Guaranteed, Burstable, BestEffort
974
+
975
+ security_context: Dict[str, Any], optional, default None
976
+ Container security context. Applies to the task container. Allows the following keys:
977
+ - privileged: bool, optional, default None
978
+ - allow_privilege_escalation: bool, optional, default None
979
+ - run_as_user: int, optional, default None
980
+ - run_as_group: int, optional, default None
981
+ - run_as_non_root: bool, optional, default None
1066
982
  """
1067
983
  ...
1068
984
 
1069
- @typing.overload
1070
- def timeout(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1071
- ...
1072
-
1073
- @typing.overload
1074
- def timeout(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1075
- ...
1076
-
1077
- def timeout(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, seconds: int = 0, minutes: int = 0, hours: int = 0):
985
+ def ollama(*, models: list, backend: str, force_pull: bool, cache_update_policy: str, force_cache_update: bool, debug: bool, circuit_breaker_config: dict, timeout_config: dict) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1078
986
  """
1079
- Specifies a timeout for your step.
987
+ This decorator is used to run Ollama APIs as Metaflow task sidecars.
1080
988
 
1081
- This decorator is useful if this step may hang indefinitely.
989
+ User code call
990
+ --------------
991
+ @ollama(
992
+ models=[...],
993
+ ...
994
+ )
1082
995
 
1083
- This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
1084
- A timeout is considered to be an exception thrown by the step. It will cause the step to be
1085
- retried if needed and the exception will be caught by the `@catch` decorator, if present.
996
+ Valid backend options
997
+ ---------------------
998
+ - 'local': Run as a separate process on the local task machine.
999
+ - (TODO) 'managed': Outerbounds hosts and selects compute provider.
1000
+ - (TODO) 'remote': Spin up separate instance to serve Ollama models.
1086
1001
 
1087
- Note that all the values specified in parameters are added together so if you specify
1088
- 60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.
1002
+ Valid model options
1003
+ -------------------
1004
+ Any model here https://ollama.com/search, e.g. 'llama3.2', 'llama3.3'
1089
1005
 
1090
1006
 
1091
1007
  Parameters
1092
1008
  ----------
1093
- seconds : int, default 0
1094
- Number of seconds to wait prior to timing out.
1095
- minutes : int, default 0
1096
- Number of minutes to wait prior to timing out.
1097
- hours : int, default 0
1098
- Number of hours to wait prior to timing out.
1009
+ models: list[str]
1010
+ List of Ollama containers running models in sidecars.
1011
+ backend: str
1012
+ Determines where and how to run the Ollama process.
1013
+ force_pull: bool
1014
+ Whether to run `ollama pull` no matter what, or first check the remote cache in Metaflow datastore for this model key.
1015
+ cache_update_policy: str
1016
+ Cache update policy: "auto", "force", or "never".
1017
+ force_cache_update: bool
1018
+ Simple override for "force" cache update policy.
1019
+ debug: bool
1020
+ Whether to turn on verbose debugging logs.
1021
+ circuit_breaker_config: dict
1022
+ Configuration for circuit breaker protection. Keys: failure_threshold, recovery_timeout, reset_timeout.
1023
+ timeout_config: dict
1024
+ Configuration for various operation timeouts. Keys: pull, stop, health_check, install, server_startup.
1099
1025
  """
1100
1026
  ...
1101
1027
 
1102
1028
  @typing.overload
1103
- def conda(*, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1029
+ def parallel(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1104
1030
  """
1105
- Specifies the Conda environment for the step.
1106
-
1107
- Information in this decorator will augment any
1108
- attributes set in the `@conda_base` flow-level decorator. Hence,
1109
- you can use `@conda_base` to set packages required by all
1110
- steps and use `@conda` to specify step-specific overrides.
1111
-
1112
-
1113
- Parameters
1114
- ----------
1115
- packages : Dict[str, str], default {}
1116
- Packages to use for this step. The key is the name of the package
1117
- and the value is the version to use.
1118
- libraries : Dict[str, str], default {}
1119
- Supported for backward compatibility. When used with packages, packages will take precedence.
1120
- python : str, optional, default None
1121
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
1122
- that the version used will correspond to the version of the Python interpreter used to start the run.
1123
- disabled : bool, default False
1124
- If set to True, disables @conda.
1031
+ Decorator prototype for all step decorators. This function gets specialized
1032
+ and imported for all decorator types by _import_plugin_decorators().
1125
1033
  """
1126
1034
  ...
1127
1035
 
1128
1036
  @typing.overload
1129
- def conda(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1037
+ def parallel(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1130
1038
  ...
1131
1039
 
1132
- @typing.overload
1133
- def conda(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1040
+ def parallel(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
1041
+ """
1042
+ Decorator prototype for all step decorators. This function gets specialized
1043
+ and imported for all decorator types by _import_plugin_decorators().
1044
+ """
1134
1045
  ...
1135
1046
 
1136
- def conda(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False):
1047
+ def nvidia(*, gpu: int, gpu_type: str, queue_timeout: int) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1137
1048
  """
1138
- Specifies the Conda environment for the step.
1139
-
1140
- Information in this decorator will augment any
1141
- attributes set in the `@conda_base` flow-level decorator. Hence,
1142
- you can use `@conda_base` to set packages required by all
1143
- steps and use `@conda` to specify step-specific overrides.
1049
+ Specifies that this step should execute on DGX cloud.
1144
1050
 
1145
1051
 
1146
1052
  Parameters
1147
1053
  ----------
1148
- packages : Dict[str, str], default {}
1149
- Packages to use for this step. The key is the name of the package
1150
- and the value is the version to use.
1151
- libraries : Dict[str, str], default {}
1152
- Supported for backward compatibility. When used with packages, packages will take precedence.
1153
- python : str, optional, default None
1154
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
1155
- that the version used will correspond to the version of the Python interpreter used to start the run.
1156
- disabled : bool, default False
1157
- If set to True, disables @conda.
1054
+ gpu : int
1055
+ Number of GPUs to use.
1056
+ gpu_type : str
1057
+ Type of Nvidia GPU to use.
1058
+ queue_timeout : int
1059
+ Time to keep the job in NVCF's queue.
1158
1060
  """
1159
1061
  ...
1160
1062
 
1161
- @typing.overload
1162
- def resources(*, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1063
+ def nvct(*, gpu: int, gpu_type: str) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1163
1064
  """
1164
- Specifies the resources needed when executing this step.
1165
-
1166
- Use `@resources` to specify the resource requirements
1167
- independently of the specific compute layer (`@batch`, `@kubernetes`).
1168
-
1169
- You can choose the compute layer on the command line by executing e.g.
1170
- ```
1171
- python myflow.py run --with batch
1172
- ```
1173
- or
1174
- ```
1175
- python myflow.py run --with kubernetes
1176
- ```
1177
- which executes the flow on the desired system using the
1178
- requirements specified in `@resources`.
1065
+ Specifies that this step should execute on DGX cloud.
1179
1066
 
1180
1067
 
1181
1068
  Parameters
1182
1069
  ----------
1183
- cpu : int, default 1
1184
- Number of CPUs required for this step.
1185
- gpu : int, optional, default None
1186
- Number of GPUs required for this step.
1187
- disk : int, optional, default None
1188
- Disk size (in MB) required for this step. Only applies on Kubernetes.
1189
- memory : int, default 4096
1190
- Memory size (in MB) required for this step.
1191
- shared_memory : int, optional, default None
1192
- The value for the size (in MiB) of the /dev/shm volume for this step.
1193
- This parameter maps to the `--shm-size` option in Docker.
1070
+ gpu : int
1071
+ Number of GPUs to use.
1072
+ gpu_type : str
1073
+ Type of Nvidia GPU to use.
1194
1074
  """
1195
1075
  ...
1196
1076
 
1197
1077
  @typing.overload
1198
- def resources(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1199
- ...
1200
-
1201
- @typing.overload
1202
- def resources(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1203
- ...
1204
-
1205
- def resources(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None):
1078
+ def environment(*, vars: typing.Dict[str, str] = {}) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1206
1079
  """
1207
- Specifies the resources needed when executing this step.
1208
-
1209
- Use `@resources` to specify the resource requirements
1210
- independently of the specific compute layer (`@batch`, `@kubernetes`).
1211
-
1212
- You can choose the compute layer on the command line by executing e.g.
1213
- ```
1214
- python myflow.py run --with batch
1215
- ```
1216
- or
1217
- ```
1218
- python myflow.py run --with kubernetes
1219
- ```
1220
- which executes the flow on the desired system using the
1221
- requirements specified in `@resources`.
1080
+ Specifies environment variables to be set prior to the execution of a step.
1222
1081
 
1223
1082
 
1224
1083
  Parameters
1225
1084
  ----------
1226
- cpu : int, default 1
1227
- Number of CPUs required for this step.
1228
- gpu : int, optional, default None
1229
- Number of GPUs required for this step.
1230
- disk : int, optional, default None
1231
- Disk size (in MB) required for this step. Only applies on Kubernetes.
1232
- memory : int, default 4096
1233
- Memory size (in MB) required for this step.
1234
- shared_memory : int, optional, default None
1235
- The value for the size (in MiB) of the /dev/shm volume for this step.
1236
- This parameter maps to the `--shm-size` option in Docker.
1085
+ vars : Dict[str, str], default {}
1086
+ Dictionary of environment variables to set.
1237
1087
  """
1238
1088
  ...
1239
1089
 
1240
1090
  @typing.overload
1241
- def test_append_card(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1242
- """
1243
- A simple decorator that demonstrates using CardDecoratorInjector
1244
- to inject a card and render simple markdown content.
1245
- """
1091
+ def environment(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1246
1092
  ...
1247
1093
 
1248
1094
  @typing.overload
1249
- def test_append_card(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1250
- ...
1251
-
1252
- def test_append_card(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
1253
- """
1254
- A simple decorator that demonstrates using CardDecoratorInjector
1255
- to inject a card and render simple markdown content.
1256
- """
1095
+ def environment(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1257
1096
  ...
1258
1097
 
1259
- def nvidia(*, gpu: int, gpu_type: str, queue_timeout: int) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1098
+ def environment(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, vars: typing.Dict[str, str] = {}):
1260
1099
  """
1261
- Specifies that this step should execute on DGX cloud.
1100
+ Specifies environment variables to be set prior to the execution of a step.
1262
1101
 
1263
1102
 
1264
1103
  Parameters
1265
1104
  ----------
1266
- gpu : int
1267
- Number of GPUs to use.
1268
- gpu_type : str
1269
- Type of Nvidia GPU to use.
1270
- queue_timeout : int
1271
- Time to keep the job in NVCF's queue.
1105
+ vars : Dict[str, str], default {}
1106
+ Dictionary of environment variables to set.
1272
1107
  """
1273
1108
  ...
1274
1109
 
1275
1110
  @typing.overload
1276
- def secrets(*, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = [], role: typing.Optional[str] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1111
+ def timeout(*, seconds: int = 0, minutes: int = 0, hours: int = 0) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1277
1112
  """
1278
- Specifies secrets to be retrieved and injected as environment variables prior to
1279
- the execution of a step.
1113
+ Specifies a timeout for your step.
1114
+
1115
+ This decorator is useful if this step may hang indefinitely.
1116
+
1117
+ This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
1118
+ A timeout is considered to be an exception thrown by the step. It will cause the step to be
1119
+ retried if needed and the exception will be caught by the `@catch` decorator, if present.
1120
+
1121
+ Note that all the values specified in parameters are added together so if you specify
1122
+ 60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.
1280
1123
 
1281
1124
 
1282
1125
  Parameters
1283
1126
  ----------
1284
- sources : List[Union[str, Dict[str, Any]]], default: []
1285
- List of secret specs, defining how the secrets are to be retrieved
1286
- role : str, optional, default: None
1287
- Role to use for fetching secrets
1127
+ seconds : int, default 0
1128
+ Number of seconds to wait prior to timing out.
1129
+ minutes : int, default 0
1130
+ Number of minutes to wait prior to timing out.
1131
+ hours : int, default 0
1132
+ Number of hours to wait prior to timing out.
1288
1133
  """
1289
1134
  ...
1290
1135
 
1291
1136
  @typing.overload
1292
- def secrets(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1137
+ def timeout(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1293
1138
  ...
1294
1139
 
1295
1140
  @typing.overload
1296
- def secrets(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1141
+ def timeout(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1297
1142
  ...
1298
1143
 
1299
- def secrets(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = [], role: typing.Optional[str] = None):
1144
+ def timeout(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, seconds: int = 0, minutes: int = 0, hours: int = 0):
1300
1145
  """
1301
- Specifies secrets to be retrieved and injected as environment variables prior to
1302
- the execution of a step.
1146
+ Specifies a timeout for your step.
1147
+
1148
+ This decorator is useful if this step may hang indefinitely.
1149
+
1150
+ This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
1151
+ A timeout is considered to be an exception thrown by the step. It will cause the step to be
1152
+ retried if needed and the exception will be caught by the `@catch` decorator, if present.
1153
+
1154
+ Note that all the values specified in parameters are added together so if you specify
1155
+ 60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.
1303
1156
 
1304
1157
 
1305
1158
  Parameters
1306
1159
  ----------
1307
- sources : List[Union[str, Dict[str, Any]]], default: []
1308
- List of secret specs, defining how the secrets are to be retrieved
1309
- role : str, optional, default: None
1310
- Role to use for fetching secrets
1160
+ seconds : int, default 0
1161
+ Number of seconds to wait prior to timing out.
1162
+ minutes : int, default 0
1163
+ Number of minutes to wait prior to timing out.
1164
+ hours : int, default 0
1165
+ Number of hours to wait prior to timing out.
1311
1166
  """
1312
1167
  ...
1313
1168
 
1314
- def coreweave_s3_proxy(*, integration_name: typing.Optional[str] = None, write_mode: typing.Optional[str] = None, debug: typing.Optional[bool] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1169
+ def s3_proxy(*, integration_name: typing.Optional[str] = None, write_mode: typing.Optional[str] = None, debug: typing.Optional[bool] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1315
1170
  """
1316
- `@coreweave_s3_proxy` is a CoreWeave-specific S3 Proxy decorator for routing S3 requests through a local proxy service.
1317
- It exists to make it easier for users to know that this decorator should only be used with
1318
- a Neo Cloud like CoreWeave. The underlying mechanics of the decorator is the same as the `@s3_proxy`:
1319
-
1320
-
1321
1171
  Set up an S3 proxy that caches objects in an external, S3‑compatible bucket
1322
1172
  for S3 read and write requests.
1323
1173
 
@@ -1376,251 +1226,338 @@ def coreweave_s3_proxy(*, integration_name: typing.Optional[str] = None, write_m
1376
1226
  ...
1377
1227
 
1378
1228
  @typing.overload
1379
- def environment(*, vars: typing.Dict[str, str] = {}) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1229
+ def secrets(*, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = [], role: typing.Optional[str] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1380
1230
  """
1381
- Specifies environment variables to be set prior to the execution of a step.
1231
+ Specifies secrets to be retrieved and injected as environment variables prior to
1232
+ the execution of a step.
1382
1233
 
1383
1234
 
1384
1235
  Parameters
1385
1236
  ----------
1386
- vars : Dict[str, str], default {}
1387
- Dictionary of environment variables to set.
1237
+ sources : List[Union[str, Dict[str, Any]]], default: []
1238
+ List of secret specs, defining how the secrets are to be retrieved
1239
+ role : str, optional, default: None
1240
+ Role to use for fetching secrets
1388
1241
  """
1389
1242
  ...
1390
1243
 
1391
1244
  @typing.overload
1392
- def environment(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1245
+ def secrets(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1393
1246
  ...
1394
1247
 
1395
1248
  @typing.overload
1396
- def environment(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1249
+ def secrets(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1397
1250
  ...
1398
1251
 
1399
- def environment(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, vars: typing.Dict[str, str] = {}):
1252
+ def secrets(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = [], role: typing.Optional[str] = None):
1400
1253
  """
1401
- Specifies environment variables to be set prior to the execution of a step.
1254
+ Specifies secrets to be retrieved and injected as environment variables prior to
1255
+ the execution of a step.
1402
1256
 
1403
1257
 
1404
1258
  Parameters
1405
1259
  ----------
1406
- vars : Dict[str, str], default {}
1407
- Dictionary of environment variables to set.
1260
+ sources : List[Union[str, Dict[str, Any]]], default: []
1261
+ List of secret specs, defining how the secrets are to be retrieved
1262
+ role : str, optional, default: None
1263
+ Role to use for fetching secrets
1408
1264
  """
1409
1265
  ...
1410
1266
 
1411
1267
  @typing.overload
1412
- def pypi(*, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1268
+ def resources(*, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1413
1269
  """
1414
- Specifies the PyPI packages for the step.
1270
+ Specifies the resources needed when executing this step.
1415
1271
 
1416
- Information in this decorator will augment any
1417
- attributes set in the `@pypi_base` flow-level decorator. Hence,
1418
- you can use `@pypi_base` to set packages required by all
1419
- steps and use `@pypi` to specify step-specific overrides.
1272
+ Use `@resources` to specify the resource requirements
1273
+ independently of the specific compute layer (`@batch`, `@kubernetes`).
1274
+
1275
+ You can choose the compute layer on the command line by executing e.g.
1276
+ ```
1277
+ python myflow.py run --with batch
1278
+ ```
1279
+ or
1280
+ ```
1281
+ python myflow.py run --with kubernetes
1282
+ ```
1283
+ which executes the flow on the desired system using the
1284
+ requirements specified in `@resources`.
1420
1285
 
1421
1286
 
1422
1287
  Parameters
1423
1288
  ----------
1424
- packages : Dict[str, str], default: {}
1425
- Packages to use for this step. The key is the name of the package
1426
- and the value is the version to use.
1427
- python : str, optional, default: None
1428
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
1429
- that the version used will correspond to the version of the Python interpreter used to start the run.
1289
+ cpu : int, default 1
1290
+ Number of CPUs required for this step.
1291
+ gpu : int, optional, default None
1292
+ Number of GPUs required for this step.
1293
+ disk : int, optional, default None
1294
+ Disk size (in MB) required for this step. Only applies on Kubernetes.
1295
+ memory : int, default 4096
1296
+ Memory size (in MB) required for this step.
1297
+ shared_memory : int, optional, default None
1298
+ The value for the size (in MiB) of the /dev/shm volume for this step.
1299
+ This parameter maps to the `--shm-size` option in Docker.
1430
1300
  """
1431
1301
  ...
1432
1302
 
1433
1303
  @typing.overload
1434
- def pypi(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1304
+ def resources(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1435
1305
  ...
1436
1306
 
1437
1307
  @typing.overload
1438
- def pypi(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1308
+ def resources(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1439
1309
  ...
1440
1310
 
1441
- def pypi(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None):
1311
+ def resources(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None):
1442
1312
  """
1443
- Specifies the PyPI packages for the step.
1313
+ Specifies the resources needed when executing this step.
1314
+
1315
+ Use `@resources` to specify the resource requirements
1316
+ independently of the specific compute layer (`@batch`, `@kubernetes`).
1317
+
1318
+ You can choose the compute layer on the command line by executing e.g.
1319
+ ```
1320
+ python myflow.py run --with batch
1321
+ ```
1322
+ or
1323
+ ```
1324
+ python myflow.py run --with kubernetes
1325
+ ```
1326
+ which executes the flow on the desired system using the
1327
+ requirements specified in `@resources`.
1328
+
1329
+
1330
+ Parameters
1331
+ ----------
1332
+ cpu : int, default 1
1333
+ Number of CPUs required for this step.
1334
+ gpu : int, optional, default None
1335
+ Number of GPUs required for this step.
1336
+ disk : int, optional, default None
1337
+ Disk size (in MB) required for this step. Only applies on Kubernetes.
1338
+ memory : int, default 4096
1339
+ Memory size (in MB) required for this step.
1340
+ shared_memory : int, optional, default None
1341
+ The value for the size (in MiB) of the /dev/shm volume for this step.
1342
+ This parameter maps to the `--shm-size` option in Docker.
1343
+ """
1344
+ ...
1345
+
1346
+ def vllm(*, model: str, backend: str, openai_api_server: bool, debug: bool, card_refresh_interval: int, max_retries: int, retry_alert_frequency: int, engine_args: dict) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1347
+ """
1348
+ This decorator is used to run vLLM APIs as Metaflow task sidecars.
1349
+
1350
+ User code call
1351
+ --------------
1352
+ @vllm(
1353
+ model="...",
1354
+ ...
1355
+ )
1356
+
1357
+ Valid backend options
1358
+ ---------------------
1359
+ - 'local': Run as a separate process on the local task machine.
1360
+
1361
+ Valid model options
1362
+ -------------------
1363
+ Any HuggingFace model identifier, e.g. 'meta-llama/Llama-3.2-1B'
1364
+
1365
+ NOTE: vLLM's OpenAI-compatible server serves ONE model per server instance.
1366
+ If you need multiple models, you must create multiple @vllm decorators.
1367
+
1368
+
1369
+ Parameters
1370
+ ----------
1371
+ model: str
1372
+ HuggingFace model identifier to be served by vLLM.
1373
+ backend: str
1374
+ Determines where and how to run the vLLM process.
1375
+ openai_api_server: bool
1376
+ Whether to use OpenAI-compatible API server mode (subprocess) instead of native engine.
1377
+ Default is False (uses native engine).
1378
+ Set to True for backward compatibility with existing code.
1379
+ debug: bool
1380
+ Whether to turn on verbose debugging logs.
1381
+ card_refresh_interval: int
1382
+ Interval in seconds for refreshing the vLLM status card.
1383
+ Only used when openai_api_server=True.
1384
+ max_retries: int
1385
+ Maximum number of retries checking for vLLM server startup.
1386
+ Only used when openai_api_server=True.
1387
+ retry_alert_frequency: int
1388
+ Frequency of alert logs for vLLM server startup retries.
1389
+ Only used when openai_api_server=True.
1390
+ engine_args : dict
1391
+ Additional keyword arguments to pass to the vLLM engine.
1392
+ For example, `tensor_parallel_size=2`.
1393
+ """
1394
+ ...
1395
+
1396
+ def huggingface_hub(*, temp_dir_root: typing.Optional[str] = None, cache_scope: typing.Optional[str] = None, load: typing.Union[typing.List[str], typing.List[typing.Tuple[typing.Dict, str]], typing.List[typing.Tuple[str, str]], typing.List[typing.Dict], None]) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1397
+ """
1398
+ Decorator that helps cache, version, and store models/datasets from the Hugging Face Hub.
1399
+
1400
+ Examples
1401
+ --------
1402
+
1403
+ ```python
1404
+ # **Usage: creating references to models from the Hugging Face Hub that may be loaded in downstream steps**
1405
+ @huggingface_hub
1406
+ @step
1407
+ def pull_model_from_huggingface(self):
1408
+ # `current.huggingface_hub.snapshot_download` downloads the model from the Hugging Face Hub
1409
+ # and saves it in the backend storage based on the model's `repo_id`. If there exists a model
1410
+ # with the same `repo_id` in the backend storage, it will not download the model again. The return
1411
+ # value of the function is a reference to the model in the backend storage.
1412
+ # This reference can be used to load the model in the subsequent steps via `@model(load=["llama_model"])`
1413
+
1414
+ self.model_id = "mistralai/Mistral-7B-Instruct-v0.1"
1415
+ self.llama_model = current.huggingface_hub.snapshot_download(
1416
+ repo_id=self.model_id,
1417
+ allow_patterns=["*.safetensors", "*.json", "tokenizer.*"],
1418
+ )
1419
+ self.next(self.train)
1420
+
1421
+ # **Usage: explicitly loading models at runtime from the Hugging Face Hub or from cache (from Metaflow's datastore)**
1422
+ @huggingface_hub
1423
+ @step
1424
+ def run_training(self):
1425
+ # Temporary directory (auto-cleaned on exit)
1426
+ with current.huggingface_hub.load(
1427
+ repo_id="google-bert/bert-base-uncased",
1428
+ allow_patterns=["*.bin"],
1429
+ ) as local_path:
1430
+ # Use files under local_path
1431
+ train_model(local_path)
1432
+ ...
1433
+
1434
+ # **Usage: loading models directly from the Hugging Face Hub or from cache (from Metaflow's datastore)**
1435
+
1436
+ @huggingface_hub(load=["mistralai/Mistral-7B-Instruct-v0.1"])
1437
+ @step
1438
+ def pull_model_from_huggingface(self):
1439
+ path_to_model = current.huggingface_hub.loaded["mistralai/Mistral-7B-Instruct-v0.1"]
1440
+
1441
+ @huggingface_hub(load=[("mistralai/Mistral-7B-Instruct-v0.1", "/my-directory"), ("myorg/mistral-lora", "/my-lora-directory")])
1442
+ @step
1443
+ def finetune_model(self):
1444
+ path_to_model = current.huggingface_hub.loaded["mistralai/Mistral-7B-Instruct-v0.1"]
1445
+ # path_to_model will be /my-directory
1446
+
1447
+
1448
+ # Takes all the arguments passed to `snapshot_download`
1449
+ # except for `local_dir`
1450
+ @huggingface_hub(load=[
1451
+ {
1452
+ "repo_id": "mistralai/Mistral-7B-Instruct-v0.1",
1453
+ },
1454
+ {
1455
+ "repo_id": "myorg/mistral-lora",
1456
+ "repo_type": "model",
1457
+ },
1458
+ ])
1459
+ @step
1460
+ def finetune_model(self):
1461
+ path_to_model = current.huggingface_hub.loaded["mistralai/Mistral-7B-Instruct-v0.1"]
1462
+ # path_to_model will be a temporary directory, since no local path was specified
1463
+ ```
1464
+
1465
+
1466
+ Parameters
1467
+ ----------
1468
+ temp_dir_root : str, optional
1469
+ The root directory that will hold the temporary directory where objects will be downloaded.
1470
+
1471
+ cache_scope : str, optional
1472
+ The scope of the cache. Can be `checkpoint` / `flow` / `global`.
1473
+ - `checkpoint` (default): All repos are stored like objects saved by `@checkpoint`.
1474
+ i.e., the cached path is derived from the namespace, flow, step, and Metaflow foreach iteration.
1475
+ Any repo downloaded under this scope will only be retrieved from the cache when the step runs under the same namespace in the same flow (at the same foreach index).
1476
+
1477
+ - `flow`: All repos are cached under the flow, regardless of namespace.
1478
+ i.e., the cached path is derived solely from the flow name.
1479
+ When to use this mode: (1) Multiple users are executing the same flow and want shared access to the repos cached by the decorator. (2) Multiple versions of a flow are deployed, all needing access to the same repos cached by the decorator.
1480
+
1481
+ - `global`: All repos are cached under a globally static path.
1482
+ i.e., the base path of the cache is static and all repos are stored under it.
1483
+ When to use this mode:
1484
+ - All repos from the Hugging Face Hub need to be shared by users across all flow executions.
1485
+ - Each caching scope comes with its own trade-offs:
1486
+ - `checkpoint`:
1487
+ - Has explicit control over when caches are populated (controlled by the same flow that has the `@huggingface_hub` decorator) but ends up hitting the Hugging Face Hub more often if there are many users/namespaces/steps.
1488
+ - Since objects are written on a `namespace/flow/step` basis, the blast radius of a bad checkpoint is limited to a particular flow in a namespace.
1489
+ - `flow`:
1490
+ - Has less control over when caches are populated (can be written by any execution instance of a flow from any namespace) but results in more cache hits.
1491
+ - The blast radius of a bad checkpoint is limited to all runs of a particular flow.
1492
+ - It doesn't promote cache reuse across flows.
1493
+ - `global`:
1494
+ - Has no control over when caches are populated (can be written by any flow execution) but has the highest cache hit rate.
1495
+ - It promotes cache reuse across flows.
1496
+ - The blast radius of a bad checkpoint spans every flow that could be using a particular repo.
1444
1497
 
1445
- Information in this decorator will augment any
1446
- attributes set in the `@pyi_base` flow-level decorator. Hence,
1447
- you can use `@pypi_base` to set packages required by all
1448
- steps and use `@pypi` to specify step-specific overrides.
1498
+ load: Union[List[str], List[Tuple[Dict, str]], List[Tuple[str, str]], List[Dict], None]
1499
+ The list of repos (models/datasets) to load.
1449
1500
 
1501
+ Loaded repos can be accessed via `current.huggingface_hub.loaded`. If load is set, then the following happens:
1450
1502
 
1451
- Parameters
1452
- ----------
1453
- packages : Dict[str, str], default: {}
1454
- Packages to use for this step. The key is the name of the package
1455
- and the value is the version to use.
1456
- python : str, optional, default: None
1457
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
1458
- that the version used will correspond to the version of the Python interpreter used to start the run.
1503
+ - If repo (model/dataset) is not found in the datastore:
1504
+ - Downloads the repo from Hugging Face Hub to a temporary directory (or uses specified path) for local access
1505
+ - Stores it in Metaflow's datastore (s3/gcs/azure etc.) with a unique name based on repo_type/repo_id
1506
+ - All HF models loaded for a `@step` will be cached separately under flow/step/namespace.
1507
+
1508
+ - If repo is found in the datastore:
1509
+ - Loads it directly from datastore to local path (can be temporary directory or specified path)
1459
1510
  """
1460
1511
  ...
1461
1512
 
1462
1513
  @typing.overload
1463
- def catch(*, var: typing.Optional[str] = None, print_exception: bool = True) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1514
+ def schedule(*, hourly: bool = False, daily: bool = True, weekly: bool = False, cron: typing.Optional[str] = None, timezone: typing.Optional[str] = None) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1464
1515
  """
1465
- Specifies that the step will success under all circumstances.
1466
-
1467
- The decorator will create an optional artifact, specified by `var`, which
1468
- contains the exception raised. You can use it to detect the presence
1469
- of errors, indicating that all happy-path artifacts produced by the step
1470
- are missing.
1516
+ Specifies the times when the flow should be run when running on a
1517
+ production scheduler.
1471
1518
 
1472
1519
 
1473
1520
  Parameters
1474
1521
  ----------
1475
- var : str, optional, default None
1476
- Name of the artifact in which to store the caught exception.
1477
- If not specified, the exception is not stored.
1478
- print_exception : bool, default True
1479
- Determines whether or not the exception is printed to
1480
- stdout when caught.
1522
+ hourly : bool, default False
1523
+ Run the workflow hourly.
1524
+ daily : bool, default True
1525
+ Run the workflow daily.
1526
+ weekly : bool, default False
1527
+ Run the workflow weekly.
1528
+ cron : str, optional, default None
1529
+ Run the workflow at [a custom Cron schedule](https://docs.aws.amazon.com/eventbridge/latest/userguide/scheduled-events.html#cron-expressions)
1530
+ specified by this expression.
1531
+ timezone : str, optional, default None
1532
+ Timezone on which the schedule runs (default: None). Currently supported only for Argo workflows,
1533
+ which accepts timezones in [IANA format](https://nodatime.org/TimeZones).
1481
1534
  """
1482
1535
  ...
1483
1536
 
1484
1537
  @typing.overload
1485
- def catch(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1486
- ...
1487
-
1488
- @typing.overload
1489
- def catch(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1538
+ def schedule(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1490
1539
  ...
1491
1540
 
1492
- def catch(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, var: typing.Optional[str] = None, print_exception: bool = True):
1541
+ def schedule(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, hourly: bool = False, daily: bool = True, weekly: bool = False, cron: typing.Optional[str] = None, timezone: typing.Optional[str] = None):
1493
1542
  """
1494
- Specifies that the step will success under all circumstances.
1495
-
1496
- The decorator will create an optional artifact, specified by `var`, which
1497
- contains the exception raised. You can use it to detect the presence
1498
- of errors, indicating that all happy-path artifacts produced by the step
1499
- are missing.
1543
+ Specifies the times when the flow should be run when running on a
1544
+ production scheduler.
1500
1545
 
1501
1546
 
1502
1547
  Parameters
1503
1548
  ----------
1504
- var : str, optional, default None
1505
- Name of the artifact in which to store the caught exception.
1506
- If not specified, the exception is not stored.
1507
- print_exception : bool, default True
1508
- Determines whether or not the exception is printed to
1509
- stdout when caught.
1510
- """
1511
- ...
1512
-
1513
- def with_artifact_store(f: typing.Optional[typing.Type[FlowSpecDerived]] = None):
1514
- """
1515
- Allows setting external datastores to save data for the
1516
- `@checkpoint`/`@model`/`@huggingface_hub` decorators.
1517
-
1518
- This decorator is useful when users wish to save data to a different datastore
1519
- than what is configured in Metaflow. This can be for variety of reasons:
1520
-
1521
- 1. Data security: The objects needs to be stored in a bucket (object storage) that is not accessible by other flows.
1522
- 2. Data Locality: The location where the task is executing is not located in the same region as the datastore.
1523
- - Example: Metaflow datastore lives in US East, but the task is executing in Finland datacenters.
1524
- 3. Data Lifecycle Policies: The objects need to be archived / managed separately from the Metaflow managed objects.
1525
- - Example: Flow is training very large models that need to be stored separately and will be deleted more aggressively than the Metaflow managed objects.
1526
-
1527
- Usage:
1528
- ----------
1529
-
1530
- - Using a custom IAM role to access the datastore.
1531
-
1532
- ```python
1533
- @with_artifact_store(
1534
- type="s3",
1535
- config=lambda: {
1536
- "root": "s3://my-bucket-foo/path/to/root",
1537
- "role_arn": ROLE,
1538
- },
1539
- )
1540
- class MyFlow(FlowSpec):
1541
-
1542
- @checkpoint
1543
- @step
1544
- def start(self):
1545
- with open("my_file.txt", "w") as f:
1546
- f.write("Hello, World!")
1547
- self.external_bucket_checkpoint = current.checkpoint.save("my_file.txt")
1548
- self.next(self.end)
1549
-
1550
- ```
1551
-
1552
- - Using credentials to access the s3-compatible datastore.
1553
-
1554
- ```python
1555
- @with_artifact_store(
1556
- type="s3",
1557
- config=lambda: {
1558
- "root": "s3://my-bucket-foo/path/to/root",
1559
- "client_params": {
1560
- "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1561
- "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1562
- },
1563
- },
1564
- )
1565
- class MyFlow(FlowSpec):
1566
-
1567
- @checkpoint
1568
- @step
1569
- def start(self):
1570
- with open("my_file.txt", "w") as f:
1571
- f.write("Hello, World!")
1572
- self.external_bucket_checkpoint = current.checkpoint.save("my_file.txt")
1573
- self.next(self.end)
1574
-
1575
- ```
1576
-
1577
- - Accessing objects stored in external datastores after task execution.
1578
-
1579
- ```python
1580
- run = Run("CheckpointsTestsFlow/8992")
1581
- with artifact_store_from(run=run, config={
1582
- "client_params": {
1583
- "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1584
- "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1585
- },
1586
- }):
1587
- with Checkpoint() as cp:
1588
- latest = cp.list(
1589
- task=run["start"].task
1590
- )[0]
1591
- print(latest)
1592
- cp.load(
1593
- latest,
1594
- "test-checkpoints"
1595
- )
1596
-
1597
- task = Task("TorchTuneFlow/8484/train/53673")
1598
- with artifact_store_from(run=run, config={
1599
- "client_params": {
1600
- "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1601
- "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1602
- },
1603
- }):
1604
- load_model(
1605
- task.data.model_ref,
1606
- "test-models"
1607
- )
1608
- ```
1609
- Parameters:
1610
- ----------
1611
-
1612
- type: str
1613
- The type of the datastore. Can be one of 's3', 'gcs', 'azure' or any other supported metaflow Datastore.
1614
-
1615
- config: dict or Callable
1616
- Dictionary of configuration options for the datastore. The following keys are required:
1617
- - root: The root path in the datastore where the data will be saved. (needs to be in the format expected by the datastore)
1618
- - example: 's3://bucket-name/path/to/root'
1619
- - example: 'gs://bucket-name/path/to/root'
1620
- - example: 'https://myblockacc.blob.core.windows.net/metaflow/'
1621
- - role_arn (optional): AWS IAM role to access s3 bucket (only when `type` is 's3')
1622
- - session_vars (optional): AWS session variables to access s3 bucket (only when `type` is 's3')
1623
- - client_params (optional): AWS client parameters to access s3 bucket (only when `type` is 's3')
1549
+ hourly : bool, default False
1550
+ Run the workflow hourly.
1551
+ daily : bool, default True
1552
+ Run the workflow daily.
1553
+ weekly : bool, default False
1554
+ Run the workflow weekly.
1555
+ cron : str, optional, default None
1556
+ Run the workflow at [a custom Cron schedule](https://docs.aws.amazon.com/eventbridge/latest/userguide/scheduled-events.html#cron-expressions)
1557
+ specified by this expression.
1558
+ timezone : str, optional, default None
1559
+ Timezone on which the schedule runs (default: None). Currently supported only for Argo workflows,
1560
+ which accepts timezones in [IANA format](https://nodatime.org/TimeZones).
1624
1561
  """
1625
1562
  ...
1626
1563
 
@@ -1652,59 +1589,18 @@ def airflow_external_task_sensor(*, timeout: int, poke_interval: int, mode: str,
1652
1589
  external_dag_id : str
1653
1590
  The dag_id that contains the task you want to wait for.
1654
1591
  external_task_ids : List[str]
1655
- The list of task_ids that you want to wait for.
1656
- If None (default value) the sensor waits for the DAG. (Default: None)
1657
- allowed_states : List[str]
1658
- Iterable of allowed states, (Default: ['success'])
1659
- failed_states : List[str]
1660
- Iterable of failed or dis-allowed states. (Default: None)
1661
- execution_delta : datetime.timedelta
1662
- time difference with the previous execution to look at,
1663
- the default is the same logical date as the current task or DAG. (Default: None)
1664
- check_existence: bool
1665
- Set to True to check if the external task exists or check if
1666
- the DAG to wait for exists. (Default: True)
1667
- """
1668
- ...
1669
-
1670
- @typing.overload
1671
- def pypi_base(*, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1672
- """
1673
- Specifies the PyPI packages for all steps of the flow.
1674
-
1675
- Use `@pypi_base` to set common packages required by all
1676
- steps and use `@pypi` to specify step-specific overrides.
1677
-
1678
- Parameters
1679
- ----------
1680
- packages : Dict[str, str], default: {}
1681
- Packages to use for this flow. The key is the name of the package
1682
- and the value is the version to use.
1683
- python : str, optional, default: None
1684
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
1685
- that the version used will correspond to the version of the Python interpreter used to start the run.
1686
- """
1687
- ...
1688
-
1689
- @typing.overload
1690
- def pypi_base(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1691
- ...
1692
-
1693
- def pypi_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None):
1694
- """
1695
- Specifies the PyPI packages for all steps of the flow.
1696
-
1697
- Use `@pypi_base` to set common packages required by all
1698
- steps and use `@pypi` to specify step-specific overrides.
1699
-
1700
- Parameters
1701
- ----------
1702
- packages : Dict[str, str], default: {}
1703
- Packages to use for this flow. The key is the name of the package
1704
- and the value is the version to use.
1705
- python : str, optional, default: None
1706
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
1707
- that the version used will correspond to the version of the Python interpreter used to start the run.
1592
+ The list of task_ids that you want to wait for.
1593
+ If None (default value) the sensor waits for the DAG. (Default: None)
1594
+ allowed_states : List[str]
1595
+ Iterable of allowed states. (Default: ['success'])
1596
+ failed_states : List[str]
1597
+ Iterable of failed or dis-allowed states. (Default: None)
1598
+ execution_delta : datetime.timedelta
1599
+ time difference with the previous execution to look at,
1600
+ the default is the same logical date as the current task or DAG. (Default: None)
1601
+ check_existence: bool
1602
+ Set to True to check if the external task exists or check if
1603
+ the DAG to wait for exists. (Default: True)
1708
1604
  """
1709
1605
  ...
1710
1606
 
@@ -1751,6 +1647,47 @@ def airflow_s3_key_sensor(*, timeout: int, poke_interval: int, mode: str, expone
1751
1647
  """
1752
1648
  ...
1753
1649
 
1650
+ @typing.overload
1651
+ def pypi_base(*, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1652
+ """
1653
+ Specifies the PyPI packages for all steps of the flow.
1654
+
1655
+ Use `@pypi_base` to set common packages required by all
1656
+ steps and use `@pypi` to specify step-specific overrides.
1657
+
1658
+ Parameters
1659
+ ----------
1660
+ packages : Dict[str, str], default: {}
1661
+ Packages to use for this flow. The key is the name of the package
1662
+ and the value is the version to use.
1663
+ python : str, optional, default: None
1664
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
1665
+ that the version used will correspond to the version of the Python interpreter used to start the run.
1666
+ """
1667
+ ...
1668
+
1669
+ @typing.overload
1670
+ def pypi_base(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1671
+ ...
1672
+
1673
+ def pypi_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None):
1674
+ """
1675
+ Specifies the PyPI packages for all steps of the flow.
1676
+
1677
+ Use `@pypi_base` to set common packages required by all
1678
+ steps and use `@pypi` to specify step-specific overrides.
1679
+
1680
+ Parameters
1681
+ ----------
1682
+ packages : Dict[str, str], default: {}
1683
+ Packages to use for this flow. The key is the name of the package
1684
+ and the value is the version to use.
1685
+ python : str, optional, default: None
1686
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
1687
+ that the version used will correspond to the version of the Python interpreter used to start the run.
1688
+ """
1689
+ ...
1690
+
1754
1691
  def project(*, name: str, branch: typing.Optional[str] = None, production: bool = False) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1755
1692
  """
1756
1693
  Specifies what flows belong to the same project.
@@ -1787,53 +1724,53 @@ def project(*, name: str, branch: typing.Optional[str] = None, production: bool
1787
1724
  ...
1788
1725
 
1789
1726
  @typing.overload
1790
- def schedule(*, hourly: bool = False, daily: bool = True, weekly: bool = False, cron: typing.Optional[str] = None, timezone: typing.Optional[str] = None) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1727
+ def conda_base(*, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1791
1728
  """
1792
- Specifies the times when the flow should be run when running on a
1793
- production scheduler.
1729
+ Specifies the Conda environment for all steps of the flow.
1730
+
1731
+ Use `@conda_base` to set common libraries required by all
1732
+ steps and use `@conda` to specify step-specific additions.
1794
1733
 
1795
1734
 
1796
1735
  Parameters
1797
1736
  ----------
1798
- hourly : bool, default False
1799
- Run the workflow hourly.
1800
- daily : bool, default True
1801
- Run the workflow daily.
1802
- weekly : bool, default False
1803
- Run the workflow weekly.
1804
- cron : str, optional, default None
1805
- Run the workflow at [a custom Cron schedule](https://docs.aws.amazon.com/eventbridge/latest/userguide/scheduled-events.html#cron-expressions)
1806
- specified by this expression.
1807
- timezone : str, optional, default None
1808
- Timezone on which the schedule runs (default: None). Currently supported only for Argo workflows,
1809
- which accepts timezones in [IANA format](https://nodatime.org/TimeZones).
1737
+ packages : Dict[str, str], default {}
1738
+ Packages to use for this flow. The key is the name of the package
1739
+ and the value is the version to use.
1740
+ libraries : Dict[str, str], default {}
1741
+ Supported for backward compatibility. When used with packages, packages will take precedence.
1742
+ python : str, optional, default None
1743
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
1744
+ that the version used will correspond to the version of the Python interpreter used to start the run.
1745
+ disabled : bool, default False
1746
+ If set to True, disables Conda.
1810
1747
  """
1811
1748
  ...
1812
1749
 
1813
1750
  @typing.overload
1814
- def schedule(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1751
+ def conda_base(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1815
1752
  ...
1816
1753
 
1817
- def schedule(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, hourly: bool = False, daily: bool = True, weekly: bool = False, cron: typing.Optional[str] = None, timezone: typing.Optional[str] = None):
1754
+ def conda_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False):
1818
1755
  """
1819
- Specifies the times when the flow should be run when running on a
1820
- production scheduler.
1756
+ Specifies the Conda environment for all steps of the flow.
1757
+
1758
+ Use `@conda_base` to set common libraries required by all
1759
+ steps and use `@conda` to specify step-specific additions.
1821
1760
 
1822
1761
 
1823
1762
  Parameters
1824
1763
  ----------
1825
- hourly : bool, default False
1826
- Run the workflow hourly.
1827
- daily : bool, default True
1828
- Run the workflow daily.
1829
- weekly : bool, default False
1830
- Run the workflow weekly.
1831
- cron : str, optional, default None
1832
- Run the workflow at [a custom Cron schedule](https://docs.aws.amazon.com/eventbridge/latest/userguide/scheduled-events.html#cron-expressions)
1833
- specified by this expression.
1834
- timezone : str, optional, default None
1835
- Timezone on which the schedule runs (default: None). Currently supported only for Argo workflows,
1836
- which accepts timezones in [IANA format](https://nodatime.org/TimeZones).
1764
+ packages : Dict[str, str], default {}
1765
+ Packages to use for this flow. The key is the name of the package
1766
+ and the value is the version to use.
1767
+ libraries : Dict[str, str], default {}
1768
+ Supported for backward compatibility. When used with packages, packages will take precedence.
1769
+ python : str, optional, default None
1770
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
1771
+ that the version used will correspond to the version of the Python interpreter used to start the run.
1772
+ disabled : bool, default False
1773
+ If set to True, disables Conda.
1837
1774
  """
1838
1775
  ...
1839
1776
 
@@ -1930,54 +1867,117 @@ def trigger(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, event: t
1930
1867
  """
1931
1868
  ...
1932
1869
 
1933
- @typing.overload
1934
- def conda_base(*, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1870
+ def with_artifact_store(f: typing.Optional[typing.Type[FlowSpecDerived]] = None):
1935
1871
  """
1936
- Specifies the Conda environment for all steps of the flow.
1872
+ Allows setting external datastores to save data for the
1873
+ `@checkpoint`/`@model`/`@huggingface_hub` decorators.
1937
1874
 
1938
- Use `@conda_base` to set common libraries required by all
1939
- steps and use `@conda` to specify step-specific additions.
1875
+ This decorator is useful when users wish to save data to a different datastore
1876
+ than what is configured in Metaflow. This can be for a variety of reasons:
1940
1877
 
1878
+ 1. Data security: The objects need to be stored in a bucket (object storage) that is not accessible by other flows.
1879
+ 2. Data Locality: The location where the task is executing is not located in the same region as the datastore.
1880
+ - Example: Metaflow datastore lives in US East, but the task is executing in Finland datacenters.
1881
+ 3. Data Lifecycle Policies: The objects need to be archived / managed separately from the Metaflow managed objects.
1882
+ - Example: Flow is training very large models that need to be stored separately and will be deleted more aggressively than the Metaflow managed objects.
1941
1883
 
1942
- Parameters
1884
+ Usage:
1943
1885
  ----------
1944
- packages : Dict[str, str], default {}
1945
- Packages to use for this flow. The key is the name of the package
1946
- and the value is the version to use.
1947
- libraries : Dict[str, str], default {}
1948
- Supported for backward compatibility. When used with packages, packages will take precedence.
1949
- python : str, optional, default None
1950
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
1951
- that the version used will correspond to the version of the Python interpreter used to start the run.
1952
- disabled : bool, default False
1953
- If set to True, disables Conda.
1954
- """
1955
- ...
1956
-
1957
- @typing.overload
1958
- def conda_base(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1959
- ...
1960
-
1961
- def conda_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False):
1962
- """
1963
- Specifies the Conda environment for all steps of the flow.
1964
1886
 
1965
- Use `@conda_base` to set common libraries required by all
1966
- steps and use `@conda` to specify step-specific additions.
1887
+ - Using a custom IAM role to access the datastore.
1888
+
1889
+ ```python
1890
+ @with_artifact_store(
1891
+ type="s3",
1892
+ config=lambda: {
1893
+ "root": "s3://my-bucket-foo/path/to/root",
1894
+ "role_arn": ROLE,
1895
+ },
1896
+ )
1897
+ class MyFlow(FlowSpec):
1898
+
1899
+ @checkpoint
1900
+ @step
1901
+ def start(self):
1902
+ with open("my_file.txt", "w") as f:
1903
+ f.write("Hello, World!")
1904
+ self.external_bucket_checkpoint = current.checkpoint.save("my_file.txt")
1905
+ self.next(self.end)
1967
1906
 
1907
+ ```
1968
1908
 
1969
- Parameters
1909
+ - Using credentials to access the s3-compatible datastore.
1910
+
1911
+ ```python
1912
+ @with_artifact_store(
1913
+ type="s3",
1914
+ config=lambda: {
1915
+ "root": "s3://my-bucket-foo/path/to/root",
1916
+ "client_params": {
1917
+ "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1918
+ "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1919
+ },
1920
+ },
1921
+ )
1922
+ class MyFlow(FlowSpec):
1923
+
1924
+ @checkpoint
1925
+ @step
1926
+ def start(self):
1927
+ with open("my_file.txt", "w") as f:
1928
+ f.write("Hello, World!")
1929
+ self.external_bucket_checkpoint = current.checkpoint.save("my_file.txt")
1930
+ self.next(self.end)
1931
+
1932
+ ```
1933
+
1934
+ - Accessing objects stored in external datastores after task execution.
1935
+
1936
+ ```python
1937
+ run = Run("CheckpointsTestsFlow/8992")
1938
+ with artifact_store_from(run=run, config={
1939
+ "client_params": {
1940
+ "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1941
+ "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1942
+ },
1943
+ }):
1944
+ with Checkpoint() as cp:
1945
+ latest = cp.list(
1946
+ task=run["start"].task
1947
+ )[0]
1948
+ print(latest)
1949
+ cp.load(
1950
+ latest,
1951
+ "test-checkpoints"
1952
+ )
1953
+
1954
+ task = Task("TorchTuneFlow/8484/train/53673")
1955
+ with artifact_store_from(run=run, config={
1956
+ "client_params": {
1957
+ "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1958
+ "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1959
+ },
1960
+ }):
1961
+ load_model(
1962
+ task.data.model_ref,
1963
+ "test-models"
1964
+ )
1965
+ ```
1966
+ Parameters:
1970
1967
  ----------
1971
- packages : Dict[str, str], default {}
1972
- Packages to use for this flow. The key is the name of the package
1973
- and the value is the version to use.
1974
- libraries : Dict[str, str], default {}
1975
- Supported for backward compatibility. When used with packages, packages will take precedence.
1976
- python : str, optional, default None
1977
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
1978
- that the version used will correspond to the version of the Python interpreter used to start the run.
1979
- disabled : bool, default False
1980
- If set to True, disables Conda.
1968
+
1969
+ type: str
1970
+ The type of the datastore. Can be one of 's3', 'gcs', 'azure' or any other supported metaflow Datastore.
1971
+
1972
+ config: dict or Callable
1973
+ Dictionary of configuration options for the datastore. The following keys are required:
1974
+ - root: The root path in the datastore where the data will be saved. (needs to be in the format expected by the datastore)
1975
+ - example: 's3://bucket-name/path/to/root'
1976
+ - example: 'gs://bucket-name/path/to/root'
1977
+ - example: 'https://myblockacc.blob.core.windows.net/metaflow/'
1978
+ - role_arn (optional): AWS IAM role to access s3 bucket (only when `type` is 's3')
1979
+ - session_vars (optional): AWS session variables to access s3 bucket (only when `type` is 's3')
1980
+ - client_params (optional): AWS client parameters to access s3 bucket (only when `type` is 's3')
1981
1981
  """
1982
1982
  ...
1983
1983