ob-metaflow-stubs 6.0.5.0__py2.py3-none-any.whl → 6.0.5.2__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (261)
  1. metaflow-stubs/__init__.pyi +1070 -1001
  2. metaflow-stubs/cards.pyi +1 -1
  3. metaflow-stubs/cli.pyi +1 -1
  4. metaflow-stubs/cli_components/__init__.pyi +1 -1
  5. metaflow-stubs/cli_components/utils.pyi +1 -1
  6. metaflow-stubs/client/__init__.pyi +1 -1
  7. metaflow-stubs/client/core.pyi +4 -4
  8. metaflow-stubs/client/filecache.pyi +1 -1
  9. metaflow-stubs/events.pyi +2 -2
  10. metaflow-stubs/exception.pyi +1 -1
  11. metaflow-stubs/flowspec.pyi +5 -5
  12. metaflow-stubs/generated_for.txt +1 -1
  13. metaflow-stubs/includefile.pyi +3 -3
  14. metaflow-stubs/meta_files.pyi +1 -1
  15. metaflow-stubs/metadata_provider/__init__.pyi +1 -1
  16. metaflow-stubs/metadata_provider/heartbeat.pyi +1 -1
  17. metaflow-stubs/metadata_provider/metadata.pyi +2 -2
  18. metaflow-stubs/metadata_provider/util.pyi +1 -1
  19. metaflow-stubs/metaflow_config.pyi +1 -1
  20. metaflow-stubs/metaflow_current.pyi +50 -50
  21. metaflow-stubs/metaflow_git.pyi +1 -1
  22. metaflow-stubs/mf_extensions/__init__.pyi +1 -1
  23. metaflow-stubs/mf_extensions/obcheckpoint/__init__.pyi +1 -1
  24. metaflow-stubs/mf_extensions/obcheckpoint/plugins/__init__.pyi +1 -1
  25. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/__init__.pyi +1 -1
  26. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/__init__.pyi +1 -1
  27. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/async_cards.pyi +1 -1
  28. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/deco_injection_mixin.pyi +1 -1
  29. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/extra_components.pyi +2 -2
  30. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/__init__.pyi +1 -1
  31. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/cards/__init__.pyi +1 -1
  32. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/cards/checkpoint_lister.pyi +3 -3
  33. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/cards/lineage_card.pyi +1 -1
  34. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/checkpoint_storage.pyi +3 -3
  35. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/constructors.pyi +1 -1
  36. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/core.pyi +3 -3
  37. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/decorator.pyi +5 -5
  38. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/exceptions.pyi +1 -1
  39. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/final_api.pyi +2 -2
  40. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/lineage.pyi +1 -1
  41. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/__init__.pyi +1 -1
  42. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/context.pyi +2 -2
  43. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/core.pyi +2 -2
  44. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/decorator.pyi +1 -1
  45. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/exceptions.pyi +1 -1
  46. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/task_utils.pyi +3 -3
  47. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/utils.pyi +1 -1
  48. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastructures.pyi +2 -2
  49. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/exceptions.pyi +1 -1
  50. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/hf_hub/__init__.pyi +1 -1
  51. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/hf_hub/decorator.pyi +1 -1
  52. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/__init__.pyi +1 -1
  53. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/core.pyi +2 -2
  54. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/exceptions.pyi +1 -1
  55. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/model_storage.pyi +4 -4
  56. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/__init__.pyi +1 -1
  57. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/flowspec_utils.pyi +1 -1
  58. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/general.pyi +1 -1
  59. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/identity_utils.pyi +2 -2
  60. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/serialization_handler/__init__.pyi +1 -1
  61. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/serialization_handler/base.pyi +1 -1
  62. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/serialization_handler/tar.pyi +2 -2
  63. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/tar_utils.pyi +2 -2
  64. metaflow-stubs/mf_extensions/outerbounds/__init__.pyi +1 -1
  65. metaflow-stubs/mf_extensions/outerbounds/plugins/__init__.pyi +1 -1
  66. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/__init__.pyi +1 -1
  67. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/__init__.pyi +1 -1
  68. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/_state_machine.pyi +6 -5
  69. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/_vendor/__init__.pyi +1 -1
  70. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/_vendor/spinner/__init__.pyi +1 -1
  71. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/_vendor/spinner/spinners.pyi +1 -1
  72. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/app_cli.pyi +2 -2
  73. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/app_config.pyi +1 -1
  74. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/capsule.pyi +6 -6
  75. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/click_importer.pyi +1 -1
  76. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/code_package/__init__.pyi +1 -1
  77. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/code_package/code_packager.pyi +1 -1
  78. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/__init__.pyi +1 -1
  79. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/cli_generator.pyi +1 -1
  80. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/config_utils.pyi +3 -3
  81. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/schema_export.pyi +1 -1
  82. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/typed_configs.pyi +5 -1
  83. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/unified_config.pyi +11 -2
  84. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/dependencies.pyi +2 -2
  85. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/deployer.pyi +7 -8
  86. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/experimental/__init__.pyi +1 -1
  87. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/perimeters.pyi +1 -1
  88. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/utils.pyi +5 -5
  89. metaflow-stubs/mf_extensions/outerbounds/plugins/aws/__init__.pyi +1 -1
  90. metaflow-stubs/mf_extensions/outerbounds/plugins/aws/assume_role_decorator.pyi +1 -1
  91. metaflow-stubs/mf_extensions/outerbounds/plugins/card_utilities/__init__.pyi +1 -1
  92. metaflow-stubs/mf_extensions/outerbounds/plugins/card_utilities/async_cards.pyi +1 -1
  93. metaflow-stubs/mf_extensions/outerbounds/plugins/card_utilities/injector.pyi +1 -1
  94. metaflow-stubs/mf_extensions/outerbounds/plugins/checkpoint_datastores/__init__.pyi +1 -1
  95. metaflow-stubs/mf_extensions/outerbounds/plugins/checkpoint_datastores/coreweave.pyi +2 -2
  96. metaflow-stubs/mf_extensions/outerbounds/plugins/checkpoint_datastores/nebius.pyi +2 -2
  97. metaflow-stubs/mf_extensions/outerbounds/plugins/fast_bakery/__init__.pyi +1 -1
  98. metaflow-stubs/mf_extensions/outerbounds/plugins/fast_bakery/baker.pyi +3 -3
  99. metaflow-stubs/mf_extensions/outerbounds/plugins/fast_bakery/docker_environment.pyi +2 -2
  100. metaflow-stubs/mf_extensions/outerbounds/plugins/fast_bakery/fast_bakery.pyi +1 -1
  101. metaflow-stubs/mf_extensions/outerbounds/plugins/kubernetes/__init__.pyi +1 -1
  102. metaflow-stubs/mf_extensions/outerbounds/plugins/kubernetes/pod_killer.pyi +1 -1
  103. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/__init__.pyi +1 -1
  104. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/constants.pyi +1 -1
  105. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/exceptions.pyi +1 -1
  106. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/ollama.pyi +1 -1
  107. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/status_card.pyi +1 -1
  108. metaflow-stubs/mf_extensions/outerbounds/plugins/snowflake/__init__.pyi +1 -1
  109. metaflow-stubs/mf_extensions/outerbounds/plugins/snowflake/snowflake.pyi +1 -1
  110. metaflow-stubs/mf_extensions/outerbounds/profilers/__init__.pyi +1 -1
  111. metaflow-stubs/mf_extensions/outerbounds/profilers/gpu.pyi +1 -1
  112. metaflow-stubs/mf_extensions/outerbounds/remote_config.pyi +1 -1
  113. metaflow-stubs/mf_extensions/outerbounds/toplevel/__init__.pyi +1 -1
  114. metaflow-stubs/mf_extensions/outerbounds/toplevel/global_aliases_for_metaflow_package.pyi +12 -1
  115. metaflow-stubs/mf_extensions/outerbounds/toplevel/s3_proxy.pyi +24 -0
  116. metaflow-stubs/multicore_utils.pyi +1 -1
  117. metaflow-stubs/ob_internal.pyi +1 -1
  118. metaflow-stubs/packaging_sys/__init__.pyi +4 -4
  119. metaflow-stubs/packaging_sys/backend.pyi +3 -3
  120. metaflow-stubs/packaging_sys/distribution_support.pyi +5 -5
  121. metaflow-stubs/packaging_sys/tar_backend.pyi +4 -4
  122. metaflow-stubs/packaging_sys/utils.pyi +1 -1
  123. metaflow-stubs/packaging_sys/v1.pyi +2 -2
  124. metaflow-stubs/parameters.pyi +3 -3
  125. metaflow-stubs/plugins/__init__.pyi +10 -10
  126. metaflow-stubs/plugins/airflow/__init__.pyi +1 -1
  127. metaflow-stubs/plugins/airflow/airflow_utils.pyi +1 -1
  128. metaflow-stubs/plugins/airflow/exception.pyi +1 -1
  129. metaflow-stubs/plugins/airflow/sensors/__init__.pyi +1 -1
  130. metaflow-stubs/plugins/airflow/sensors/base_sensor.pyi +1 -1
  131. metaflow-stubs/plugins/airflow/sensors/external_task_sensor.pyi +1 -1
  132. metaflow-stubs/plugins/airflow/sensors/s3_sensor.pyi +1 -1
  133. metaflow-stubs/plugins/argo/__init__.pyi +1 -1
  134. metaflow-stubs/plugins/argo/argo_client.pyi +1 -1
  135. metaflow-stubs/plugins/argo/argo_events.pyi +1 -1
  136. metaflow-stubs/plugins/argo/argo_workflows.pyi +2 -2
  137. metaflow-stubs/plugins/argo/argo_workflows_decorator.pyi +3 -3
  138. metaflow-stubs/plugins/argo/argo_workflows_deployer.pyi +3 -3
  139. metaflow-stubs/plugins/argo/argo_workflows_deployer_objects.pyi +2 -2
  140. metaflow-stubs/plugins/argo/exit_hooks.pyi +2 -2
  141. metaflow-stubs/plugins/aws/__init__.pyi +1 -1
  142. metaflow-stubs/plugins/aws/aws_client.pyi +1 -1
  143. metaflow-stubs/plugins/aws/aws_utils.pyi +1 -1
  144. metaflow-stubs/plugins/aws/batch/__init__.pyi +1 -1
  145. metaflow-stubs/plugins/aws/batch/batch.pyi +1 -1
  146. metaflow-stubs/plugins/aws/batch/batch_client.pyi +1 -1
  147. metaflow-stubs/plugins/aws/batch/batch_decorator.pyi +1 -1
  148. metaflow-stubs/plugins/aws/secrets_manager/__init__.pyi +1 -1
  149. metaflow-stubs/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.pyi +2 -2
  150. metaflow-stubs/plugins/aws/step_functions/__init__.pyi +1 -1
  151. metaflow-stubs/plugins/aws/step_functions/event_bridge_client.pyi +1 -1
  152. metaflow-stubs/plugins/aws/step_functions/schedule_decorator.pyi +1 -1
  153. metaflow-stubs/plugins/aws/step_functions/step_functions.pyi +1 -1
  154. metaflow-stubs/plugins/aws/step_functions/step_functions_client.pyi +1 -1
  155. metaflow-stubs/plugins/aws/step_functions/step_functions_deployer.pyi +3 -3
  156. metaflow-stubs/plugins/aws/step_functions/step_functions_deployer_objects.pyi +2 -2
  157. metaflow-stubs/plugins/azure/__init__.pyi +1 -1
  158. metaflow-stubs/plugins/azure/azure_credential.pyi +1 -1
  159. metaflow-stubs/plugins/azure/azure_exceptions.pyi +1 -1
  160. metaflow-stubs/plugins/azure/azure_secret_manager_secrets_provider.pyi +2 -2
  161. metaflow-stubs/plugins/azure/azure_utils.pyi +1 -1
  162. metaflow-stubs/plugins/azure/blob_service_client_factory.pyi +1 -1
  163. metaflow-stubs/plugins/azure/includefile_support.pyi +1 -1
  164. metaflow-stubs/plugins/cards/__init__.pyi +5 -5
  165. metaflow-stubs/plugins/cards/card_client.pyi +2 -2
  166. metaflow-stubs/plugins/cards/card_creator.pyi +1 -1
  167. metaflow-stubs/plugins/cards/card_datastore.pyi +1 -1
  168. metaflow-stubs/plugins/cards/card_decorator.pyi +2 -2
  169. metaflow-stubs/plugins/cards/card_modules/__init__.pyi +1 -1
  170. metaflow-stubs/plugins/cards/card_modules/basic.pyi +2 -2
  171. metaflow-stubs/plugins/cards/card_modules/card.pyi +1 -1
  172. metaflow-stubs/plugins/cards/card_modules/components.pyi +4 -4
  173. metaflow-stubs/plugins/cards/card_modules/convert_to_native_type.pyi +1 -1
  174. metaflow-stubs/plugins/cards/card_modules/renderer_tools.pyi +1 -1
  175. metaflow-stubs/plugins/cards/card_modules/test_cards.pyi +1 -1
  176. metaflow-stubs/plugins/cards/card_resolver.pyi +1 -1
  177. metaflow-stubs/plugins/cards/component_serializer.pyi +1 -1
  178. metaflow-stubs/plugins/cards/exception.pyi +1 -1
  179. metaflow-stubs/plugins/catch_decorator.pyi +2 -2
  180. metaflow-stubs/plugins/datatools/__init__.pyi +1 -1
  181. metaflow-stubs/plugins/datatools/local.pyi +1 -1
  182. metaflow-stubs/plugins/datatools/s3/__init__.pyi +1 -1
  183. metaflow-stubs/plugins/datatools/s3/s3.pyi +3 -3
  184. metaflow-stubs/plugins/datatools/s3/s3tail.pyi +1 -1
  185. metaflow-stubs/plugins/datatools/s3/s3util.pyi +1 -1
  186. metaflow-stubs/plugins/debug_logger.pyi +1 -1
  187. metaflow-stubs/plugins/debug_monitor.pyi +1 -1
  188. metaflow-stubs/plugins/environment_decorator.pyi +1 -1
  189. metaflow-stubs/plugins/events_decorator.pyi +1 -1
  190. metaflow-stubs/plugins/exit_hook/__init__.pyi +1 -1
  191. metaflow-stubs/plugins/exit_hook/exit_hook_decorator.pyi +1 -1
  192. metaflow-stubs/plugins/frameworks/__init__.pyi +1 -1
  193. metaflow-stubs/plugins/frameworks/pytorch.pyi +1 -1
  194. metaflow-stubs/plugins/gcp/__init__.pyi +1 -1
  195. metaflow-stubs/plugins/gcp/gcp_secret_manager_secrets_provider.pyi +2 -2
  196. metaflow-stubs/plugins/gcp/gs_exceptions.pyi +1 -1
  197. metaflow-stubs/plugins/gcp/gs_storage_client_factory.pyi +1 -1
  198. metaflow-stubs/plugins/gcp/gs_utils.pyi +1 -1
  199. metaflow-stubs/plugins/gcp/includefile_support.pyi +1 -1
  200. metaflow-stubs/plugins/kubernetes/__init__.pyi +1 -1
  201. metaflow-stubs/plugins/kubernetes/kube_utils.pyi +2 -2
  202. metaflow-stubs/plugins/kubernetes/kubernetes.pyi +1 -1
  203. metaflow-stubs/plugins/kubernetes/kubernetes_client.pyi +1 -1
  204. metaflow-stubs/plugins/kubernetes/kubernetes_decorator.pyi +1 -1
  205. metaflow-stubs/plugins/kubernetes/kubernetes_jobsets.pyi +1 -1
  206. metaflow-stubs/plugins/kubernetes/spot_monitor_sidecar.pyi +1 -1
  207. metaflow-stubs/plugins/ollama/__init__.pyi +2 -2
  208. metaflow-stubs/plugins/parallel_decorator.pyi +1 -1
  209. metaflow-stubs/plugins/perimeters.pyi +1 -1
  210. metaflow-stubs/plugins/project_decorator.pyi +1 -1
  211. metaflow-stubs/plugins/pypi/__init__.pyi +2 -2
  212. metaflow-stubs/plugins/pypi/conda_decorator.pyi +1 -1
  213. metaflow-stubs/plugins/pypi/conda_environment.pyi +4 -4
  214. metaflow-stubs/plugins/pypi/parsers.pyi +1 -1
  215. metaflow-stubs/plugins/pypi/pypi_decorator.pyi +1 -1
  216. metaflow-stubs/plugins/pypi/pypi_environment.pyi +1 -1
  217. metaflow-stubs/plugins/pypi/utils.pyi +1 -1
  218. metaflow-stubs/plugins/resources_decorator.pyi +1 -1
  219. metaflow-stubs/plugins/retry_decorator.pyi +1 -1
  220. metaflow-stubs/plugins/secrets/__init__.pyi +2 -2
  221. metaflow-stubs/plugins/secrets/inline_secrets_provider.pyi +1 -1
  222. metaflow-stubs/plugins/secrets/secrets_decorator.pyi +1 -1
  223. metaflow-stubs/plugins/secrets/secrets_func.pyi +1 -1
  224. metaflow-stubs/plugins/secrets/secrets_spec.pyi +1 -1
  225. metaflow-stubs/plugins/secrets/utils.pyi +1 -1
  226. metaflow-stubs/plugins/snowflake/__init__.pyi +1 -1
  227. metaflow-stubs/plugins/storage_executor.pyi +1 -1
  228. metaflow-stubs/plugins/test_unbounded_foreach_decorator.pyi +1 -1
  229. metaflow-stubs/plugins/timeout_decorator.pyi +2 -2
  230. metaflow-stubs/plugins/torchtune/__init__.pyi +1 -1
  231. metaflow-stubs/plugins/uv/__init__.pyi +1 -1
  232. metaflow-stubs/plugins/uv/uv_environment.pyi +2 -2
  233. metaflow-stubs/profilers/__init__.pyi +1 -1
  234. metaflow-stubs/pylint_wrapper.pyi +1 -1
  235. metaflow-stubs/runner/__init__.pyi +1 -1
  236. metaflow-stubs/runner/deployer.pyi +33 -33
  237. metaflow-stubs/runner/deployer_impl.pyi +2 -2
  238. metaflow-stubs/runner/metaflow_runner.pyi +2 -2
  239. metaflow-stubs/runner/nbdeploy.pyi +1 -1
  240. metaflow-stubs/runner/nbrun.pyi +1 -1
  241. metaflow-stubs/runner/subprocess_manager.pyi +1 -1
  242. metaflow-stubs/runner/utils.pyi +3 -3
  243. metaflow-stubs/system/__init__.pyi +1 -1
  244. metaflow-stubs/system/system_logger.pyi +2 -2
  245. metaflow-stubs/system/system_monitor.pyi +1 -1
  246. metaflow-stubs/tagging_util.pyi +1 -1
  247. metaflow-stubs/tuple_util.pyi +1 -1
  248. metaflow-stubs/user_configs/__init__.pyi +1 -1
  249. metaflow-stubs/user_configs/config_options.pyi +3 -3
  250. metaflow-stubs/user_configs/config_parameters.pyi +6 -6
  251. metaflow-stubs/user_decorators/__init__.pyi +1 -1
  252. metaflow-stubs/user_decorators/common.pyi +1 -1
  253. metaflow-stubs/user_decorators/mutable_flow.pyi +4 -4
  254. metaflow-stubs/user_decorators/mutable_step.pyi +5 -5
  255. metaflow-stubs/user_decorators/user_flow_decorator.pyi +4 -4
  256. metaflow-stubs/user_decorators/user_step_decorator.pyi +6 -6
  257. {ob_metaflow_stubs-6.0.5.0.dist-info → ob_metaflow_stubs-6.0.5.2.dist-info}/METADATA +1 -1
  258. ob_metaflow_stubs-6.0.5.2.dist-info/RECORD +261 -0
  259. ob_metaflow_stubs-6.0.5.0.dist-info/RECORD +0 -260
  260. {ob_metaflow_stubs-6.0.5.0.dist-info → ob_metaflow_stubs-6.0.5.2.dist-info}/WHEEL +0 -0
  261. {ob_metaflow_stubs-6.0.5.0.dist-info → ob_metaflow_stubs-6.0.5.2.dist-info}/top_level.txt +0 -0
@@ -1,7 +1,7 @@
1
1
  ######################################################################################################
2
2
  # Auto-generated Metaflow stub file #
3
3
  # MF version: 2.16.8.1+obcheckpoint(0.2.4);ob(v1) #
4
- # Generated on 2025-07-31T17:05:42.725448 #
4
+ # Generated on 2025-08-04T19:06:54.653206 #
5
5
  ######################################################################################################
6
6
 
7
7
  from __future__ import annotations
@@ -39,10 +39,10 @@ from .user_decorators.user_step_decorator import UserStepDecorator as UserStepDe
39
39
  from .user_decorators.user_step_decorator import StepMutator as StepMutator
40
40
  from .user_decorators.user_step_decorator import user_step_decorator as user_step_decorator
41
41
  from .user_decorators.user_flow_decorator import FlowMutator as FlowMutator
42
- from . import events as events
43
- from . import tuple_util as tuple_util
44
42
  from . import cards as cards
43
+ from . import tuple_util as tuple_util
45
44
  from . import metaflow_git as metaflow_git
45
+ from . import events as events
46
46
  from . import runner as runner
47
47
  from . import plugins as plugins
48
48
  from .mf_extensions.outerbounds.toplevel.global_aliases_for_metaflow_package import S3 as S3
@@ -72,6 +72,11 @@ from .runner.nbdeploy import NBDeployer as NBDeployer
72
72
  from .mf_extensions.obcheckpoint.plugins.machine_learning_utilities.checkpoints.final_api import Checkpoint as Checkpoint
73
73
  from .mf_extensions.obcheckpoint.plugins.machine_learning_utilities.datastructures import load_model as load_model
74
74
  from .mf_extensions.obcheckpoint.plugins.machine_learning_utilities.datastore.context import artifact_store_from as artifact_store_from
75
+ from .mf_extensions.outerbounds.toplevel.s3_proxy import get_aws_client_with_s3_proxy as get_aws_client_with_s3_proxy
76
+ from .mf_extensions.outerbounds.toplevel.s3_proxy import get_S3_with_s3_proxy as get_S3_with_s3_proxy
77
+ from .mf_extensions.outerbounds.toplevel.global_aliases_for_metaflow_package import set_s3_proxy_config as set_s3_proxy_config
78
+ from .mf_extensions.outerbounds.toplevel.global_aliases_for_metaflow_package import clear_s3_proxy_config as clear_s3_proxy_config
79
+ from .mf_extensions.outerbounds.toplevel.global_aliases_for_metaflow_package import get_s3_proxy_config as get_s3_proxy_config
75
80
  from .mf_extensions.outerbounds.toplevel.global_aliases_for_metaflow_package import get_aws_client as get_aws_client
76
81
  from .mf_extensions.outerbounds.plugins.snowflake.snowflake import Snowflake as Snowflake
77
82
  from .mf_extensions.outerbounds.plugins.checkpoint_datastores.nebius import nebius_checkpoints as nebius_checkpoints
@@ -162,214 +167,248 @@ def step(f: typing.Union[typing.Callable[[FlowSpecDerived], None], typing.Callab
162
167
  """
163
168
  ...
164
169
 
165
- @typing.overload
166
- def card(*, type: str = 'default', id: typing.Optional[str] = None, options: typing.Dict[str, typing.Any] = {}, timeout: int = 45) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
170
+ def ollama(*, models: list, backend: str, force_pull: bool, cache_update_policy: str, force_cache_update: bool, debug: bool, circuit_breaker_config: dict, timeout_config: dict) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
167
171
  """
168
- Creates a human-readable report, a Metaflow Card, after this step completes.
172
+ This decorator is used to run Ollama APIs as Metaflow task sidecars.
169
173
 
170
- Note that you may add multiple `@card` decorators in a step with different parameters.
174
+ User code call
175
+ --------------
176
+ @ollama(
177
+ models=[...],
178
+ ...
179
+ )
180
+
181
+ Valid backend options
182
+ ---------------------
183
+ - 'local': Run as a separate process on the local task machine.
184
+ - (TODO) 'managed': Outerbounds hosts and selects compute provider.
185
+ - (TODO) 'remote': Spin up separate instance to serve Ollama models.
186
+
187
+ Valid model options
188
+ -------------------
189
+ Any model here https://ollama.com/search, e.g. 'llama3.2', 'llama3.3'
171
190
 
172
191
 
173
192
  Parameters
174
193
  ----------
175
- type : str, default 'default'
176
- Card type.
177
- id : str, optional, default None
178
- If multiple cards are present, use this id to identify this card.
179
- options : Dict[str, Any], default {}
180
- Options passed to the card. The contents depend on the card type.
181
- timeout : int, default 45
182
- Interrupt reporting if it takes more than this many seconds.
194
+ models: list[str]
195
+ List of Ollama containers running models in sidecars.
196
+ backend: str
197
+ Determines where and how to run the Ollama process.
198
+ force_pull: bool
199
+ Whether to run `ollama pull` no matter what, or first check the remote cache in Metaflow datastore for this model key.
200
+ cache_update_policy: str
201
+ Cache update policy: "auto", "force", or "never".
202
+ force_cache_update: bool
203
+ Simple override for "force" cache update policy.
204
+ debug: bool
205
+ Whether to turn on verbose debugging logs.
206
+ circuit_breaker_config: dict
207
+ Configuration for circuit breaker protection. Keys: failure_threshold, recovery_timeout, reset_timeout.
208
+ timeout_config: dict
209
+ Configuration for various operation timeouts. Keys: pull, stop, health_check, install, server_startup.
183
210
  """
184
211
  ...
185
212
 
186
- @typing.overload
187
- def card(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
188
- ...
189
-
190
- @typing.overload
191
- def card(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
192
- ...
193
-
194
- def card(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, type: str = 'default', id: typing.Optional[str] = None, options: typing.Dict[str, typing.Any] = {}, timeout: int = 45):
213
+ def s3_proxy(*, integration_name: typing.Optional[str] = None, write_mode: typing.Optional[str] = None, debug: typing.Optional[bool] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
195
214
  """
196
- Creates a human-readable report, a Metaflow Card, after this step completes.
197
-
198
- Note that you may add multiple `@card` decorators in a step with different parameters.
215
+ S3 Proxy decorator for routing S3 requests through a local proxy service.
199
216
 
200
217
 
201
218
  Parameters
202
219
  ----------
203
- type : str, default 'default'
204
- Card type.
205
- id : str, optional, default None
206
- If multiple cards are present, use this id to identify this card.
207
- options : Dict[str, Any], default {}
208
- Options passed to the card. The contents depend on the card type.
209
- timeout : int, default 45
210
- Interrupt reporting if it takes more than this many seconds.
220
+ integration_name : str, optional
221
+ Name of the S3 proxy integration. If not specified, will use the only
222
+ available S3 proxy integration in the namespace (fails if multiple exist).
223
+ write_mode : str, optional
224
+ The desired behavior during write operations to target (origin) S3 bucket.
225
+ allowed options are:
226
+ "origin-and-cache" -> write to both the target S3 bucket and local object
227
+ storage
228
+ "origin" -> only write to the target S3 bucket
229
+ "cache" -> only write to the object storage service used for caching
230
+ debug : bool, optional
231
+ Enable debug logging for proxy operations.
211
232
  """
212
233
  ...
213
234
 
214
235
  @typing.overload
215
- def model(*, load: typing.Union[typing.List[str], str, typing.List[typing.Tuple[str, typing.Optional[str]]]] = None, temp_dir_root: str = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
236
+ def checkpoint(*, load_policy: str = 'fresh', temp_dir_root: str = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
216
237
  """
217
- Enables loading / saving of models within a step.
238
+ Enables checkpointing for a step.
218
239
 
219
240
  > Examples
220
- - Saving Models
241
+
242
+ - Saving Checkpoints
243
+
221
244
  ```python
222
- @model
245
+ @checkpoint
223
246
  @step
224
247
  def train(self):
225
- # current.model.save returns a dictionary reference to the model saved
226
- self.my_model = current.model.save(
227
- path_to_my_model,
228
- label="my_model",
229
- metadata={
230
- "epochs": 10,
231
- "batch-size": 32,
232
- "learning-rate": 0.001,
233
- }
234
- )
235
- self.next(self.test)
236
-
237
- @model(load="my_model")
238
- @step
239
- def test(self):
240
- # `current.model.loaded` returns a dictionary of the loaded models
241
- # where the key is the name of the artifact and the value is the path to the model
242
- print(os.listdir(current.model.loaded["my_model"]))
243
- self.next(self.end)
248
+ model = create_model(self.parameters, checkpoint_path = None)
249
+ for i in range(self.epochs):
250
+ # some training logic
251
+ loss = model.train(self.dataset)
252
+ if i % 10 == 0:
253
+ model.save(
254
+ current.checkpoint.directory,
255
+ )
256
+ # saves the contents of the `current.checkpoint.directory` as a checkpoint
257
+ # and returns a reference dictionary to the checkpoint saved in the datastore
258
+ self.latest_checkpoint = current.checkpoint.save(
259
+ name="epoch_checkpoint",
260
+ metadata={
261
+ "epoch": i,
262
+ "loss": loss,
263
+ }
264
+ )
244
265
  ```
245
266
 
246
- - Loading models
267
+ - Using Loaded Checkpoints
268
+
247
269
  ```python
270
+ @retry(times=3)
271
+ @checkpoint
248
272
  @step
249
273
  def train(self):
250
- # current.model.load returns the path to the model loaded
251
- checkpoint_path = current.model.load(
252
- self.checkpoint_key,
253
- )
254
- model_path = current.model.load(
255
- self.model,
256
- )
257
- self.next(self.test)
274
+ # Assume that the task has restarted and the previous attempt of the task
275
+ # saved a checkpoint
276
+ checkpoint_path = None
277
+ if current.checkpoint.is_loaded: # Check if a checkpoint is loaded
278
+ print("Loaded checkpoint from the previous attempt")
279
+ checkpoint_path = current.checkpoint.directory
280
+
281
+ model = create_model(self.parameters, checkpoint_path = checkpoint_path)
282
+ for i in range(self.epochs):
283
+ ...
258
284
  ```
259
285
 
260
286
 
261
287
  Parameters
262
288
  ----------
263
- load : Union[List[str],str,List[Tuple[str,Union[str,None]]]], default: None
264
- Artifact name/s referencing the models/checkpoints to load. Artifact names refer to the names of the instance variables set to `self`.
265
- The artifact names given to `load` can be reference objects or reference `key` strings from objects created by `current.checkpoint` / `current.model` / `current.huggingface_hub`.
266
- If a list of tuples is provided, the first element is the artifact name and the second element is the path the artifact needs to be unpacked on
267
- the local filesystem. If the second element is None, the artifact will be unpacked in the current working directory.
268
- If a string is provided, then the artifact corresponding to that name will be loaded in the current working directory.
289
+ load_policy : str, default: "fresh"
290
+ The policy for loading the checkpoint. The following policies are supported:
291
+ - "eager": Loads the latest available checkpoint within the namespace.
292
+ With this mode, the latest checkpoint written by any previous task (can be even a different run) of the step
293
+ will be loaded at the start of the task.
294
+ - "none": Do not load any checkpoint
295
+ - "fresh": Loads the latest checkpoint created within the running Task.
296
+ This mode helps loading checkpoints across various retry attempts of the same task.
297
+ With this mode, no checkpoint will be loaded at the start of a task but any checkpoints
298
+ created within the task will be loaded when the task retries execution on failure.
269
299
 
270
300
  temp_dir_root : str, default: None
271
- The root directory under which `current.model.loaded` will store loaded models
301
+ The root directory under which `current.checkpoint.directory` will be created.
272
302
  """
273
303
  ...
274
304
 
275
305
  @typing.overload
276
- def model(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
306
+ def checkpoint(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
277
307
  ...
278
308
 
279
309
  @typing.overload
280
- def model(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
310
+ def checkpoint(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
281
311
  ...
282
312
 
283
- def model(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, load: typing.Union[typing.List[str], str, typing.List[typing.Tuple[str, typing.Optional[str]]]] = None, temp_dir_root: str = None):
313
+ def checkpoint(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, load_policy: str = 'fresh', temp_dir_root: str = None):
284
314
  """
285
- Enables loading / saving of models within a step.
315
+ Enables checkpointing for a step.
286
316
 
287
317
  > Examples
288
- - Saving Models
318
+
319
+ - Saving Checkpoints
320
+
289
321
  ```python
290
- @model
322
+ @checkpoint
291
323
  @step
292
324
  def train(self):
293
- # current.model.save returns a dictionary reference to the model saved
294
- self.my_model = current.model.save(
295
- path_to_my_model,
296
- label="my_model",
297
- metadata={
298
- "epochs": 10,
299
- "batch-size": 32,
300
- "learning-rate": 0.001,
301
- }
302
- )
303
- self.next(self.test)
304
-
305
- @model(load="my_model")
306
- @step
307
- def test(self):
308
- # `current.model.loaded` returns a dictionary of the loaded models
309
- # where the key is the name of the artifact and the value is the path to the model
310
- print(os.listdir(current.model.loaded["my_model"]))
311
- self.next(self.end)
325
+ model = create_model(self.parameters, checkpoint_path = None)
326
+ for i in range(self.epochs):
327
+ # some training logic
328
+ loss = model.train(self.dataset)
329
+ if i % 10 == 0:
330
+ model.save(
331
+ current.checkpoint.directory,
332
+ )
333
+ # saves the contents of the `current.checkpoint.directory` as a checkpoint
334
+ # and returns a reference dictionary to the checkpoint saved in the datastore
335
+ self.latest_checkpoint = current.checkpoint.save(
336
+ name="epoch_checkpoint",
337
+ metadata={
338
+ "epoch": i,
339
+ "loss": loss,
340
+ }
341
+ )
312
342
  ```
313
343
 
314
- - Loading models
344
+ - Using Loaded Checkpoints
345
+
315
346
  ```python
347
+ @retry(times=3)
348
+ @checkpoint
316
349
  @step
317
350
  def train(self):
318
- # current.model.load returns the path to the model loaded
319
- checkpoint_path = current.model.load(
320
- self.checkpoint_key,
321
- )
322
- model_path = current.model.load(
323
- self.model,
324
- )
325
- self.next(self.test)
351
+ # Assume that the task has restarted and the previous attempt of the task
352
+ # saved a checkpoint
353
+ checkpoint_path = None
354
+ if current.checkpoint.is_loaded: # Check if a checkpoint is loaded
355
+ print("Loaded checkpoint from the previous attempt")
356
+ checkpoint_path = current.checkpoint.directory
357
+
358
+ model = create_model(self.parameters, checkpoint_path = checkpoint_path)
359
+ for i in range(self.epochs):
360
+ ...
326
361
  ```
327
362
 
328
363
 
329
364
  Parameters
330
365
  ----------
331
- load : Union[List[str],str,List[Tuple[str,Union[str,None]]]], default: None
332
- Artifact name/s referencing the models/checkpoints to load. Artifact names refer to the names of the instance variables set to `self`.
333
- These artifact names give to `load` be reference objects or reference `key` string's from objects created by `current.checkpoint` / `current.model` / `current.huggingface_hub`.
334
- If a list of tuples is provided, the first element is the artifact name and the second element is the path the artifact needs be unpacked on
335
- the local filesystem. If the second element is None, the artifact will be unpacked in the current working directory.
336
- If a string is provided, then the artifact corresponding to that name will be loaded in the current working directory.
366
+ load_policy : str, default: "fresh"
367
+ The policy for loading the checkpoint. The following policies are supported:
368
+ - "eager": Loads the latest available checkpoint within the namespace.
369
+ With this mode, the latest checkpoint written by any previous task (can be even a different run) of the step
370
+ will be loaded at the start of the task.
371
+ - "none": Do not load any checkpoint
372
+ - "fresh": Loads the latest checkpoint created within the running Task.
373
+ This mode helps loading checkpoints across various retry attempts of the same task.
374
+ With this mode, no checkpoint will be loaded at the start of a task but any checkpoints
375
+ created within the task will be loaded when the task retries execution on failure.
337
376
 
338
377
  temp_dir_root : str, default: None
339
- The root directory under which `current.model.loaded` will store loaded models
378
+ The root directory under which `current.checkpoint.directory` will be created.
340
379
  """
341
380
  ...
342
381
 
343
- def nvct(*, gpu: int, gpu_type: str) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
382
+ @typing.overload
383
+ def environment(*, vars: typing.Dict[str, str] = {}) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
344
384
  """
345
- Specifies that this step should execute on DGX cloud.
385
+ Specifies environment variables to be set prior to the execution of a step.
346
386
 
347
387
 
348
388
  Parameters
349
389
  ----------
350
- gpu : int
351
- Number of GPUs to use.
352
- gpu_type : str
353
- Type of Nvidia GPU to use.
390
+ vars : Dict[str, str], default {}
391
+ Dictionary of environment variables to set.
354
392
  """
355
393
  ...
356
394
 
357
395
  @typing.overload
358
- def app_deploy(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
359
- """
360
- Decorator prototype for all step decorators. This function gets specialized
361
- and imported for all decorators types by _import_plugin_decorators().
362
- """
396
+ def environment(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
363
397
  ...
364
398
 
365
399
  @typing.overload
366
- def app_deploy(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
400
+ def environment(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
367
401
  ...
368
402
 
369
- def app_deploy(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
403
+ def environment(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, vars: typing.Dict[str, str] = {}):
370
404
  """
371
- Decorator prototype for all step decorators. This function gets specialized
372
- and imported for all decorators types by _import_plugin_decorators().
405
+ Specifies environment variables to be set prior to the execution of a step.
406
+
407
+
408
+ Parameters
409
+ ----------
410
+ vars : Dict[str, str], default {}
411
+ Dictionary of environment variables to set.
373
412
  """
374
413
  ...
375
414
 
@@ -424,181 +463,89 @@ def catch(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], ty
424
463
  """
425
464
  ...
426
465
 
427
- def huggingface_hub(*, temp_dir_root: typing.Optional[str] = None, load: typing.Union[typing.List[str], typing.List[typing.Tuple[typing.Dict, str]], typing.List[typing.Tuple[str, str]], typing.List[typing.Dict], None]) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
466
+ def nvidia(*, gpu: int, gpu_type: str, queue_timeout: int) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
428
467
  """
429
- Decorator that helps cache, version and store models/datasets from huggingface hub.
430
-
431
- > Examples
432
-
433
- **Usage: creating references of models from huggingface that may be loaded in downstream steps**
434
- ```python
435
- @huggingface_hub
436
- @step
437
- def pull_model_from_huggingface(self):
438
- # `current.huggingface_hub.snapshot_download` downloads the model from the Hugging Face Hub
439
- # and saves it in the backend storage based on the model's `repo_id`. If there exists a model
440
- # with the same `repo_id` in the backend storage, it will not download the model again. The return
441
- # value of the function is a reference to the model in the backend storage.
442
- # This reference can be used to load the model in the subsequent steps via `@model(load=["llama_model"])`
443
-
444
- self.model_id = "mistralai/Mistral-7B-Instruct-v0.1"
445
- self.llama_model = current.huggingface_hub.snapshot_download(
446
- repo_id=self.model_id,
447
- allow_patterns=["*.safetensors", "*.json", "tokenizer.*"],
448
- )
449
- self.next(self.train)
450
- ```
451
-
452
- **Usage: loading models directly from huggingface hub or from cache (from metaflow's datastore)**
453
- ```python
454
- @huggingface_hub(load=["mistralai/Mistral-7B-Instruct-v0.1"])
455
- @step
456
- def pull_model_from_huggingface(self):
457
- path_to_model = current.huggingface_hub.loaded["mistralai/Mistral-7B-Instruct-v0.1"]
458
- ```
459
-
460
- ```python
461
- @huggingface_hub(load=[("mistralai/Mistral-7B-Instruct-v0.1", "/my-directory"), ("myorg/mistral-lora, "/my-lora-directory")])
462
- @step
463
- def finetune_model(self):
464
- path_to_model = current.huggingface_hub.loaded["mistralai/Mistral-7B-Instruct-v0.1"]
465
- # path_to_model will be /my-directory
466
- ```
467
-
468
- ```python
469
- # Takes all the arguments passed to `snapshot_download`
470
- # except for `local_dir`
471
- @huggingface_hub(load=[
472
- {
473
- "repo_id": "mistralai/Mistral-7B-Instruct-v0.1",
474
- },
475
- {
476
- "repo_id": "myorg/mistral-lora",
477
- "repo_type": "model",
478
- },
479
- ])
480
- @step
481
- def finetune_model(self):
482
- path_to_model = current.huggingface_hub.loaded["mistralai/Mistral-7B-Instruct-v0.1"]
483
- # path_to_model will be /my-directory
484
- ```
468
+ Specifies that this step should execute on DGX cloud.
485
469
 
486
470
 
487
471
  Parameters
488
472
  ----------
489
- temp_dir_root : str, optional
490
- The root directory that will hold the temporary directory where objects will be downloaded.
491
-
492
- load: Union[List[str], List[Tuple[Dict, str]], List[Tuple[str, str]], List[Dict], None]
493
- The list of repos (models/datasets) to load.
494
-
495
- Loaded repos can be accessed via `current.huggingface_hub.loaded`. If load is set, then the following happens:
496
-
497
- - If repo (model/dataset) is not found in the datastore:
498
- - Downloads the repo from Hugging Face Hub to a temporary directory (or uses specified path) for local access
499
- - Stores it in Metaflow's datastore (s3/gcs/azure etc.) with a unique name based on repo_type/repo_id
500
- - All HF models loaded for a `@step` will be cached separately under flow/step/namespace.
501
-
502
- - If repo is found in the datastore:
503
- - Loads it directly from datastore to local path (can be temporary directory or specified path)
473
+ gpu : int
474
+ Number of GPUs to use.
475
+ gpu_type : str
476
+ Type of Nvidia GPU to use.
477
+ queue_timeout : int
478
+ Time to keep the job in NVCF's queue.
504
479
  """
505
480
  ...
506
481
 
507
482
  @typing.overload
508
- def secrets(*, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = [], role: typing.Optional[str] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
483
+ def card(*, type: str = 'default', id: typing.Optional[str] = None, options: typing.Dict[str, typing.Any] = {}, timeout: int = 45) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
509
484
  """
510
- Specifies secrets to be retrieved and injected as environment variables prior to
511
- the execution of a step.
485
+ Creates a human-readable report, a Metaflow Card, after this step completes.
486
+
487
+ Note that you may add multiple `@card` decorators in a step with different parameters.
512
488
 
513
489
 
514
490
  Parameters
515
491
  ----------
516
- sources : List[Union[str, Dict[str, Any]]], default: []
517
- List of secret specs, defining how the secrets are to be retrieved
518
- role : str, optional, default: None
519
- Role to use for fetching secrets
492
+ type : str, default 'default'
493
+ Card type.
494
+ id : str, optional, default None
495
+ If multiple cards are present, use this id to identify this card.
496
+ options : Dict[str, Any], default {}
497
+ Options passed to the card. The contents depend on the card type.
498
+ timeout : int, default 45
499
+ Interrupt reporting if it takes more than this many seconds.
520
500
  """
521
501
  ...
522
502
 
523
503
  @typing.overload
524
- def secrets(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
504
+ def card(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
525
505
  ...
526
506
 
527
507
  @typing.overload
528
- def secrets(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
508
+ def card(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
529
509
  ...
530
510
 
531
- def secrets(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = [], role: typing.Optional[str] = None):
511
+ def card(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, type: str = 'default', id: typing.Optional[str] = None, options: typing.Dict[str, typing.Any] = {}, timeout: int = 45):
532
512
  """
533
- Specifies secrets to be retrieved and injected as environment variables prior to
534
- the execution of a step.
513
+ Creates a human-readable report, a Metaflow Card, after this step completes.
514
+
515
+ Note that you may add multiple `@card` decorators in a step with different parameters.
535
516
 
536
517
 
537
518
  Parameters
538
519
  ----------
539
- sources : List[Union[str, Dict[str, Any]]], default: []
540
- List of secret specs, defining how the secrets are to be retrieved
541
- role : str, optional, default: None
542
- Role to use for fetching secrets
520
+ type : str, default 'default'
521
+ Card type.
522
+ id : str, optional, default None
523
+ If multiple cards are present, use this id to identify this card.
524
+ options : Dict[str, Any], default {}
525
+ Options passed to the card. The contents depend on the card type.
526
+ timeout : int, default 45
527
+ Interrupt reporting if it takes more than this many seconds.
543
528
  """
544
529
  ...
545
530
 
546
531
  @typing.overload
547
- def retry(*, times: int = 3, minutes_between_retries: int = 2) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
532
+ def fast_bakery_internal(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
548
533
  """
549
- Specifies the number of times the task corresponding
550
- to a step needs to be retried.
551
-
552
- This decorator is useful for handling transient errors, such as networking issues.
553
- If your task contains operations that can't be retried safely, e.g. database updates,
554
- it is advisable to annotate it with `@retry(times=0)`.
555
-
556
- This can be used in conjunction with the `@catch` decorator. The `@catch`
557
- decorator will execute a no-op task after all retries have been exhausted,
558
- ensuring that the flow execution can continue.
559
-
560
-
561
- Parameters
562
- ----------
563
- times : int, default 3
564
- Number of times to retry this task.
565
- minutes_between_retries : int, default 2
566
- Number of minutes between retries.
534
+ Internal decorator to support Fast bakery
567
535
  """
568
536
  ...
569
537
 
570
538
  @typing.overload
571
- def retry(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
572
- ...
573
-
574
- @typing.overload
575
- def retry(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
539
+ def fast_bakery_internal(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
576
540
  ...
577
541
 
578
- def retry(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, times: int = 3, minutes_between_retries: int = 2):
542
+ def fast_bakery_internal(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
579
543
  """
580
- Specifies the number of times the task corresponding
581
- to a step needs to be retried.
582
-
583
- This decorator is useful for handling transient errors, such as networking issues.
584
- If your task contains operations that can't be retried safely, e.g. database updates,
585
- it is advisable to annotate it with `@retry(times=0)`.
586
-
587
- This can be used in conjunction with the `@catch` decorator. The `@catch`
588
- decorator will execute a no-op task after all retries have been exhausted,
589
- ensuring that the flow execution can continue.
590
-
591
-
592
- Parameters
593
- ----------
594
- times : int, default 3
595
- Number of times to retry this task.
596
- minutes_between_retries : int, default 2
597
- Number of minutes between retries.
544
+ Internal decorator to support Fast bakery
598
545
  """
599
546
  ...
600
547
 
601
- def nvidia(*, gpu: int, gpu_type: str, queue_timeout: int) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
548
+ def nvct(*, gpu: int, gpu_type: str) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
602
549
  """
603
550
  Specifies that this step should execute on DGX cloud.
604
551
 
@@ -609,614 +556,736 @@ def nvidia(*, gpu: int, gpu_type: str, queue_timeout: int) -> typing.Callable[[t
609
556
  Number of GPUs to use.
610
557
  gpu_type : str
611
558
  Type of Nvidia GPU to use.
612
- queue_timeout : int
613
- Time to keep the job in NVCF's queue.
614
559
  """
615
560
  ...
616
561
 
617
562
  @typing.overload
618
- def resources(*, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
563
+ def pypi(*, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
619
564
  """
620
- Specifies the resources needed when executing this step.
621
-
622
- Use `@resources` to specify the resource requirements
623
- independently of the specific compute layer (`@batch`, `@kubernetes`).
565
+ Specifies the PyPI packages for the step.
624
566
 
625
- You can choose the compute layer on the command line by executing e.g.
626
- ```
627
- python myflow.py run --with batch
628
- ```
629
- or
630
- ```
631
- python myflow.py run --with kubernetes
632
- ```
633
- which executes the flow on the desired system using the
634
- requirements specified in `@resources`.
567
+ Information in this decorator will augment any
568
+ attributes set in the `@pypi_base` flow-level decorator. Hence,
569
+ you can use `@pypi_base` to set packages required by all
570
+ steps and use `@pypi` to specify step-specific overrides.
635
571
 
636
572
 
637
573
  Parameters
638
574
  ----------
639
- cpu : int, default 1
640
- Number of CPUs required for this step.
641
- gpu : int, optional, default None
642
- Number of GPUs required for this step.
643
- disk : int, optional, default None
644
- Disk size (in MB) required for this step. Only applies on Kubernetes.
645
- memory : int, default 4096
646
- Memory size (in MB) required for this step.
647
- shared_memory : int, optional, default None
648
- The value for the size (in MiB) of the /dev/shm volume for this step.
649
- This parameter maps to the `--shm-size` option in Docker.
575
+ packages : Dict[str, str], default: {}
576
+ Packages to use for this step. The key is the name of the package
577
+ and the value is the version to use.
578
+ python : str, optional, default: None
579
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
580
+ that the version used will correspond to the version of the Python interpreter used to start the run.
650
581
  """
651
582
  ...
652
583
 
653
584
  @typing.overload
654
- def resources(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
585
+ def pypi(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
655
586
  ...
656
587
 
657
588
  @typing.overload
658
- def resources(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
589
+ def pypi(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
659
590
  ...
660
591
 
661
- def resources(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None):
592
+ def pypi(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None):
662
593
  """
663
- Specifies the resources needed when executing this step.
594
+ Specifies the PyPI packages for the step.
664
595
 
665
- Use `@resources` to specify the resource requirements
666
- independently of the specific compute layer (`@batch`, `@kubernetes`).
667
-
668
- You can choose the compute layer on the command line by executing e.g.
669
- ```
670
- python myflow.py run --with batch
671
- ```
672
- or
673
- ```
674
- python myflow.py run --with kubernetes
675
- ```
676
- which executes the flow on the desired system using the
677
- requirements specified in `@resources`.
596
+ Information in this decorator will augment any
597
+ attributes set in the `@pypi_base` flow-level decorator. Hence,
598
+ you can use `@pypi_base` to set packages required by all
599
+ steps and use `@pypi` to specify step-specific overrides.
678
600
 
679
601
 
680
602
  Parameters
681
603
  ----------
682
- cpu : int, default 1
683
- Number of CPUs required for this step.
684
- gpu : int, optional, default None
685
- Number of GPUs required for this step.
686
- disk : int, optional, default None
687
- Disk size (in MB) required for this step. Only applies on Kubernetes.
688
- memory : int, default 4096
689
- Memory size (in MB) required for this step.
690
- shared_memory : int, optional, default None
691
- The value for the size (in MiB) of the /dev/shm volume for this step.
692
- This parameter maps to the `--shm-size` option in Docker.
604
+ packages : Dict[str, str], default: {}
605
+ Packages to use for this step. The key is the name of the package
606
+ and the value is the version to use.
607
+ python : str, optional, default: None
608
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
609
+ that the version used will correspond to the version of the Python interpreter used to start the run.
693
610
  """
694
611
  ...
695
612
 
696
613
  @typing.overload
697
- def conda(*, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
614
+ def secrets(*, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = [], role: typing.Optional[str] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
698
615
  """
699
- Specifies the Conda environment for the step.
700
-
701
- Information in this decorator will augment any
702
- attributes set in the `@conda_base` flow-level decorator. Hence,
703
- you can use `@conda_base` to set packages required by all
704
- steps and use `@conda` to specify step-specific overrides.
616
+ Specifies secrets to be retrieved and injected as environment variables prior to
617
+ the execution of a step.
705
618
 
706
619
 
707
620
  Parameters
708
621
  ----------
709
- packages : Dict[str, str], default {}
710
- Packages to use for this step. The key is the name of the package
711
- and the value is the version to use.
712
- libraries : Dict[str, str], default {}
713
- Supported for backward compatibility. When used with packages, packages will take precedence.
714
- python : str, optional, default None
715
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
716
- that the version used will correspond to the version of the Python interpreter used to start the run.
717
- disabled : bool, default False
718
- If set to True, disables @conda.
622
+ sources : List[Union[str, Dict[str, Any]]], default: []
623
+ List of secret specs, defining how the secrets are to be retrieved
624
+ role : str, optional, default: None
625
+ Role to use for fetching secrets
719
626
  """
720
627
  ...
721
628
 
722
629
  @typing.overload
723
- def conda(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
630
+ def secrets(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
724
631
  ...
725
632
 
726
633
  @typing.overload
727
- def conda(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
634
+ def secrets(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
728
635
  ...
729
636
 
730
- def conda(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False):
637
+ def secrets(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = [], role: typing.Optional[str] = None):
731
638
  """
732
- Specifies the Conda environment for the step.
733
-
734
- Information in this decorator will augment any
735
- attributes set in the `@conda_base` flow-level decorator. Hence,
736
- you can use `@conda_base` to set packages required by all
737
- steps and use `@conda` to specify step-specific overrides.
639
+ Specifies secrets to be retrieved and injected as environment variables prior to
640
+ the execution of a step.
738
641
 
739
642
 
740
643
  Parameters
741
644
  ----------
742
- packages : Dict[str, str], default {}
743
- Packages to use for this step. The key is the name of the package
744
- and the value is the version to use.
745
- libraries : Dict[str, str], default {}
746
- Supported for backward compatibility. When used with packages, packages will take precedence.
747
- python : str, optional, default None
748
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
749
- that the version used will correspond to the version of the Python interpreter used to start the run.
750
- disabled : bool, default False
751
- If set to True, disables @conda.
645
+ sources : List[Union[str, Dict[str, Any]]], default: []
646
+ List of secret specs, defining how the secrets are to be retrieved
647
+ role : str, optional, default: None
648
+ Role to use for fetching secrets
752
649
  """
753
650
  ...
754
651
 
755
652
  @typing.overload
756
- def fast_bakery_internal(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
653
+ def test_append_card(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
757
654
  """
758
- Internal decorator to support Fast bakery
655
+ A simple decorator that demonstrates using CardDecoratorInjector
656
+ to inject a card and render simple markdown content.
759
657
  """
760
658
  ...
761
659
 
762
660
  @typing.overload
763
- def fast_bakery_internal(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
661
+ def test_append_card(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
764
662
  ...
765
663
 
766
- def fast_bakery_internal(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
664
+ def test_append_card(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
767
665
  """
768
- Internal decorator to support Fast bakery
666
+ A simple decorator that demonstrates using CardDecoratorInjector
667
+ to inject a card and render simple markdown content.
769
668
  """
770
669
  ...
771
670
 
772
- def vllm(*, model: str, backend: str, openai_api_server: bool, debug: bool, card_refresh_interval: int, max_retries: int, retry_alert_frequency: int, engine_args: dict) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
671
+ def kubernetes(*, cpu: int = 1, memory: int = 4096, disk: int = 10240, image: typing.Optional[str] = None, image_pull_policy: str = 'KUBERNETES_IMAGE_PULL_POLICY', image_pull_secrets: typing.List[str] = [], service_account: str = 'METAFLOW_KUBERNETES_SERVICE_ACCOUNT', secrets: typing.Optional[typing.List[str]] = None, node_selector: typing.Union[typing.Dict[str, str], str, None] = None, namespace: str = 'METAFLOW_KUBERNETES_NAMESPACE', gpu: typing.Optional[int] = None, gpu_vendor: str = 'KUBERNETES_GPU_VENDOR', tolerations: typing.List[typing.Dict[str, str]] = [], labels: typing.Dict[str, str] = 'METAFLOW_KUBERNETES_LABELS', annotations: typing.Dict[str, str] = 'METAFLOW_KUBERNETES_ANNOTATIONS', use_tmpfs: bool = False, tmpfs_tempdir: bool = True, tmpfs_size: typing.Optional[int] = None, tmpfs_path: typing.Optional[str] = '/metaflow_temp', persistent_volume_claims: typing.Optional[typing.Dict[str, str]] = None, shared_memory: typing.Optional[int] = None, port: typing.Optional[int] = None, compute_pool: typing.Optional[str] = None, hostname_resolution_timeout: int = 600, qos: str = 'Burstable', security_context: typing.Optional[typing.Dict[str, typing.Any]] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
773
672
  """
774
- This decorator is used to run vllm APIs as Metaflow task sidecars.
775
-
776
- User code call
777
- --------------
778
- @vllm(
779
- model="...",
780
- ...
781
- )
782
-
783
- Valid backend options
784
- ---------------------
785
- - 'local': Run as a separate process on the local task machine.
786
-
787
- Valid model options
788
- -------------------
789
- Any HuggingFace model identifier, e.g. 'meta-llama/Llama-3.2-1B'
790
-
791
- NOTE: vLLM's OpenAI-compatible server serves ONE model per server instance.
792
- If you need multiple models, you must create multiple @vllm decorators.
673
+ Specifies that this step should execute on Kubernetes.
793
674
 
794
675
 
795
676
  Parameters
796
677
  ----------
797
- model: str
798
- HuggingFace model identifier to be served by vLLM.
799
- backend: str
800
- Determines where and how to run the vLLM process.
801
- openai_api_server: bool
802
- Whether to use OpenAI-compatible API server mode (subprocess) instead of native engine.
803
- Default is False (uses native engine).
804
- Set to True for backward compatibility with existing code.
805
- debug: bool
806
- Whether to turn on verbose debugging logs.
807
- card_refresh_interval: int
808
- Interval in seconds for refreshing the vLLM status card.
809
- Only used when openai_api_server=True.
810
- max_retries: int
811
- Maximum number of retries checking for vLLM server startup.
812
- Only used when openai_api_server=True.
813
- retry_alert_frequency: int
814
- Frequency of alert logs for vLLM server startup retries.
815
- Only used when openai_api_server=True.
816
- engine_args : dict
817
- Additional keyword arguments to pass to the vLLM engine.
818
- For example, `tensor_parallel_size=2`.
678
+ cpu : int, default 1
679
+ Number of CPUs required for this step. If `@resources` is
680
+ also present, the maximum value from all decorators is used.
681
+ memory : int, default 4096
682
+ Memory size (in MB) required for this step. If
683
+ `@resources` is also present, the maximum value from all decorators is
684
+ used.
685
+ disk : int, default 10240
686
+ Disk size (in MB) required for this step. If
687
+ `@resources` is also present, the maximum value from all decorators is
688
+ used.
689
+ image : str, optional, default None
690
+ Docker image to use when launching on Kubernetes. If not specified, and
691
+ METAFLOW_KUBERNETES_CONTAINER_IMAGE is specified, that image is used. If
692
+ not, a default Docker image mapping to the current version of Python is used.
693
+ image_pull_policy: str, default KUBERNETES_IMAGE_PULL_POLICY
694
+ If given, the imagePullPolicy to be applied to the Docker image of the step.
695
+ image_pull_secrets: List[str], default []
696
+ The default is extracted from METAFLOW_KUBERNETES_IMAGE_PULL_SECRETS.
697
+ Kubernetes image pull secrets to use when pulling container images
698
+ in Kubernetes.
699
+ service_account : str, default METAFLOW_KUBERNETES_SERVICE_ACCOUNT
700
+ Kubernetes service account to use when launching pod in Kubernetes.
701
+ secrets : List[str], optional, default None
702
+ Kubernetes secrets to use when launching pod in Kubernetes. These
703
+ secrets are in addition to the ones defined in `METAFLOW_KUBERNETES_SECRETS`
704
+ in Metaflow configuration.
705
+ node_selector: Union[Dict[str,str], str], optional, default None
706
+ Kubernetes node selector(s) to apply to the pod running the task.
707
+ Can be passed in as a comma separated string of values e.g.
708
+ 'kubernetes.io/os=linux,kubernetes.io/arch=amd64' or as a dictionary
709
+ {'kubernetes.io/os': 'linux', 'kubernetes.io/arch': 'amd64'}
710
+ namespace : str, default METAFLOW_KUBERNETES_NAMESPACE
711
+ Kubernetes namespace to use when launching pod in Kubernetes.
712
+ gpu : int, optional, default None
713
+ Number of GPUs required for this step. A value of zero implies that
714
+ the scheduled node should not have GPUs.
715
+ gpu_vendor : str, default KUBERNETES_GPU_VENDOR
716
+ The vendor of the GPUs to be used for this step.
717
+ tolerations : List[Dict[str,str]], default []
718
+ The default is extracted from METAFLOW_KUBERNETES_TOLERATIONS.
719
+ Kubernetes tolerations to use when launching pod in Kubernetes.
720
+ labels: Dict[str, str], default: METAFLOW_KUBERNETES_LABELS
721
+ Kubernetes labels to use when launching pod in Kubernetes.
722
+ annotations: Dict[str, str], default: METAFLOW_KUBERNETES_ANNOTATIONS
723
+ Kubernetes annotations to use when launching pod in Kubernetes.
724
+ use_tmpfs : bool, default False
725
+ This enables an explicit tmpfs mount for this step.
726
+ tmpfs_tempdir : bool, default True
727
+ sets METAFLOW_TEMPDIR to tmpfs_path if set for this step.
728
+ tmpfs_size : int, optional, default: None
729
+ The value for the size (in MiB) of the tmpfs mount for this step.
730
+ This parameter maps to the `--tmpfs` option in Docker. Defaults to 50% of the
731
+ memory allocated for this step.
732
+ tmpfs_path : str, optional, default /metaflow_temp
733
+ Path to tmpfs mount for this step.
734
+ persistent_volume_claims : Dict[str, str], optional, default None
735
+ A map (dictionary) of persistent volumes to be mounted to the pod for this step. The map is from persistent
736
+ volumes to the path to which the volume is to be mounted, e.g., `{'pvc-name': '/path/to/mount/on'}`.
737
+ shared_memory: int, optional
738
+ Shared memory size (in MiB) required for this step
739
+ port: int, optional
740
+ Port number to specify in the Kubernetes job object
741
+ compute_pool : str, optional, default None
742
+ Compute pool to be used for this step.
743
+ If not specified, any accessible compute pool within the perimeter is used.
744
+ hostname_resolution_timeout: int, default 10 * 60
745
+ Timeout in seconds for the worker tasks in the gang-scheduled cluster to resolve the hostname of the control task.
746
+ Only applicable when @parallel is used.
747
+ qos: str, default: Burstable
748
+ Quality of Service class to assign to the pod. Supported values are: Guaranteed, Burstable, BestEffort
749
+
750
+ security_context: Dict[str, Any], optional, default None
751
+ Container security context. Applies to the task container. Allows the following keys:
752
+ - privileged: bool, optional, default None
753
+ - allow_privilege_escalation: bool, optional, default None
754
+ - run_as_user: int, optional, default None
755
+ - run_as_group: int, optional, default None
756
+ - run_as_non_root: bool, optional, default None
819
757
  """
820
758
  ...
821
759
 
822
760
  @typing.overload
823
- def pypi(*, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
761
+ def nebius_s3_proxy(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
824
762
  """
825
- Specifies the PyPI packages for the step.
826
-
827
- Information in this decorator will augment any
828
- attributes set in the `@pyi_base` flow-level decorator. Hence,
829
- you can use `@pypi_base` to set packages required by all
830
- steps and use `@pypi` to specify step-specific overrides.
831
-
832
-
833
- Parameters
834
- ----------
835
- packages : Dict[str, str], default: {}
836
- Packages to use for this step. The key is the name of the package
837
- and the value is the version to use.
838
- python : str, optional, default: None
839
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
840
- that the version used will correspond to the version of the Python interpreter used to start the run.
763
+ Nebius-specific S3 Proxy decorator for routing S3 requests through a local proxy service.
764
+ It exists to make it easier for users to know that this decorator should only be used with
765
+ a Neo Cloud like Nebius.
841
766
  """
842
767
  ...
843
768
 
844
769
  @typing.overload
845
- def pypi(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
770
+ def nebius_s3_proxy(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
846
771
  ...
847
772
 
848
- @typing.overload
849
- def pypi(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
773
+ def nebius_s3_proxy(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
774
+ """
775
+ Nebius-specific S3 Proxy decorator for routing S3 requests through a local proxy service.
776
+ It exists to make it easier for users to know that this decorator should only be used with
777
+ a Neo Cloud like Nebius.
778
+ """
850
779
  ...
851
780
 
852
- def pypi(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None):
781
+ @typing.overload
782
+ def retry(*, times: int = 3, minutes_between_retries: int = 2) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
853
783
  """
854
- Specifies the PyPI packages for the step.
784
+ Specifies the number of times the task corresponding
785
+ to a step needs to be retried.
855
786
 
856
- Information in this decorator will augment any
857
- attributes set in the `@pyi_base` flow-level decorator. Hence,
858
- you can use `@pypi_base` to set packages required by all
859
- steps and use `@pypi` to specify step-specific overrides.
787
+ This decorator is useful for handling transient errors, such as networking issues.
788
+ If your task contains operations that can't be retried safely, e.g. database updates,
789
+ it is advisable to annotate it with `@retry(times=0)`.
790
+
791
+ This can be used in conjunction with the `@catch` decorator. The `@catch`
792
+ decorator will execute a no-op task after all retries have been exhausted,
793
+ ensuring that the flow execution can continue.
860
794
 
861
795
 
862
796
  Parameters
863
797
  ----------
864
- packages : Dict[str, str], default: {}
865
- Packages to use for this step. The key is the name of the package
866
- and the value is the version to use.
867
- python : str, optional, default: None
868
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
869
- that the version used will correspond to the version of the Python interpreter used to start the run.
798
+ times : int, default 3
799
+ Number of times to retry this task.
800
+ minutes_between_retries : int, default 2
801
+ Number of minutes between retries.
870
802
  """
871
803
  ...
872
804
 
873
805
  @typing.overload
874
- def parallel(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
806
+ def retry(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
807
+ ...
808
+
809
+ @typing.overload
810
+ def retry(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
811
+ ...
812
+
813
+ def retry(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, times: int = 3, minutes_between_retries: int = 2):
875
814
  """
876
- Decorator prototype for all step decorators. This function gets specialized
877
- and imported for all decorators types by _import_plugin_decorators().
815
+ Specifies the number of times the task corresponding
816
+ to a step needs to be retried.
817
+
818
+ This decorator is useful for handling transient errors, such as networking issues.
819
+ If your task contains operations that can't be retried safely, e.g. database updates,
820
+ it is advisable to annotate it with `@retry(times=0)`.
821
+
822
+ This can be used in conjunction with the `@catch` decorator. The `@catch`
823
+ decorator will execute a no-op task after all retries have been exhausted,
824
+ ensuring that the flow execution can continue.
825
+
826
+
827
+ Parameters
828
+ ----------
829
+ times : int, default 3
830
+ Number of times to retry this task.
831
+ minutes_between_retries : int, default 2
832
+ Number of minutes between retries.
833
+ """
834
+ ...
835
+
836
+ def huggingface_hub(*, temp_dir_root: typing.Optional[str] = None, load: typing.Union[typing.List[str], typing.List[typing.Tuple[typing.Dict, str]], typing.List[typing.Tuple[str, str]], typing.List[typing.Dict], None]) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
837
+ """
838
+ Decorator that helps cache, version and store models/datasets from huggingface hub.
839
+
840
+ > Examples
841
+
842
+ **Usage: creating references of models from huggingface that may be loaded in downstream steps**
843
+ ```python
844
+ @huggingface_hub
845
+ @step
846
+ def pull_model_from_huggingface(self):
847
+ # `current.huggingface_hub.snapshot_download` downloads the model from the Hugging Face Hub
848
+ # and saves it in the backend storage based on the model's `repo_id`. If there exists a model
849
+ # with the same `repo_id` in the backend storage, it will not download the model again. The return
850
+ # value of the function is a reference to the model in the backend storage.
851
+ # This reference can be used to load the model in the subsequent steps via `@model(load=["llama_model"])`
852
+
853
+ self.model_id = "mistralai/Mistral-7B-Instruct-v0.1"
854
+ self.llama_model = current.huggingface_hub.snapshot_download(
855
+ repo_id=self.model_id,
856
+ allow_patterns=["*.safetensors", "*.json", "tokenizer.*"],
857
+ )
858
+ self.next(self.train)
859
+ ```
860
+
861
+ **Usage: loading models directly from huggingface hub or from cache (from metaflow's datastore)**
862
+ ```python
863
+ @huggingface_hub(load=["mistralai/Mistral-7B-Instruct-v0.1"])
864
+ @step
865
+ def pull_model_from_huggingface(self):
866
+ path_to_model = current.huggingface_hub.loaded["mistralai/Mistral-7B-Instruct-v0.1"]
867
+ ```
868
+
869
+ ```python
870
+ @huggingface_hub(load=[("mistralai/Mistral-7B-Instruct-v0.1", "/my-directory"), ("myorg/mistral-lora", "/my-lora-directory")])
871
+ @step
872
+ def finetune_model(self):
873
+ path_to_model = current.huggingface_hub.loaded["mistralai/Mistral-7B-Instruct-v0.1"]
874
+ # path_to_model will be /my-directory
875
+ ```
876
+
877
+ ```python
878
+ # Takes all the arguments passed to `snapshot_download`
879
+ # except for `local_dir`
880
+ @huggingface_hub(load=[
881
+ {
882
+ "repo_id": "mistralai/Mistral-7B-Instruct-v0.1",
883
+ },
884
+ {
885
+ "repo_id": "myorg/mistral-lora",
886
+ "repo_type": "model",
887
+ },
888
+ ])
889
+ @step
890
+ def finetune_model(self):
891
+ path_to_model = current.huggingface_hub.loaded["mistralai/Mistral-7B-Instruct-v0.1"]
892
+ # path_to_model will be /my-directory
893
+ ```
894
+
895
+
896
+ Parameters
897
+ ----------
898
+ temp_dir_root : str, optional
899
+ The root directory that will hold the temporary directory where objects will be downloaded.
900
+
901
+ load: Union[List[str], List[Tuple[Dict, str]], List[Tuple[str, str]], List[Dict], None]
902
+ The list of repos (models/datasets) to load.
903
+
904
+ Loaded repos can be accessed via `current.huggingface_hub.loaded`. If load is set, then the following happens:
905
+
906
+ - If repo (model/dataset) is not found in the datastore:
907
+ - Downloads the repo from Hugging Face Hub to a temporary directory (or uses specified path) for local access
908
+ - Stores it in Metaflow's datastore (s3/gcs/azure etc.) with a unique name based on repo_type/repo_id
909
+ - All HF models loaded for a `@step` will be cached separately under flow/step/namespace.
910
+
911
+ - If repo is found in the datastore:
912
+ - Loads it directly from datastore to local path (can be temporary directory or specified path)
878
913
  """
879
914
  ...
880
915
 
881
916
  @typing.overload
882
- def parallel(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
917
+ def coreweave_s3_proxy(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
918
+ """
919
+ CoreWeave-specific S3 Proxy decorator for routing S3 requests through a local proxy service.
920
+ It exists to make it easier for users to know that this decorator should only be used with
921
+ a Neo Cloud like CoreWeave.
922
+ """
883
923
  ...
884
924
 
885
- def parallel(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
925
+ @typing.overload
926
+ def coreweave_s3_proxy(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
927
+ ...
928
+
929
+ def coreweave_s3_proxy(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
886
930
  """
887
- Decorator prototype for all step decorators. This function gets specialized
888
- and imported for all decorators types by _import_plugin_decorators().
931
+ CoreWeave-specific S3 Proxy decorator for routing S3 requests through a local proxy service.
932
+ It exists to make it easier for users to know that this decorator should only be used with
933
+ a Neo Cloud like CoreWeave.
889
934
  """
890
935
  ...
891
936
 
892
- def ollama(*, models: list, backend: str, force_pull: bool, cache_update_policy: str, force_cache_update: bool, debug: bool, circuit_breaker_config: dict, timeout_config: dict) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
937
+ @typing.overload
938
+ def resources(*, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
893
939
  """
894
- This decorator is used to run Ollama APIs as Metaflow task sidecars.
895
-
896
- User code call
897
- --------------
898
- @ollama(
899
- models=[...],
900
- ...
901
- )
940
+ Specifies the resources needed when executing this step.
902
941
 
903
- Valid backend options
904
- ---------------------
905
- - 'local': Run as a separate process on the local task machine.
906
- - (TODO) 'managed': Outerbounds hosts and selects compute provider.
907
- - (TODO) 'remote': Spin up separate instance to serve Ollama models.
942
+ Use `@resources` to specify the resource requirements
943
+ independently of the specific compute layer (`@batch`, `@kubernetes`).
908
944
 
909
- Valid model options
910
- -------------------
911
- Any model here https://ollama.com/search, e.g. 'llama3.2', 'llama3.3'
945
+ You can choose the compute layer on the command line by executing e.g.
946
+ ```
947
+ python myflow.py run --with batch
948
+ ```
949
+ or
950
+ ```
951
+ python myflow.py run --with kubernetes
952
+ ```
953
+ which executes the flow on the desired system using the
954
+ requirements specified in `@resources`.
912
955
 
913
956
 
914
957
  Parameters
915
958
  ----------
916
- models: list[str]
917
- List of Ollama containers running models in sidecars.
918
- backend: str
919
- Determines where and how to run the Ollama process.
920
- force_pull: bool
921
- Whether to run `ollama pull` no matter what, or first check the remote cache in Metaflow datastore for this model key.
922
- cache_update_policy: str
923
- Cache update policy: "auto", "force", or "never".
924
- force_cache_update: bool
925
- Simple override for "force" cache update policy.
926
- debug: bool
927
- Whether to turn on verbose debugging logs.
928
- circuit_breaker_config: dict
929
- Configuration for circuit breaker protection. Keys: failure_threshold, recovery_timeout, reset_timeout.
930
- timeout_config: dict
931
- Configuration for various operation timeouts. Keys: pull, stop, health_check, install, server_startup.
959
+ cpu : int, default 1
960
+ Number of CPUs required for this step.
961
+ gpu : int, optional, default None
962
+ Number of GPUs required for this step.
963
+ disk : int, optional, default None
964
+ Disk size (in MB) required for this step. Only applies on Kubernetes.
965
+ memory : int, default 4096
966
+ Memory size (in MB) required for this step.
967
+ shared_memory : int, optional, default None
968
+ The value for the size (in MiB) of the /dev/shm volume for this step.
969
+ This parameter maps to the `--shm-size` option in Docker.
932
970
  """
933
971
  ...
934
972
 
935
973
  @typing.overload
936
- def checkpoint(*, load_policy: str = 'fresh', temp_dir_root: str = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
974
+ def resources(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
975
+ ...
976
+
977
+ @typing.overload
978
+ def resources(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
979
+ ...
980
+
981
+ def resources(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None):
937
982
  """
938
- Enables checkpointing for a step.
983
+ Specifies the resources needed when executing this step.
939
984
 
940
- > Examples
985
+ Use `@resources` to specify the resource requirements
986
+ independently of the specific compute layer (`@batch`, `@kubernetes`).
941
987
 
942
- - Saving Checkpoints
988
+ You can choose the compute layer on the command line by executing e.g.
989
+ ```
990
+ python myflow.py run --with batch
991
+ ```
992
+ or
993
+ ```
994
+ python myflow.py run --with kubernetes
995
+ ```
996
+ which executes the flow on the desired system using the
997
+ requirements specified in `@resources`.
998
+
999
+
1000
+ Parameters
1001
+ ----------
1002
+ cpu : int, default 1
1003
+ Number of CPUs required for this step.
1004
+ gpu : int, optional, default None
1005
+ Number of GPUs required for this step.
1006
+ disk : int, optional, default None
1007
+ Disk size (in MB) required for this step. Only applies on Kubernetes.
1008
+ memory : int, default 4096
1009
+ Memory size (in MB) required for this step.
1010
+ shared_memory : int, optional, default None
1011
+ The value for the size (in MiB) of the /dev/shm volume for this step.
1012
+ This parameter maps to the `--shm-size` option in Docker.
1013
+ """
1014
+ ...
1015
+
1016
+ @typing.overload
1017
+ def parallel(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1018
+ """
1019
+ Decorator prototype for all step decorators. This function gets specialized
1020
+ and imported for all decorators types by _import_plugin_decorators().
1021
+ """
1022
+ ...
1023
+
1024
+ @typing.overload
1025
+ def parallel(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1026
+ ...
1027
+
1028
+ def parallel(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
1029
+ """
1030
+ Decorator prototype for all step decorators. This function gets specialized
1031
+ and imported for all decorators types by _import_plugin_decorators().
1032
+ """
1033
+ ...
1034
+
1035
+ @typing.overload
1036
+ def model(*, load: typing.Union[typing.List[str], str, typing.List[typing.Tuple[str, typing.Optional[str]]]] = None, temp_dir_root: str = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1037
+ """
1038
+ Enables loading / saving of models within a step.
943
1039
 
1040
+ > Examples
1041
+ - Saving Models
944
1042
  ```python
945
- @checkpoint
1043
+ @model
946
1044
  @step
947
1045
  def train(self):
948
- model = create_model(self.parameters, checkpoint_path = None)
949
- for i in range(self.epochs):
950
- # some training logic
951
- loss = model.train(self.dataset)
952
- if i % 10 == 0:
953
- model.save(
954
- current.checkpoint.directory,
955
- )
956
- # saves the contents of the `current.checkpoint.directory` as a checkpoint
957
- # and returns a reference dictionary to the checkpoint saved in the datastore
958
- self.latest_checkpoint = current.checkpoint.save(
959
- name="epoch_checkpoint",
960
- metadata={
961
- "epoch": i,
962
- "loss": loss,
963
- }
964
- )
965
- ```
1046
+ # current.model.save returns a dictionary reference to the model saved
1047
+ self.my_model = current.model.save(
1048
+ path_to_my_model,
1049
+ label="my_model",
1050
+ metadata={
1051
+ "epochs": 10,
1052
+ "batch-size": 32,
1053
+ "learning-rate": 0.001,
1054
+ }
1055
+ )
1056
+ self.next(self.test)
966
1057
 
967
- - Using Loaded Checkpoints
1058
+ @model(load="my_model")
1059
+ @step
1060
+ def test(self):
1061
+ # `current.model.loaded` returns a dictionary of the loaded models
1062
+ # where the key is the name of the artifact and the value is the path to the model
1063
+ print(os.listdir(current.model.loaded["my_model"]))
1064
+ self.next(self.end)
1065
+ ```
968
1066
 
1067
+ - Loading models
969
1068
  ```python
970
- @retry(times=3)
971
- @checkpoint
972
1069
  @step
973
1070
  def train(self):
974
- # Assume that the task has restarted and the previous attempt of the task
975
- # saved a checkpoint
976
- checkpoint_path = None
977
- if current.checkpoint.is_loaded: # Check if a checkpoint is loaded
978
- print("Loaded checkpoint from the previous attempt")
979
- checkpoint_path = current.checkpoint.directory
980
-
981
- model = create_model(self.parameters, checkpoint_path = checkpoint_path)
982
- for i in range(self.epochs):
983
- ...
1071
+ # current.model.load returns the path to the model loaded
1072
+ checkpoint_path = current.model.load(
1073
+ self.checkpoint_key,
1074
+ )
1075
+ model_path = current.model.load(
1076
+ self.model,
1077
+ )
1078
+ self.next(self.test)
984
1079
  ```
985
1080
 
986
1081
 
987
1082
  Parameters
988
1083
  ----------
989
- load_policy : str, default: "fresh"
990
- The policy for loading the checkpoint. The following policies are supported:
991
- - "eager": Loads the the latest available checkpoint within the namespace.
992
- With this mode, the latest checkpoint written by any previous task (can be even a different run) of the step
993
- will be loaded at the start of the task.
994
- - "none": Do not load any checkpoint
995
- - "fresh": Loads the lastest checkpoint created within the running Task.
996
- This mode helps loading checkpoints across various retry attempts of the same task.
997
- With this mode, no checkpoint will be loaded at the start of a task but any checkpoints
998
- created within the task will be loaded when the task is retries execution on failure.
1084
+ load : Union[List[str],str,List[Tuple[str,Union[str,None]]]], default: None
1085
+ Artifact name/s referencing the models/checkpoints to load. Artifact names refer to the names of the instance variables set to `self`.
1086
+ The artifact names given to `load` can be reference objects or reference `key` strings from objects created by `current.checkpoint` / `current.model` / `current.huggingface_hub`.
1087
+ If a list of tuples is provided, the first element is the artifact name and the second element is the path the artifact needs to be unpacked on
1088
+ the local filesystem. If the second element is None, the artifact will be unpacked in the current working directory.
1089
+ If a string is provided, then the artifact corresponding to that name will be loaded in the current working directory.
999
1090
 
1000
1091
  temp_dir_root : str, default: None
1001
- The root directory under which `current.checkpoint.directory` will be created.
1092
+ The root directory under which `current.model.loaded` will store loaded models
1002
1093
  """
1003
1094
  ...
1004
1095
 
1005
1096
  @typing.overload
1006
- def checkpoint(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1097
+ def model(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1007
1098
  ...
1008
1099
 
1009
1100
  @typing.overload
1010
- def checkpoint(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1101
+ def model(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1011
1102
  ...
1012
1103
 
1013
- def checkpoint(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, load_policy: str = 'fresh', temp_dir_root: str = None):
1104
+ def model(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, load: typing.Union[typing.List[str], str, typing.List[typing.Tuple[str, typing.Optional[str]]]] = None, temp_dir_root: str = None):
1014
1105
  """
1015
- Enables checkpointing for a step.
1106
+ Enables loading / saving of models within a step.
1016
1107
 
1017
1108
  > Examples
1018
-
1019
- - Saving Checkpoints
1020
-
1109
+ - Saving Models
1021
1110
  ```python
1022
- @checkpoint
1111
+ @model
1023
1112
  @step
1024
1113
  def train(self):
1025
- model = create_model(self.parameters, checkpoint_path = None)
1026
- for i in range(self.epochs):
1027
- # some training logic
1028
- loss = model.train(self.dataset)
1029
- if i % 10 == 0:
1030
- model.save(
1031
- current.checkpoint.directory,
1032
- )
1033
- # saves the contents of the `current.checkpoint.directory` as a checkpoint
1034
- # and returns a reference dictionary to the checkpoint saved in the datastore
1035
- self.latest_checkpoint = current.checkpoint.save(
1036
- name="epoch_checkpoint",
1037
- metadata={
1038
- "epoch": i,
1039
- "loss": loss,
1040
- }
1041
- )
1042
- ```
1114
+ # current.model.save returns a dictionary reference to the model saved
1115
+ self.my_model = current.model.save(
1116
+ path_to_my_model,
1117
+ label="my_model",
1118
+ metadata={
1119
+ "epochs": 10,
1120
+ "batch-size": 32,
1121
+ "learning-rate": 0.001,
1122
+ }
1123
+ )
1124
+ self.next(self.test)
1043
1125
 
1044
- - Using Loaded Checkpoints
1126
+ @model(load="my_model")
1127
+ @step
1128
+ def test(self):
1129
+ # `current.model.loaded` returns a dictionary of the loaded models
1130
+ # where the key is the name of the artifact and the value is the path to the model
1131
+ print(os.listdir(current.model.loaded["my_model"]))
1132
+ self.next(self.end)
1133
+ ```
1045
1134
 
1135
+ - Loading models
1046
1136
  ```python
1047
- @retry(times=3)
1048
- @checkpoint
1049
1137
  @step
1050
1138
  def train(self):
1051
- # Assume that the task has restarted and the previous attempt of the task
1052
- # saved a checkpoint
1053
- checkpoint_path = None
1054
- if current.checkpoint.is_loaded: # Check if a checkpoint is loaded
1055
- print("Loaded checkpoint from the previous attempt")
1056
- checkpoint_path = current.checkpoint.directory
1057
-
1058
- model = create_model(self.parameters, checkpoint_path = checkpoint_path)
1059
- for i in range(self.epochs):
1060
- ...
1139
+ # current.model.load returns the path to the model loaded
1140
+ checkpoint_path = current.model.load(
1141
+ self.checkpoint_key,
1142
+ )
1143
+ model_path = current.model.load(
1144
+ self.model,
1145
+ )
1146
+ self.next(self.test)
1061
1147
  ```
1062
1148
 
1063
1149
 
1064
1150
  Parameters
1065
1151
  ----------
1066
- load_policy : str, default: "fresh"
1067
- The policy for loading the checkpoint. The following policies are supported:
1068
- - "eager": Loads the the latest available checkpoint within the namespace.
1069
- With this mode, the latest checkpoint written by any previous task (can be even a different run) of the step
1070
- will be loaded at the start of the task.
1071
- - "none": Do not load any checkpoint
1072
- - "fresh": Loads the lastest checkpoint created within the running Task.
1073
- This mode helps loading checkpoints across various retry attempts of the same task.
1074
- With this mode, no checkpoint will be loaded at the start of a task but any checkpoints
1075
- created within the task will be loaded when the task is retries execution on failure.
1152
+ load : Union[List[str],str,List[Tuple[str,Union[str,None]]]], default: None
1153
+ Artifact name/s referencing the models/checkpoints to load. Artifact names refer to the names of the instance variables set to `self`.
1154
+ The artifact names given to `load` can be reference objects or reference `key` strings from objects created by `current.checkpoint` / `current.model` / `current.huggingface_hub`.
1155
+ If a list of tuples is provided, the first element is the artifact name and the second element is the path the artifact needs to be unpacked on
1156
+ the local filesystem. If the second element is None, the artifact will be unpacked in the current working directory.
1157
+ If a string is provided, then the artifact corresponding to that name will be loaded in the current working directory.
1076
1158
 
1077
1159
  temp_dir_root : str, default: None
1078
- The root directory under which `current.checkpoint.directory` will be created.
1079
- """
1080
- ...
1081
-
1082
- @typing.overload
1083
- def test_append_card(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1084
- """
1085
- A simple decorator that demonstrates using CardDecoratorInjector
1086
- to inject a card and render simple markdown content.
1160
+ The root directory under which `current.model.loaded` will store loaded models
1087
1161
  """
1088
1162
  ...
1089
1163
 
1090
- @typing.overload
1091
- def test_append_card(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1092
- ...
1093
-
1094
- def test_append_card(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
1164
+ def vllm(*, model: str, backend: str, openai_api_server: bool, debug: bool, card_refresh_interval: int, max_retries: int, retry_alert_frequency: int, engine_args: dict) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1095
1165
  """
1096
- A simple decorator that demonstrates using CardDecoratorInjector
1097
- to inject a card and render simple markdown content.
1166
+ This decorator is used to run vllm APIs as Metaflow task sidecars.
1167
+
1168
+ User code call
1169
+ --------------
1170
+ @vllm(
1171
+ model="...",
1172
+ ...
1173
+ )
1174
+
1175
+ Valid backend options
1176
+ ---------------------
1177
+ - 'local': Run as a separate process on the local task machine.
1178
+
1179
+ Valid model options
1180
+ -------------------
1181
+ Any HuggingFace model identifier, e.g. 'meta-llama/Llama-3.2-1B'
1182
+
1183
+ NOTE: vLLM's OpenAI-compatible server serves ONE model per server instance.
1184
+ If you need multiple models, you must create multiple @vllm decorators.
1185
+
1186
+
1187
+ Parameters
1188
+ ----------
1189
+ model: str
1190
+ HuggingFace model identifier to be served by vLLM.
1191
+ backend: str
1192
+ Determines where and how to run the vLLM process.
1193
+ openai_api_server: bool
1194
+ Whether to use OpenAI-compatible API server mode (subprocess) instead of native engine.
1195
+ Default is False (uses native engine).
1196
+ Set to True for backward compatibility with existing code.
1197
+ debug: bool
1198
+ Whether to turn on verbose debugging logs.
1199
+ card_refresh_interval: int
1200
+ Interval in seconds for refreshing the vLLM status card.
1201
+ Only used when openai_api_server=True.
1202
+ max_retries: int
1203
+ Maximum number of retries checking for vLLM server startup.
1204
+ Only used when openai_api_server=True.
1205
+ retry_alert_frequency: int
1206
+ Frequency of alert logs for vLLM server startup retries.
1207
+ Only used when openai_api_server=True.
1208
+ engine_args : dict
1209
+ Additional keyword arguments to pass to the vLLM engine.
1210
+ For example, `tensor_parallel_size=2`.
1098
1211
  """
1099
1212
  ...
1100
1213
 
1101
1214
  @typing.overload
1102
- def environment(*, vars: typing.Dict[str, str] = {}) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1215
+ def conda(*, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1103
1216
  """
1104
- Specifies environment variables to be set prior to the execution of a step.
1217
+ Specifies the Conda environment for the step.
1218
+
1219
+ Information in this decorator will augment any
1220
+ attributes set in the `@conda_base` flow-level decorator. Hence,
1221
+ you can use `@conda_base` to set packages required by all
1222
+ steps and use `@conda` to specify step-specific overrides.
1105
1223
 
1106
1224
 
1107
1225
  Parameters
1108
1226
  ----------
1109
- vars : Dict[str, str], default {}
1110
- Dictionary of environment variables to set.
1227
+ packages : Dict[str, str], default {}
1228
+ Packages to use for this step. The key is the name of the package
1229
+ and the value is the version to use.
1230
+ libraries : Dict[str, str], default {}
1231
+ Supported for backward compatibility. When used with packages, packages will take precedence.
1232
+ python : str, optional, default None
1233
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
1234
+ that the version used will correspond to the version of the Python interpreter used to start the run.
1235
+ disabled : bool, default False
1236
+ If set to True, disables @conda.
1111
1237
  """
1112
1238
  ...
1113
1239
 
1114
1240
  @typing.overload
1115
- def environment(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1241
+ def conda(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1116
1242
  ...
1117
1243
 
1118
1244
  @typing.overload
1119
- def environment(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1245
+ def conda(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1120
1246
  ...
1121
1247
 
1122
- def environment(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, vars: typing.Dict[str, str] = {}):
1248
+ def conda(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False):
1123
1249
  """
1124
- Specifies environment variables to be set prior to the execution of a step.
1250
+ Specifies the Conda environment for the step.
1251
+
1252
+ Information in this decorator will augment any
1253
+ attributes set in the `@conda_base` flow-level decorator. Hence,
1254
+ you can use `@conda_base` to set packages required by all
1255
+ steps and use `@conda` to specify step-specific overrides.
1125
1256
 
1126
1257
 
1127
1258
  Parameters
1128
1259
  ----------
1129
- vars : Dict[str, str], default {}
1130
- Dictionary of environment variables to set.
1260
+ packages : Dict[str, str], default {}
1261
+ Packages to use for this step. The key is the name of the package
1262
+ and the value is the version to use.
1263
+ libraries : Dict[str, str], default {}
1264
+ Supported for backward compatibility. When used with packages, packages will take precedence.
1265
+ python : str, optional, default None
1266
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
1267
+ that the version used will correspond to the version of the Python interpreter used to start the run.
1268
+ disabled : bool, default False
1269
+ If set to True, disables @conda.
1131
1270
  """
1132
1271
  ...
1133
1272
 
1134
- def kubernetes(*, cpu: int = 1, memory: int = 4096, disk: int = 10240, image: typing.Optional[str] = None, image_pull_policy: str = 'KUBERNETES_IMAGE_PULL_POLICY', image_pull_secrets: typing.List[str] = [], service_account: str = 'METAFLOW_KUBERNETES_SERVICE_ACCOUNT', secrets: typing.Optional[typing.List[str]] = None, node_selector: typing.Union[typing.Dict[str, str], str, None] = None, namespace: str = 'METAFLOW_KUBERNETES_NAMESPACE', gpu: typing.Optional[int] = None, gpu_vendor: str = 'KUBERNETES_GPU_VENDOR', tolerations: typing.List[typing.Dict[str, str]] = [], labels: typing.Dict[str, str] = 'METAFLOW_KUBERNETES_LABELS', annotations: typing.Dict[str, str] = 'METAFLOW_KUBERNETES_ANNOTATIONS', use_tmpfs: bool = False, tmpfs_tempdir: bool = True, tmpfs_size: typing.Optional[int] = None, tmpfs_path: typing.Optional[str] = '/metaflow_temp', persistent_volume_claims: typing.Optional[typing.Dict[str, str]] = None, shared_memory: typing.Optional[int] = None, port: typing.Optional[int] = None, compute_pool: typing.Optional[str] = None, hostname_resolution_timeout: int = 600, qos: str = 'Burstable', security_context: typing.Optional[typing.Dict[str, typing.Any]] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1273
+ @typing.overload
1274
+ def app_deploy(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1135
1275
  """
1136
- Specifies that this step should execute on Kubernetes.
1137
-
1138
-
1139
- Parameters
1140
- ----------
1141
- cpu : int, default 1
1142
- Number of CPUs required for this step. If `@resources` is
1143
- also present, the maximum value from all decorators is used.
1144
- memory : int, default 4096
1145
- Memory size (in MB) required for this step. If
1146
- `@resources` is also present, the maximum value from all decorators is
1147
- used.
1148
- disk : int, default 10240
1149
- Disk size (in MB) required for this step. If
1150
- `@resources` is also present, the maximum value from all decorators is
1151
- used.
1152
- image : str, optional, default None
1153
- Docker image to use when launching on Kubernetes. If not specified, and
1154
- METAFLOW_KUBERNETES_CONTAINER_IMAGE is specified, that image is used. If
1155
- not, a default Docker image mapping to the current version of Python is used.
1156
- image_pull_policy: str, default KUBERNETES_IMAGE_PULL_POLICY
1157
- If given, the imagePullPolicy to be applied to the Docker image of the step.
1158
- image_pull_secrets: List[str], default []
1159
- The default is extracted from METAFLOW_KUBERNETES_IMAGE_PULL_SECRETS.
1160
- Kubernetes image pull secrets to use when pulling container images
1161
- in Kubernetes.
1162
- service_account : str, default METAFLOW_KUBERNETES_SERVICE_ACCOUNT
1163
- Kubernetes service account to use when launching pod in Kubernetes.
1164
- secrets : List[str], optional, default None
1165
- Kubernetes secrets to use when launching pod in Kubernetes. These
1166
- secrets are in addition to the ones defined in `METAFLOW_KUBERNETES_SECRETS`
1167
- in Metaflow configuration.
1168
- node_selector: Union[Dict[str,str], str], optional, default None
1169
- Kubernetes node selector(s) to apply to the pod running the task.
1170
- Can be passed in as a comma separated string of values e.g.
1171
- 'kubernetes.io/os=linux,kubernetes.io/arch=amd64' or as a dictionary
1172
- {'kubernetes.io/os': 'linux', 'kubernetes.io/arch': 'amd64'}
1173
- namespace : str, default METAFLOW_KUBERNETES_NAMESPACE
1174
- Kubernetes namespace to use when launching pod in Kubernetes.
1175
- gpu : int, optional, default None
1176
- Number of GPUs required for this step. A value of zero implies that
1177
- the scheduled node should not have GPUs.
1178
- gpu_vendor : str, default KUBERNETES_GPU_VENDOR
1179
- The vendor of the GPUs to be used for this step.
1180
- tolerations : List[Dict[str,str]], default []
1181
- The default is extracted from METAFLOW_KUBERNETES_TOLERATIONS.
1182
- Kubernetes tolerations to use when launching pod in Kubernetes.
1183
- labels: Dict[str, str], default: METAFLOW_KUBERNETES_LABELS
1184
- Kubernetes labels to use when launching pod in Kubernetes.
1185
- annotations: Dict[str, str], default: METAFLOW_KUBERNETES_ANNOTATIONS
1186
- Kubernetes annotations to use when launching pod in Kubernetes.
1187
- use_tmpfs : bool, default False
1188
- This enables an explicit tmpfs mount for this step.
1189
- tmpfs_tempdir : bool, default True
1190
- sets METAFLOW_TEMPDIR to tmpfs_path if set for this step.
1191
- tmpfs_size : int, optional, default: None
1192
- The value for the size (in MiB) of the tmpfs mount for this step.
1193
- This parameter maps to the `--tmpfs` option in Docker. Defaults to 50% of the
1194
- memory allocated for this step.
1195
- tmpfs_path : str, optional, default /metaflow_temp
1196
- Path to tmpfs mount for this step.
1197
- persistent_volume_claims : Dict[str, str], optional, default None
1198
- A map (dictionary) of persistent volumes to be mounted to the pod for this step. The map is from persistent
1199
- volumes to the path to which the volume is to be mounted, e.g., `{'pvc-name': '/path/to/mount/on'}`.
1200
- shared_memory: int, optional
1201
- Shared memory size (in MiB) required for this step
1202
- port: int, optional
1203
- Port number to specify in the Kubernetes job object
1204
- compute_pool : str, optional, default None
1205
- Compute pool to be used for for this step.
1206
- If not specified, any accessible compute pool within the perimeter is used.
1207
- hostname_resolution_timeout: int, default 10 * 60
1208
- Timeout in seconds for the workers tasks in the gang scheduled cluster to resolve the hostname of control task.
1209
- Only applicable when @parallel is used.
1210
- qos: str, default: Burstable
1211
- Quality of Service class to assign to the pod. Supported values are: Guaranteed, Burstable, BestEffort
1212
-
1213
- security_context: Dict[str, Any], optional, default None
1214
- Container security context. Applies to the task container. Allows the following keys:
1215
- - privileged: bool, optional, default None
1216
- - allow_privilege_escalation: bool, optional, default None
1217
- - run_as_user: int, optional, default None
1218
- - run_as_group: int, optional, default None
1219
- - run_as_non_root: bool, optional, default None
1276
+ Decorator prototype for all step decorators. This function gets specialized
1277
+ and imported for all decorators types by _import_plugin_decorators().
1278
+ """
1279
+ ...
1280
+
1281
+ @typing.overload
1282
+ def app_deploy(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1283
+ ...
1284
+
1285
+ def app_deploy(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
1286
+ """
1287
+ Decorator prototype for all step decorators. This function gets specialized
1288
+ and imported for all decorators types by _import_plugin_decorators().
1220
1289
  """
1221
1290
  ...
1222
1291
 
@@ -1280,208 +1349,131 @@ def timeout(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None],
1280
1349
  ...
1281
1350
 
1282
1351
  @typing.overload
1283
- def conda_base(*, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1352
+ def schedule(*, hourly: bool = False, daily: bool = True, weekly: bool = False, cron: typing.Optional[str] = None, timezone: typing.Optional[str] = None) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1284
1353
  """
1285
- Specifies the Conda environment for all steps of the flow.
1286
-
1287
- Use `@conda_base` to set common libraries required by all
1288
- steps and use `@conda` to specify step-specific additions.
1354
+ Specifies the times when the flow should be run when running on a
1355
+ production scheduler.
1289
1356
 
1290
1357
 
1291
1358
  Parameters
1292
1359
  ----------
1293
- packages : Dict[str, str], default {}
1294
- Packages to use for this flow. The key is the name of the package
1295
- and the value is the version to use.
1296
- libraries : Dict[str, str], default {}
1297
- Supported for backward compatibility. When used with packages, packages will take precedence.
1298
- python : str, optional, default None
1299
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
1300
- that the version used will correspond to the version of the Python interpreter used to start the run.
1301
- disabled : bool, default False
1302
- If set to True, disables Conda.
1360
+ hourly : bool, default False
1361
+ Run the workflow hourly.
1362
+ daily : bool, default True
1363
+ Run the workflow daily.
1364
+ weekly : bool, default False
1365
+ Run the workflow weekly.
1366
+ cron : str, optional, default None
1367
+ Run the workflow at [a custom Cron schedule](https://docs.aws.amazon.com/eventbridge/latest/userguide/scheduled-events.html#cron-expressions)
1368
+ specified by this expression.
1369
+ timezone : str, optional, default None
1370
+ Timezone on which the schedule runs (default: None). Currently supported only for Argo workflows,
1371
+ which accepts timezones in [IANA format](https://nodatime.org/TimeZones).
1303
1372
  """
1304
1373
  ...
1305
1374
 
1306
1375
  @typing.overload
1307
- def conda_base(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1308
- ...
1309
-
1310
- def conda_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False):
1311
- """
1312
- Specifies the Conda environment for all steps of the flow.
1313
-
1314
- Use `@conda_base` to set common libraries required by all
1315
- steps and use `@conda` to specify step-specific additions.
1316
-
1317
-
1318
- Parameters
1319
- ----------
1320
- packages : Dict[str, str], default {}
1321
- Packages to use for this flow. The key is the name of the package
1322
- and the value is the version to use.
1323
- libraries : Dict[str, str], default {}
1324
- Supported for backward compatibility. When used with packages, packages will take precedence.
1325
- python : str, optional, default None
1326
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
1327
- that the version used will correspond to the version of the Python interpreter used to start the run.
1328
- disabled : bool, default False
1329
- If set to True, disables Conda.
1330
- """
1376
+ def schedule(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1331
1377
  ...
1332
1378
 
1333
- @typing.overload
1334
- def pypi_base(*, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1379
+ def schedule(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, hourly: bool = False, daily: bool = True, weekly: bool = False, cron: typing.Optional[str] = None, timezone: typing.Optional[str] = None):
1335
1380
  """
1336
- Specifies the PyPI packages for all steps of the flow.
1381
+ Specifies the times when the flow should be run when running on a
1382
+ production scheduler.
1337
1383
 
1338
- Use `@pypi_base` to set common packages required by all
1339
- steps and use `@pypi` to specify step-specific overrides.
1340
1384
 
1341
1385
  Parameters
1342
1386
  ----------
1343
- packages : Dict[str, str], default: {}
1344
- Packages to use for this flow. The key is the name of the package
1345
- and the value is the version to use.
1346
- python : str, optional, default: None
1347
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
1348
- that the version used will correspond to the version of the Python interpreter used to start the run.
1387
+ hourly : bool, default False
1388
+ Run the workflow hourly.
1389
+ daily : bool, default True
1390
+ Run the workflow daily.
1391
+ weekly : bool, default False
1392
+ Run the workflow weekly.
1393
+ cron : str, optional, default None
1394
+ Run the workflow at [a custom Cron schedule](https://docs.aws.amazon.com/eventbridge/latest/userguide/scheduled-events.html#cron-expressions)
1395
+ specified by this expression.
1396
+ timezone : str, optional, default None
1397
+ Timezone on which the schedule runs (default: None). Currently supported only for Argo workflows,
1398
+ which accepts timezones in [IANA format](https://nodatime.org/TimeZones).
1349
1399
  """
1350
1400
  ...
1351
1401
 
1352
- @typing.overload
1353
- def pypi_base(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1354
- ...
1355
-
1356
- def pypi_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None):
1402
+ def airflow_external_task_sensor(*, timeout: int, poke_interval: int, mode: str, exponential_backoff: bool, pool: str, soft_fail: bool, name: str, description: str, external_dag_id: str, external_task_ids: typing.List[str], allowed_states: typing.List[str], failed_states: typing.List[str], execution_delta: "datetime.timedelta", check_existence: bool) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1357
1403
  """
1358
- Specifies the PyPI packages for all steps of the flow.
1404
+ The `@airflow_external_task_sensor` decorator attaches an Airflow [ExternalTaskSensor](https://airflow.apache.org/docs/apache-airflow/stable/_api/airflow/sensors/external_task/index.html#airflow.sensors.external_task.ExternalTaskSensor) before the start step of the flow.
1405
+ This decorator only works when a flow is scheduled on Airflow and is compiled using `airflow create`. More than one `@airflow_external_task_sensor` can be added as flow decorators. Adding more than one decorator will ensure that the `start` step starts only after all sensors finish.
1359
1406
 
1360
- Use `@pypi_base` to set common packages required by all
1361
- steps and use `@pypi` to specify step-specific overrides.
1362
1407
 
1363
1408
  Parameters
1364
1409
  ----------
1365
- packages : Dict[str, str], default: {}
1366
- Packages to use for this flow. The key is the name of the package
1367
- and the value is the version to use.
1368
- python : str, optional, default: None
1369
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
1370
- that the version used will correspond to the version of the Python interpreter used to start the run.
1410
+ timeout : int
1411
+ Time, in seconds before the task times out and fails. (Default: 3600)
1412
+ poke_interval : int
1413
+ Time in seconds that the job should wait in between each try. (Default: 60)
1414
+ mode : str
1415
+ How the sensor operates. Options are: { poke | reschedule }. (Default: "poke")
1416
+ exponential_backoff : bool
1417
+ Allow progressively longer waits between pokes by using an exponential backoff algorithm. (Default: True)
1418
+ pool : str
1419
+ the slot pool this task should run in,
1420
+ slot pools are a way to limit concurrency for certain tasks. (Default: None)
1421
+ soft_fail : bool
1422
+ Set to true to mark the task as SKIPPED on failure. (Default: False)
1423
+ name : str
1424
+ Name of the sensor on Airflow
1425
+ description : str
1426
+ Description of sensor in the Airflow UI
1427
+ external_dag_id : str
1428
+ The dag_id that contains the task you want to wait for.
1429
+ external_task_ids : List[str]
1430
+ The list of task_ids that you want to wait for.
1431
+ If None (default value) the sensor waits for the DAG. (Default: None)
1432
+ allowed_states : List[str]
1433
+ Iterable of allowed states, (Default: ['success'])
1434
+ failed_states : List[str]
1435
+ Iterable of failed or dis-allowed states. (Default: None)
1436
+ execution_delta : datetime.timedelta
1437
+ time difference with the previous execution to look at,
1438
+ the default is the same logical date as the current task or DAG. (Default: None)
1439
+ check_existence: bool
1440
+ Set to True to check if the external task exists or check if
1441
+ the DAG to wait for exists. (Default: True)
1371
1442
  """
1372
1443
  ...
1373
1444
 
1374
- def with_artifact_store(f: typing.Optional[typing.Type[FlowSpecDerived]] = None):
1445
+ def project(*, name: str, branch: typing.Optional[str] = None, production: bool = False) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1375
1446
  """
1376
- Allows setting external datastores to save data for the
1377
- `@checkpoint`/`@model`/`@huggingface_hub` decorators.
1378
-
1379
- This decorator is useful when users wish to save data to a different datastore
1380
- than what is configured in Metaflow. This can be for variety of reasons:
1381
-
1382
- 1. Data security: The objects needs to be stored in a bucket (object storage) that is not accessible by other flows.
1383
- 2. Data Locality: The location where the task is executing is not located in the same region as the datastore.
1384
- - Example: Metaflow datastore lives in US East, but the task is executing in Finland datacenters.
1385
- 3. Data Lifecycle Policies: The objects need to be archived / managed separately from the Metaflow managed objects.
1386
- - Example: Flow is training very large models that need to be stored separately and will be deleted more aggressively than the Metaflow managed objects.
1387
-
1388
- Usage:
1389
- ----------
1390
-
1391
- - Using a custom IAM role to access the datastore.
1392
-
1393
- ```python
1394
- @with_artifact_store(
1395
- type="s3",
1396
- config=lambda: {
1397
- "root": "s3://my-bucket-foo/path/to/root",
1398
- "role_arn": ROLE,
1399
- },
1400
- )
1401
- class MyFlow(FlowSpec):
1402
-
1403
- @checkpoint
1404
- @step
1405
- def start(self):
1406
- with open("my_file.txt", "w") as f:
1407
- f.write("Hello, World!")
1408
- self.external_bucket_checkpoint = current.checkpoint.save("my_file.txt")
1409
- self.next(self.end)
1410
-
1411
- ```
1412
-
1413
- - Using credentials to access the s3-compatible datastore.
1414
-
1415
- ```python
1416
- @with_artifact_store(
1417
- type="s3",
1418
- config=lambda: {
1419
- "root": "s3://my-bucket-foo/path/to/root",
1420
- "client_params": {
1421
- "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1422
- "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1423
- },
1424
- },
1425
- )
1426
- class MyFlow(FlowSpec):
1427
-
1428
- @checkpoint
1429
- @step
1430
- def start(self):
1431
- with open("my_file.txt", "w") as f:
1432
- f.write("Hello, World!")
1433
- self.external_bucket_checkpoint = current.checkpoint.save("my_file.txt")
1434
- self.next(self.end)
1435
-
1436
- ```
1447
+ Specifies what flows belong to the same project.
1437
1448
 
1438
- - Accessing objects stored in external datastores after task execution.
1449
+ A project-specific namespace is created for all flows that
1450
+ use the same `@project(name)`.
1439
1451
 
1440
- ```python
1441
- run = Run("CheckpointsTestsFlow/8992")
1442
- with artifact_store_from(run=run, config={
1443
- "client_params": {
1444
- "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1445
- "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1446
- },
1447
- }):
1448
- with Checkpoint() as cp:
1449
- latest = cp.list(
1450
- task=run["start"].task
1451
- )[0]
1452
- print(latest)
1453
- cp.load(
1454
- latest,
1455
- "test-checkpoints"
1456
- )
1457
1452
 
1458
- task = Task("TorchTuneFlow/8484/train/53673")
1459
- with artifact_store_from(run=run, config={
1460
- "client_params": {
1461
- "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1462
- "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1463
- },
1464
- }):
1465
- load_model(
1466
- task.data.model_ref,
1467
- "test-models"
1468
- )
1469
- ```
1470
- Parameters:
1453
+ Parameters
1471
1454
  ----------
1455
+ name : str
1456
+ Project name. Make sure that the name is unique amongst all
1457
+ projects that use the same production scheduler. The name may
1458
+ contain only lowercase alphanumeric characters and underscores.
1472
1459
 
1473
- type: str
1474
- The type of the datastore. Can be one of 's3', 'gcs', 'azure' or any other supported metaflow Datastore.
1460
+ branch : Optional[str], default None
1461
+ The branch to use. If not specified, the branch is set to
1462
+ `user.<username>` unless `production` is set to `True`. This can
1463
+ also be set on the command line using `--branch` as a top-level option.
1464
+ It is an error to specify `branch` in the decorator and on the command line.
1475
1465
 
1476
- config: dict or Callable
1477
- Dictionary of configuration options for the datastore. The following keys are required:
1478
- - root: The root path in the datastore where the data will be saved. (needs to be in the format expected by the datastore)
1479
- - example: 's3://bucket-name/path/to/root'
1480
- - example: 'gs://bucket-name/path/to/root'
1481
- - example: 'https://myblockacc.blob.core.windows.net/metaflow/'
1482
- - role_arn (optional): AWS IAM role to access s3 bucket (only when `type` is 's3')
1483
- - session_vars (optional): AWS session variables to access s3 bucket (only when `type` is 's3')
1484
- - client_params (optional): AWS client parameters to access s3 bucket (only when `type` is 's3')
1466
+ production : bool, default False
1467
+ Whether or not the branch is the production branch. This can also be set on the
1468
+ command line using `--production` as a top-level option. It is an error to specify
1469
+ `production` in the decorator and on the command line.
1470
+ The project branch name will be:
1471
+ - if `branch` is specified:
1472
+ - if `production` is True: `prod.<branch>`
1473
+ - if `production` is False: `test.<branch>`
1474
+ - if `branch` is not specified:
1475
+ - if `production` is True: `prod`
1476
+ - if `production` is False: `user.<username>`
1485
1477
  """
1486
1478
  ...
1487
1479
 
@@ -1586,175 +1578,117 @@ def trigger_on_finish(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *
1586
1578
  """
1587
1579
  ...
1588
1580
 
1589
- def airflow_external_task_sensor(*, timeout: int, poke_interval: int, mode: str, exponential_backoff: bool, pool: str, soft_fail: bool, name: str, description: str, external_dag_id: str, external_task_ids: typing.List[str], allowed_states: typing.List[str], failed_states: typing.List[str], execution_delta: "datetime.timedelta", check_existence: bool) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1581
+ def with_artifact_store(f: typing.Optional[typing.Type[FlowSpecDerived]] = None):
1590
1582
  """
1591
- The `@airflow_external_task_sensor` decorator attaches a Airflow [ExternalTaskSensor](https://airflow.apache.org/docs/apache-airflow/stable/_api/airflow/sensors/external_task/index.html#airflow.sensors.external_task.ExternalTaskSensor) before the start step of the flow.
1592
- This decorator only works when a flow is scheduled on Airflow and is compiled using `airflow create`. More than one `@airflow_external_task_sensor` can be added as a flow decorators. Adding more than one decorator will ensure that `start` step starts only after all sensors finish.
1583
+ Allows setting external datastores to save data for the
1584
+ `@checkpoint`/`@model`/`@huggingface_hub` decorators.
1585
+
1586
+ This decorator is useful when users wish to save data to a different datastore
1587
+ than what is configured in Metaflow. This can be for a variety of reasons:
1593
1588
 
1589
+ 1. Data security: The objects need to be stored in a bucket (object storage) that is not accessible by other flows.
1590
+ 2. Data Locality: The location where the task is executing is not located in the same region as the datastore.
1591
+ - Example: Metaflow datastore lives in US East, but the task is executing in Finland datacenters.
1592
+ 3. Data Lifecycle Policies: The objects need to be archived / managed separately from the Metaflow managed objects.
1593
+ - Example: Flow is training very large models that need to be stored separately and will be deleted more aggressively than the Metaflow managed objects.
1594
1594
 
1595
- Parameters
1595
+ Usage:
1596
1596
  ----------
1597
- timeout : int
1598
- Time, in seconds before the task times out and fails. (Default: 3600)
1599
- poke_interval : int
1600
- Time in seconds that the job should wait in between each try. (Default: 60)
1601
- mode : str
1602
- How the sensor operates. Options are: { poke | reschedule }. (Default: "poke")
1603
- exponential_backoff : bool
1604
- allow progressive longer waits between pokes by using exponential backoff algorithm. (Default: True)
1605
- pool : str
1606
- the slot pool this task should run in,
1607
- slot pools are a way to limit concurrency for certain tasks. (Default:None)
1608
- soft_fail : bool
1609
- Set to true to mark the task as SKIPPED on failure. (Default: False)
1610
- name : str
1611
- Name of the sensor on Airflow
1612
- description : str
1613
- Description of sensor in the Airflow UI
1614
- external_dag_id : str
1615
- The dag_id that contains the task you want to wait for.
1616
- external_task_ids : List[str]
1617
- The list of task_ids that you want to wait for.
1618
- If None (default value) the sensor waits for the DAG. (Default: None)
1619
- allowed_states : List[str]
1620
- Iterable of allowed states, (Default: ['success'])
1621
- failed_states : List[str]
1622
- Iterable of failed or dis-allowed states. (Default: None)
1623
- execution_delta : datetime.timedelta
1624
- time difference with the previous execution to look at,
1625
- the default is the same logical date as the current task or DAG. (Default: None)
1626
- check_existence: bool
1627
- Set to True to check if the external task exists or check if
1628
- the DAG to wait for exists. (Default: True)
1629
- """
1630
- ...
1631
-
1632
- @typing.overload
1633
- def schedule(*, hourly: bool = False, daily: bool = True, weekly: bool = False, cron: typing.Optional[str] = None, timezone: typing.Optional[str] = None) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1634
- """
1635
- Specifies the times when the flow should be run when running on a
1636
- production scheduler.
1637
1597
 
1598
+ - Using a custom IAM role to access the datastore.
1638
1599
 
1639
- Parameters
1640
- ----------
1641
- hourly : bool, default False
1642
- Run the workflow hourly.
1643
- daily : bool, default True
1644
- Run the workflow daily.
1645
- weekly : bool, default False
1646
- Run the workflow weekly.
1647
- cron : str, optional, default None
1648
- Run the workflow at [a custom Cron schedule](https://docs.aws.amazon.com/eventbridge/latest/userguide/scheduled-events.html#cron-expressions)
1649
- specified by this expression.
1650
- timezone : str, optional, default None
1651
- Timezone on which the schedule runs (default: None). Currently supported only for Argo workflows,
1652
- which accepts timezones in [IANA format](https://nodatime.org/TimeZones).
1653
- """
1654
- ...
1655
-
1656
- @typing.overload
1657
- def schedule(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1658
- ...
1659
-
1660
- def schedule(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, hourly: bool = False, daily: bool = True, weekly: bool = False, cron: typing.Optional[str] = None, timezone: typing.Optional[str] = None):
1661
- """
1662
- Specifies the times when the flow should be run when running on a
1663
- production scheduler.
1600
+ ```python
1601
+ @with_artifact_store(
1602
+ type="s3",
1603
+ config=lambda: {
1604
+ "root": "s3://my-bucket-foo/path/to/root",
1605
+ "role_arn": ROLE,
1606
+ },
1607
+ )
1608
+ class MyFlow(FlowSpec):
1609
+
1610
+ @checkpoint
1611
+ @step
1612
+ def start(self):
1613
+ with open("my_file.txt", "w") as f:
1614
+ f.write("Hello, World!")
1615
+ self.external_bucket_checkpoint = current.checkpoint.save("my_file.txt")
1616
+ self.next(self.end)
1664
1617
 
1618
+ ```
1665
1619
 
1666
- Parameters
1667
- ----------
1668
- hourly : bool, default False
1669
- Run the workflow hourly.
1670
- daily : bool, default True
1671
- Run the workflow daily.
1672
- weekly : bool, default False
1673
- Run the workflow weekly.
1674
- cron : str, optional, default None
1675
- Run the workflow at [a custom Cron schedule](https://docs.aws.amazon.com/eventbridge/latest/userguide/scheduled-events.html#cron-expressions)
1676
- specified by this expression.
1677
- timezone : str, optional, default None
1678
- Timezone on which the schedule runs (default: None). Currently supported only for Argo workflows,
1679
- which accepts timezones in [IANA format](https://nodatime.org/TimeZones).
1680
- """
1681
- ...
1682
-
1683
- def airflow_s3_key_sensor(*, timeout: int, poke_interval: int, mode: str, exponential_backoff: bool, pool: str, soft_fail: bool, name: str, description: str, bucket_key: typing.Union[str, typing.List[str]], bucket_name: str, wildcard_match: bool, aws_conn_id: str, verify: bool) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1684
- """
1685
- The `@airflow_s3_key_sensor` decorator attaches a Airflow [S3KeySensor](https://airflow.apache.org/docs/apache-airflow-providers-amazon/stable/_api/airflow/providers/amazon/aws/sensors/s3/index.html#airflow.providers.amazon.aws.sensors.s3.S3KeySensor)
1686
- before the start step of the flow. This decorator only works when a flow is scheduled on Airflow
1687
- and is compiled using `airflow create`. More than one `@airflow_s3_key_sensor` can be
1688
- added as a flow decorators. Adding more than one decorator will ensure that `start` step
1689
- starts only after all sensors finish.
1620
+ - Using credentials to access the s3-compatible datastore.
1621
+
1622
+ ```python
1623
+ @with_artifact_store(
1624
+ type="s3",
1625
+ config=lambda: {
1626
+ "root": "s3://my-bucket-foo/path/to/root",
1627
+ "client_params": {
1628
+ "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1629
+ "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1630
+ },
1631
+ },
1632
+ )
1633
+ class MyFlow(FlowSpec):
1690
1634
 
1635
+ @checkpoint
1636
+ @step
1637
+ def start(self):
1638
+ with open("my_file.txt", "w") as f:
1639
+ f.write("Hello, World!")
1640
+ self.external_bucket_checkpoint = current.checkpoint.save("my_file.txt")
1641
+ self.next(self.end)
1691
1642
 
1692
- Parameters
1693
- ----------
1694
- timeout : int
1695
- Time, in seconds before the task times out and fails. (Default: 3600)
1696
- poke_interval : int
1697
- Time in seconds that the job should wait in between each try. (Default: 60)
1698
- mode : str
1699
- How the sensor operates. Options are: { poke | reschedule }. (Default: "poke")
1700
- exponential_backoff : bool
1701
- allow progressive longer waits between pokes by using exponential backoff algorithm. (Default: True)
1702
- pool : str
1703
- the slot pool this task should run in,
1704
- slot pools are a way to limit concurrency for certain tasks. (Default:None)
1705
- soft_fail : bool
1706
- Set to true to mark the task as SKIPPED on failure. (Default: False)
1707
- name : str
1708
- Name of the sensor on Airflow
1709
- description : str
1710
- Description of sensor in the Airflow UI
1711
- bucket_key : Union[str, List[str]]
1712
- The key(s) being waited on. Supports full s3:// style url or relative path from root level.
1713
- When it's specified as a full s3:// url, please leave `bucket_name` as None
1714
- bucket_name : str
1715
- Name of the S3 bucket. Only needed when bucket_key is not provided as a full s3:// url.
1716
- When specified, all the keys passed to bucket_key refers to this bucket. (Default:None)
1717
- wildcard_match : bool
1718
- whether the bucket_key should be interpreted as a Unix wildcard pattern. (Default: False)
1719
- aws_conn_id : str
1720
- a reference to the s3 connection on Airflow. (Default: None)
1721
- verify : bool
1722
- Whether or not to verify SSL certificates for S3 connection. (Default: None)
1723
- """
1724
- ...
1725
-
1726
- def project(*, name: str, branch: typing.Optional[str] = None, production: bool = False) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1727
- """
1728
- Specifies what flows belong to the same project.
1643
+ ```
1729
1644
 
1730
- A project-specific namespace is created for all flows that
1731
- use the same `@project(name)`.
1645
+ - Accessing objects stored in external datastores after task execution.
1732
1646
 
1647
+ ```python
1648
+ run = Run("CheckpointsTestsFlow/8992")
1649
+ with artifact_store_from(run=run, config={
1650
+ "client_params": {
1651
+ "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1652
+ "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1653
+ },
1654
+ }):
1655
+ with Checkpoint() as cp:
1656
+ latest = cp.list(
1657
+ task=run["start"].task
1658
+ )[0]
1659
+ print(latest)
1660
+ cp.load(
1661
+ latest,
1662
+ "test-checkpoints"
1663
+ )
1733
1664
 
1734
- Parameters
1665
+ task = Task("TorchTuneFlow/8484/train/53673")
1666
+ with artifact_store_from(run=run, config={
1667
+ "client_params": {
1668
+ "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1669
+ "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1670
+ },
1671
+ }):
1672
+ load_model(
1673
+ task.data.model_ref,
1674
+ "test-models"
1675
+ )
1676
+ ```
1677
+ Parameters:
1735
1678
  ----------
1736
- name : str
1737
- Project name. Make sure that the name is unique amongst all
1738
- projects that use the same production scheduler. The name may
1739
- contain only lowercase alphanumeric characters and underscores.
1740
1679
 
1741
- branch : Optional[str], default None
1742
- The branch to use. If not specified, the branch is set to
1743
- `user.<username>` unless `production` is set to `True`. This can
1744
- also be set on the command line using `--branch` as a top-level option.
1745
- It is an error to specify `branch` in the decorator and on the command line.
1680
+ type: str
1681
+ The type of the datastore. Can be one of 's3', 'gcs', 'azure' or any other supported metaflow Datastore.
1746
1682
 
1747
- production : bool, default False
1748
- Whether or not the branch is the production branch. This can also be set on the
1749
- command line using `--production` as a top-level option. It is an error to specify
1750
- `production` in the decorator and on the command line.
1751
- The project branch name will be:
1752
- - if `branch` is specified:
1753
- - if `production` is True: `prod.<branch>`
1754
- - if `production` is False: `test.<branch>`
1755
- - if `branch` is not specified:
1756
- - if `production` is True: `prod`
1757
- - if `production` is False: `user.<username>`
1683
+ config: dict or Callable
1684
+ Dictionary of configuration options for the datastore. The following keys are required:
1685
+ - root: The root path in the datastore where the data will be saved. (needs to be in the format expected by the datastore)
1686
+ - example: 's3://bucket-name/path/to/root'
1687
+ - example: 'gs://bucket-name/path/to/root'
1688
+ - example: 'https://myblockacc.blob.core.windows.net/metaflow/'
1689
+ - role_arn (optional): AWS IAM role to access s3 bucket (only when `type` is 's3')
1690
+ - session_vars (optional): AWS session variables to access s3 bucket (only when `type` is 's3')
1691
+ - client_params (optional): AWS client parameters to access s3 bucket (only when `type` is 's3')
1758
1692
  """
1759
1693
  ...
1760
1694
 
@@ -1851,5 +1785,140 @@ def trigger(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, event: t
1851
1785
  """
1852
1786
  ...
1853
1787
 
1788
+ def airflow_s3_key_sensor(*, timeout: int, poke_interval: int, mode: str, exponential_backoff: bool, pool: str, soft_fail: bool, name: str, description: str, bucket_key: typing.Union[str, typing.List[str]], bucket_name: str, wildcard_match: bool, aws_conn_id: str, verify: bool) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1789
+ """
1790
+ The `@airflow_s3_key_sensor` decorator attaches an Airflow [S3KeySensor](https://airflow.apache.org/docs/apache-airflow-providers-amazon/stable/_api/airflow/providers/amazon/aws/sensors/s3/index.html#airflow.providers.amazon.aws.sensors.s3.S3KeySensor)
1791
+ before the start step of the flow. This decorator only works when a flow is scheduled on Airflow
1792
+ and is compiled using `airflow create`. More than one `@airflow_s3_key_sensor` can be
1793
+ added as flow decorators. Adding more than one decorator will ensure that the `start` step
1794
+ starts only after all sensors finish.
1795
+
1796
+
1797
+ Parameters
1798
+ ----------
1799
+ timeout : int
1800
+ Time, in seconds before the task times out and fails. (Default: 3600)
1801
+ poke_interval : int
1802
+ Time in seconds that the job should wait in between each try. (Default: 60)
1803
+ mode : str
1804
+ How the sensor operates. Options are: { poke | reschedule }. (Default: "poke")
1805
+ exponential_backoff : bool
1806
+ Allow progressively longer waits between pokes by using an exponential backoff algorithm. (Default: True)
1807
+ pool : str
1808
+ the slot pool this task should run in,
1809
+ slot pools are a way to limit concurrency for certain tasks. (Default: None)
1810
+ soft_fail : bool
1811
+ Set to true to mark the task as SKIPPED on failure. (Default: False)
1812
+ name : str
1813
+ Name of the sensor on Airflow
1814
+ description : str
1815
+ Description of sensor in the Airflow UI
1816
+ bucket_key : Union[str, List[str]]
1817
+ The key(s) being waited on. Supports full s3:// style url or relative path from root level.
1818
+ When it's specified as a full s3:// url, please leave `bucket_name` as None
1819
+ bucket_name : str
1820
+ Name of the S3 bucket. Only needed when bucket_key is not provided as a full s3:// url.
1821
+ When specified, all the keys passed to bucket_key refer to this bucket. (Default: None)
1822
+ wildcard_match : bool
1823
+ whether the bucket_key should be interpreted as a Unix wildcard pattern. (Default: False)
1824
+ aws_conn_id : str
1825
+ a reference to the s3 connection on Airflow. (Default: None)
1826
+ verify : bool
1827
+ Whether or not to verify SSL certificates for S3 connection. (Default: None)
1828
+ """
1829
+ ...
1830
+
1831
+ @typing.overload
1832
+ def conda_base(*, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1833
+ """
1834
+ Specifies the Conda environment for all steps of the flow.
1835
+
1836
+ Use `@conda_base` to set common libraries required by all
1837
+ steps and use `@conda` to specify step-specific additions.
1838
+
1839
+
1840
+ Parameters
1841
+ ----------
1842
+ packages : Dict[str, str], default {}
1843
+ Packages to use for this flow. The key is the name of the package
1844
+ and the value is the version to use.
1845
+ libraries : Dict[str, str], default {}
1846
+ Supported for backward compatibility. When used with packages, packages will take precedence.
1847
+ python : str, optional, default None
1848
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
1849
+ that the version used will correspond to the version of the Python interpreter used to start the run.
1850
+ disabled : bool, default False
1851
+ If set to True, disables Conda.
1852
+ """
1853
+ ...
1854
+
1855
+ @typing.overload
1856
+ def conda_base(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1857
+ ...
1858
+
1859
+ def conda_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False):
1860
+ """
1861
+ Specifies the Conda environment for all steps of the flow.
1862
+
1863
+ Use `@conda_base` to set common libraries required by all
1864
+ steps and use `@conda` to specify step-specific additions.
1865
+
1866
+
1867
+ Parameters
1868
+ ----------
1869
+ packages : Dict[str, str], default {}
1870
+ Packages to use for this flow. The key is the name of the package
1871
+ and the value is the version to use.
1872
+ libraries : Dict[str, str], default {}
1873
+ Supported for backward compatibility. When used with packages, packages will take precedence.
1874
+ python : str, optional, default None
1875
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
1876
+ that the version used will correspond to the version of the Python interpreter used to start the run.
1877
+ disabled : bool, default False
1878
+ If set to True, disables Conda.
1879
+ """
1880
+ ...
1881
+
1882
+ @typing.overload
1883
+ def pypi_base(*, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1884
+ """
1885
+ Specifies the PyPI packages for all steps of the flow.
1886
+
1887
+ Use `@pypi_base` to set common packages required by all
1888
+ steps and use `@pypi` to specify step-specific overrides.
1889
+
1890
+ Parameters
1891
+ ----------
1892
+ packages : Dict[str, str], default: {}
1893
+ Packages to use for this flow. The key is the name of the package
1894
+ and the value is the version to use.
1895
+ python : str, optional, default: None
1896
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
1897
+ that the version used will correspond to the version of the Python interpreter used to start the run.
1898
+ """
1899
+ ...
1900
+
1901
+ @typing.overload
1902
+ def pypi_base(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1903
+ ...
1904
+
1905
+ def pypi_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None):
1906
+ """
1907
+ Specifies the PyPI packages for all steps of the flow.
1908
+
1909
+ Use `@pypi_base` to set common packages required by all
1910
+ steps and use `@pypi` to specify step-specific overrides.
1911
+
1912
+ Parameters
1913
+ ----------
1914
+ packages : Dict[str, str], default: {}
1915
+ Packages to use for this flow. The key is the name of the package
1916
+ and the value is the version to use.
1917
+ python : str, optional, default: None
1918
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
1919
+ that the version used will correspond to the version of the Python interpreter used to start the run.
1920
+ """
1921
+ ...
1922
+
1854
1923
  pkg_name: str
1855
1924