ob-metaflow-stubs 6.0.4.9__py2.py3-none-any.whl → 6.0.5.1__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ob-metaflow-stubs might be problematic. Click here for more details.

Files changed (262)
  1. metaflow-stubs/__init__.pyi +1094 -1019
  2. metaflow-stubs/cards.pyi +2 -2
  3. metaflow-stubs/cli.pyi +3 -2
  4. metaflow-stubs/cli_components/__init__.pyi +2 -2
  5. metaflow-stubs/cli_components/utils.pyi +2 -2
  6. metaflow-stubs/client/__init__.pyi +2 -2
  7. metaflow-stubs/client/core.pyi +7 -8
  8. metaflow-stubs/client/filecache.pyi +2 -2
  9. metaflow-stubs/events.pyi +2 -2
  10. metaflow-stubs/exception.pyi +2 -2
  11. metaflow-stubs/flowspec.pyi +8 -8
  12. metaflow-stubs/generated_for.txt +1 -1
  13. metaflow-stubs/includefile.pyi +4 -4
  14. metaflow-stubs/{info_file.pyi → meta_files.pyi} +2 -6
  15. metaflow-stubs/metadata_provider/__init__.pyi +2 -2
  16. metaflow-stubs/metadata_provider/heartbeat.pyi +2 -2
  17. metaflow-stubs/metadata_provider/metadata.pyi +3 -3
  18. metaflow-stubs/metadata_provider/util.pyi +2 -2
  19. metaflow-stubs/metaflow_config.pyi +6 -2
  20. metaflow-stubs/metaflow_current.pyi +49 -49
  21. metaflow-stubs/metaflow_git.pyi +2 -2
  22. metaflow-stubs/mf_extensions/__init__.pyi +2 -2
  23. metaflow-stubs/mf_extensions/obcheckpoint/__init__.pyi +2 -2
  24. metaflow-stubs/mf_extensions/obcheckpoint/plugins/__init__.pyi +2 -2
  25. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/__init__.pyi +2 -2
  26. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/__init__.pyi +2 -2
  27. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/async_cards.pyi +3 -3
  28. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/deco_injection_mixin.pyi +2 -2
  29. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/extra_components.pyi +4 -4
  30. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/__init__.pyi +2 -2
  31. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/cards/__init__.pyi +2 -2
  32. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/cards/checkpoint_lister.pyi +4 -4
  33. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/cards/lineage_card.pyi +2 -2
  34. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/checkpoint_storage.pyi +5 -5
  35. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/constructors.pyi +2 -2
  36. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/core.pyi +4 -4
  37. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/decorator.pyi +4 -4
  38. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/exceptions.pyi +2 -2
  39. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/final_api.pyi +3 -3
  40. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/lineage.pyi +2 -2
  41. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/__init__.pyi +2 -2
  42. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/context.pyi +2 -2
  43. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/core.pyi +3 -3
  44. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/decorator.pyi +2 -2
  45. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/exceptions.pyi +2 -2
  46. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/task_utils.pyi +3 -3
  47. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/utils.pyi +2 -2
  48. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastructures.pyi +3 -3
  49. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/exceptions.pyi +2 -2
  50. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/hf_hub/__init__.pyi +2 -2
  51. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/hf_hub/decorator.pyi +3 -3
  52. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/__init__.pyi +2 -2
  53. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/core.pyi +3 -3
  54. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/exceptions.pyi +2 -2
  55. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/model_storage.pyi +4 -4
  56. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/__init__.pyi +2 -2
  57. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/flowspec_utils.pyi +2 -2
  58. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/general.pyi +2 -2
  59. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/identity_utils.pyi +2 -2
  60. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/serialization_handler/__init__.pyi +2 -2
  61. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/serialization_handler/base.pyi +2 -2
  62. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/serialization_handler/tar.pyi +3 -3
  63. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/tar_utils.pyi +3 -3
  64. metaflow-stubs/mf_extensions/outerbounds/__init__.pyi +2 -2
  65. metaflow-stubs/mf_extensions/outerbounds/plugins/__init__.pyi +2 -2
  66. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/__init__.pyi +2 -2
  67. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/__init__.pyi +2 -2
  68. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/_state_machine.pyi +6 -6
  69. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/_vendor/__init__.pyi +2 -2
  70. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/_vendor/spinner/__init__.pyi +2 -2
  71. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/_vendor/spinner/spinners.pyi +2 -2
  72. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/app_cli.pyi +3 -3
  73. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/app_config.pyi +2 -2
  74. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/capsule.pyi +7 -7
  75. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/click_importer.pyi +2 -2
  76. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/code_package/__init__.pyi +2 -2
  77. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/code_package/code_packager.pyi +3 -3
  78. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/__init__.pyi +2 -2
  79. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/cli_generator.pyi +2 -2
  80. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/config_utils.pyi +3 -3
  81. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/schema_export.pyi +2 -2
  82. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/typed_configs.pyi +2 -2
  83. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/unified_config.pyi +3 -3
  84. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/dependencies.pyi +2 -2
  85. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/deployer.pyi +5 -5
  86. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/experimental/__init__.pyi +2 -2
  87. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/perimeters.pyi +2 -2
  88. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/utils.pyi +6 -6
  89. metaflow-stubs/mf_extensions/outerbounds/plugins/aws/__init__.pyi +2 -2
  90. metaflow-stubs/mf_extensions/outerbounds/plugins/aws/assume_role_decorator.pyi +12 -8
  91. metaflow-stubs/mf_extensions/outerbounds/plugins/card_utilities/__init__.pyi +2 -2
  92. metaflow-stubs/mf_extensions/outerbounds/plugins/card_utilities/async_cards.pyi +3 -3
  93. metaflow-stubs/mf_extensions/outerbounds/plugins/card_utilities/injector.pyi +2 -2
  94. metaflow-stubs/mf_extensions/outerbounds/plugins/checkpoint_datastores/__init__.pyi +2 -2
  95. metaflow-stubs/mf_extensions/outerbounds/plugins/checkpoint_datastores/coreweave.pyi +13 -8
  96. metaflow-stubs/mf_extensions/outerbounds/plugins/checkpoint_datastores/nebius.pyi +11 -8
  97. metaflow-stubs/mf_extensions/outerbounds/plugins/fast_bakery/__init__.pyi +2 -2
  98. metaflow-stubs/mf_extensions/outerbounds/plugins/fast_bakery/baker.pyi +3 -3
  99. metaflow-stubs/mf_extensions/outerbounds/plugins/fast_bakery/docker_environment.pyi +3 -3
  100. metaflow-stubs/mf_extensions/outerbounds/plugins/fast_bakery/fast_bakery.pyi +2 -2
  101. metaflow-stubs/mf_extensions/outerbounds/plugins/kubernetes/__init__.pyi +2 -2
  102. metaflow-stubs/mf_extensions/outerbounds/plugins/kubernetes/pod_killer.pyi +2 -2
  103. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/__init__.pyi +2 -2
  104. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/constants.pyi +2 -2
  105. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/exceptions.pyi +2 -2
  106. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/ollama.pyi +2 -2
  107. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/status_card.pyi +2 -2
  108. metaflow-stubs/mf_extensions/outerbounds/plugins/snowflake/__init__.pyi +2 -2
  109. metaflow-stubs/mf_extensions/outerbounds/plugins/snowflake/snowflake.pyi +2 -2
  110. metaflow-stubs/mf_extensions/outerbounds/profilers/__init__.pyi +2 -2
  111. metaflow-stubs/mf_extensions/outerbounds/profilers/gpu.pyi +2 -2
  112. metaflow-stubs/mf_extensions/outerbounds/remote_config.pyi +2 -2
  113. metaflow-stubs/mf_extensions/outerbounds/toplevel/__init__.pyi +2 -2
  114. metaflow-stubs/mf_extensions/outerbounds/toplevel/global_aliases_for_metaflow_package.pyi +13 -2
  115. metaflow-stubs/mf_extensions/outerbounds/toplevel/s3_proxy.pyi +24 -0
  116. metaflow-stubs/multicore_utils.pyi +2 -2
  117. metaflow-stubs/ob_internal.pyi +2 -2
  118. metaflow-stubs/packaging_sys/__init__.pyi +430 -0
  119. metaflow-stubs/packaging_sys/backend.pyi +86 -0
  120. metaflow-stubs/packaging_sys/distribution_support.pyi +57 -0
  121. metaflow-stubs/packaging_sys/tar_backend.pyi +62 -0
  122. metaflow-stubs/packaging_sys/utils.pyi +26 -0
  123. metaflow-stubs/packaging_sys/v1.pyi +145 -0
  124. metaflow-stubs/parameters.pyi +4 -4
  125. metaflow-stubs/plugins/__init__.pyi +14 -14
  126. metaflow-stubs/plugins/airflow/__init__.pyi +2 -2
  127. metaflow-stubs/plugins/airflow/airflow_utils.pyi +2 -2
  128. metaflow-stubs/plugins/airflow/exception.pyi +2 -2
  129. metaflow-stubs/plugins/airflow/sensors/__init__.pyi +2 -2
  130. metaflow-stubs/plugins/airflow/sensors/base_sensor.pyi +2 -2
  131. metaflow-stubs/plugins/airflow/sensors/external_task_sensor.pyi +2 -2
  132. metaflow-stubs/plugins/airflow/sensors/s3_sensor.pyi +2 -2
  133. metaflow-stubs/plugins/argo/__init__.pyi +2 -2
  134. metaflow-stubs/plugins/argo/argo_client.pyi +2 -2
  135. metaflow-stubs/plugins/argo/argo_events.pyi +2 -2
  136. metaflow-stubs/plugins/argo/argo_workflows.pyi +4 -4
  137. metaflow-stubs/plugins/argo/argo_workflows_decorator.pyi +2 -2
  138. metaflow-stubs/plugins/argo/argo_workflows_deployer.pyi +4 -4
  139. metaflow-stubs/plugins/argo/argo_workflows_deployer_objects.pyi +3 -3
  140. metaflow-stubs/plugins/argo/exit_hooks.pyi +3 -3
  141. metaflow-stubs/plugins/aws/__init__.pyi +2 -2
  142. metaflow-stubs/plugins/aws/aws_client.pyi +2 -2
  143. metaflow-stubs/plugins/aws/aws_utils.pyi +2 -2
  144. metaflow-stubs/plugins/aws/batch/__init__.pyi +2 -2
  145. metaflow-stubs/plugins/aws/batch/batch.pyi +4 -4
  146. metaflow-stubs/plugins/aws/batch/batch_client.pyi +2 -2
  147. metaflow-stubs/plugins/aws/batch/batch_decorator.pyi +4 -2
  148. metaflow-stubs/plugins/aws/secrets_manager/__init__.pyi +2 -2
  149. metaflow-stubs/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.pyi +3 -3
  150. metaflow-stubs/plugins/aws/step_functions/__init__.pyi +2 -2
  151. metaflow-stubs/plugins/aws/step_functions/event_bridge_client.pyi +2 -2
  152. metaflow-stubs/plugins/aws/step_functions/schedule_decorator.pyi +2 -2
  153. metaflow-stubs/plugins/aws/step_functions/step_functions.pyi +3 -3
  154. metaflow-stubs/plugins/aws/step_functions/step_functions_client.pyi +2 -2
  155. metaflow-stubs/plugins/aws/step_functions/step_functions_deployer.pyi +4 -4
  156. metaflow-stubs/plugins/aws/step_functions/step_functions_deployer_objects.pyi +3 -3
  157. metaflow-stubs/plugins/azure/__init__.pyi +2 -2
  158. metaflow-stubs/plugins/azure/azure_credential.pyi +2 -2
  159. metaflow-stubs/plugins/azure/azure_exceptions.pyi +2 -2
  160. metaflow-stubs/plugins/azure/azure_secret_manager_secrets_provider.pyi +3 -3
  161. metaflow-stubs/plugins/azure/azure_utils.pyi +2 -2
  162. metaflow-stubs/plugins/azure/blob_service_client_factory.pyi +2 -2
  163. metaflow-stubs/plugins/azure/includefile_support.pyi +2 -2
  164. metaflow-stubs/plugins/cards/__init__.pyi +2 -2
  165. metaflow-stubs/plugins/cards/card_client.pyi +3 -3
  166. metaflow-stubs/plugins/cards/card_creator.pyi +2 -2
  167. metaflow-stubs/plugins/cards/card_datastore.pyi +2 -2
  168. metaflow-stubs/plugins/cards/card_decorator.pyi +3 -5
  169. metaflow-stubs/plugins/cards/card_modules/__init__.pyi +2 -2
  170. metaflow-stubs/plugins/cards/card_modules/basic.pyi +3 -3
  171. metaflow-stubs/plugins/cards/card_modules/card.pyi +2 -2
  172. metaflow-stubs/plugins/cards/card_modules/components.pyi +4 -4
  173. metaflow-stubs/plugins/cards/card_modules/convert_to_native_type.pyi +2 -2
  174. metaflow-stubs/plugins/cards/card_modules/renderer_tools.pyi +2 -2
  175. metaflow-stubs/plugins/cards/card_modules/test_cards.pyi +2 -2
  176. metaflow-stubs/plugins/cards/card_resolver.pyi +2 -2
  177. metaflow-stubs/plugins/cards/component_serializer.pyi +2 -2
  178. metaflow-stubs/plugins/cards/exception.pyi +2 -2
  179. metaflow-stubs/plugins/catch_decorator.pyi +2 -2
  180. metaflow-stubs/plugins/datatools/__init__.pyi +2 -2
  181. metaflow-stubs/plugins/datatools/local.pyi +2 -2
  182. metaflow-stubs/plugins/datatools/s3/__init__.pyi +2 -2
  183. metaflow-stubs/plugins/datatools/s3/s3.pyi +3 -3
  184. metaflow-stubs/plugins/datatools/s3/s3tail.pyi +2 -2
  185. metaflow-stubs/plugins/datatools/s3/s3util.pyi +2 -2
  186. metaflow-stubs/plugins/debug_logger.pyi +2 -2
  187. metaflow-stubs/plugins/debug_monitor.pyi +2 -2
  188. metaflow-stubs/plugins/environment_decorator.pyi +2 -2
  189. metaflow-stubs/plugins/events_decorator.pyi +2 -2
  190. metaflow-stubs/plugins/exit_hook/__init__.pyi +2 -2
  191. metaflow-stubs/plugins/exit_hook/exit_hook_decorator.pyi +2 -2
  192. metaflow-stubs/plugins/frameworks/__init__.pyi +2 -2
  193. metaflow-stubs/plugins/frameworks/pytorch.pyi +2 -2
  194. metaflow-stubs/plugins/gcp/__init__.pyi +2 -2
  195. metaflow-stubs/plugins/gcp/gcp_secret_manager_secrets_provider.pyi +3 -3
  196. metaflow-stubs/plugins/gcp/gs_exceptions.pyi +2 -2
  197. metaflow-stubs/plugins/gcp/gs_storage_client_factory.pyi +2 -2
  198. metaflow-stubs/plugins/gcp/gs_utils.pyi +2 -2
  199. metaflow-stubs/plugins/gcp/includefile_support.pyi +2 -2
  200. metaflow-stubs/plugins/kubernetes/__init__.pyi +2 -2
  201. metaflow-stubs/plugins/kubernetes/kube_utils.pyi +3 -3
  202. metaflow-stubs/plugins/kubernetes/kubernetes.pyi +4 -4
  203. metaflow-stubs/plugins/kubernetes/kubernetes_client.pyi +2 -2
  204. metaflow-stubs/plugins/kubernetes/kubernetes_decorator.pyi +5 -3
  205. metaflow-stubs/plugins/kubernetes/kubernetes_jobsets.pyi +2 -2
  206. metaflow-stubs/plugins/kubernetes/spot_monitor_sidecar.pyi +2 -2
  207. metaflow-stubs/plugins/ollama/__init__.pyi +2 -2
  208. metaflow-stubs/plugins/parallel_decorator.pyi +3 -3
  209. metaflow-stubs/plugins/perimeters.pyi +2 -2
  210. metaflow-stubs/plugins/project_decorator.pyi +2 -2
  211. metaflow-stubs/plugins/pypi/__init__.pyi +3 -3
  212. metaflow-stubs/plugins/pypi/conda_decorator.pyi +5 -8
  213. metaflow-stubs/plugins/pypi/conda_environment.pyi +4 -3
  214. metaflow-stubs/plugins/pypi/parsers.pyi +2 -2
  215. metaflow-stubs/plugins/pypi/pypi_decorator.pyi +4 -4
  216. metaflow-stubs/plugins/pypi/pypi_environment.pyi +2 -2
  217. metaflow-stubs/plugins/pypi/utils.pyi +2 -2
  218. metaflow-stubs/plugins/resources_decorator.pyi +2 -2
  219. metaflow-stubs/plugins/retry_decorator.pyi +2 -2
  220. metaflow-stubs/plugins/secrets/__init__.pyi +3 -3
  221. metaflow-stubs/plugins/secrets/inline_secrets_provider.pyi +2 -2
  222. metaflow-stubs/plugins/secrets/secrets_decorator.pyi +2 -2
  223. metaflow-stubs/plugins/secrets/secrets_func.pyi +2 -2
  224. metaflow-stubs/plugins/secrets/secrets_spec.pyi +2 -2
  225. metaflow-stubs/plugins/secrets/utils.pyi +2 -2
  226. metaflow-stubs/plugins/snowflake/__init__.pyi +2 -2
  227. metaflow-stubs/plugins/storage_executor.pyi +2 -2
  228. metaflow-stubs/plugins/test_unbounded_foreach_decorator.pyi +3 -3
  229. metaflow-stubs/plugins/timeout_decorator.pyi +2 -2
  230. metaflow-stubs/plugins/torchtune/__init__.pyi +2 -2
  231. metaflow-stubs/plugins/uv/__init__.pyi +2 -2
  232. metaflow-stubs/plugins/uv/uv_environment.pyi +3 -2
  233. metaflow-stubs/profilers/__init__.pyi +2 -2
  234. metaflow-stubs/pylint_wrapper.pyi +2 -2
  235. metaflow-stubs/runner/__init__.pyi +2 -2
  236. metaflow-stubs/runner/deployer.pyi +34 -34
  237. metaflow-stubs/runner/deployer_impl.pyi +3 -3
  238. metaflow-stubs/runner/metaflow_runner.pyi +4 -4
  239. metaflow-stubs/runner/nbdeploy.pyi +2 -2
  240. metaflow-stubs/runner/nbrun.pyi +2 -2
  241. metaflow-stubs/runner/subprocess_manager.pyi +3 -2
  242. metaflow-stubs/runner/utils.pyi +3 -3
  243. metaflow-stubs/system/__init__.pyi +2 -2
  244. metaflow-stubs/system/system_logger.pyi +3 -3
  245. metaflow-stubs/system/system_monitor.pyi +2 -2
  246. metaflow-stubs/tagging_util.pyi +2 -2
  247. metaflow-stubs/tuple_util.pyi +2 -2
  248. metaflow-stubs/user_configs/__init__.pyi +2 -3
  249. metaflow-stubs/user_configs/config_options.pyi +4 -5
  250. metaflow-stubs/user_configs/config_parameters.pyi +6 -8
  251. metaflow-stubs/user_decorators/__init__.pyi +15 -0
  252. metaflow-stubs/user_decorators/common.pyi +38 -0
  253. metaflow-stubs/user_decorators/mutable_flow.pyi +223 -0
  254. metaflow-stubs/user_decorators/mutable_step.pyi +152 -0
  255. metaflow-stubs/user_decorators/user_flow_decorator.pyi +137 -0
  256. metaflow-stubs/user_decorators/user_step_decorator.pyi +323 -0
  257. {ob_metaflow_stubs-6.0.4.9.dist-info → ob_metaflow_stubs-6.0.5.1.dist-info}/METADATA +1 -1
  258. ob_metaflow_stubs-6.0.5.1.dist-info/RECORD +261 -0
  259. metaflow-stubs/user_configs/config_decorators.pyi +0 -251
  260. ob_metaflow_stubs-6.0.4.9.dist-info/RECORD +0 -249
  261. {ob_metaflow_stubs-6.0.4.9.dist-info → ob_metaflow_stubs-6.0.5.1.dist-info}/WHEEL +0 -0
  262. {ob_metaflow_stubs-6.0.4.9.dist-info → ob_metaflow_stubs-6.0.5.1.dist-info}/top_level.txt +0 -0
@@ -1,7 +1,7 @@
1
1
  ######################################################################################################
2
2
  # Auto-generated Metaflow stub file #
3
- # MF version: 2.15.21.5+obcheckpoint(0.2.4);ob(v1) #
4
- # Generated on 2025-07-30T20:52:28.447575 #
3
+ # MF version: 2.16.8.1+obcheckpoint(0.2.4);ob(v1) #
4
+ # Generated on 2025-08-01T20:12:28.874985 #
5
5
  ######################################################################################################
6
6
 
7
7
  from __future__ import annotations
@@ -13,7 +13,8 @@ if typing.TYPE_CHECKING:
13
13
  FlowSpecDerived = typing.TypeVar("FlowSpecDerived", bound="FlowSpec", contravariant=False, covariant=False)
14
14
  StepFlag = typing.NewType("StepFlag", bool)
15
15
 
16
- from . import info_file as info_file
16
+ from . import meta_files as meta_files
17
+ from . import packaging_sys as packaging_sys
17
18
  from . import exception as exception
18
19
  from . import metaflow_config as metaflow_config
19
20
  from . import multicore_utils as multicore_utils
@@ -23,6 +24,7 @@ from . import metaflow_current as metaflow_current
23
24
  from .metaflow_current import current as current
24
25
  from . import parameters as parameters
25
26
  from . import user_configs as user_configs
27
+ from . import user_decorators as user_decorators
26
28
  from . import tagging_util as tagging_util
27
29
  from . import metadata_provider as metadata_provider
28
30
  from . import flowspec as flowspec
@@ -33,20 +35,22 @@ from .parameters import JSONType as JSONType
33
35
  from .user_configs.config_parameters import Config as Config
34
36
  from .user_configs.config_parameters import ConfigValue as ConfigValue
35
37
  from .user_configs.config_parameters import config_expr as config_expr
36
- from .user_configs.config_decorators import CustomFlowDecorator as CustomFlowDecorator
37
- from .user_configs.config_decorators import CustomStepDecorator as CustomStepDecorator
38
+ from .user_decorators.user_step_decorator import UserStepDecorator as UserStepDecorator
39
+ from .user_decorators.user_step_decorator import StepMutator as StepMutator
40
+ from .user_decorators.user_step_decorator import user_step_decorator as user_step_decorator
41
+ from .user_decorators.user_flow_decorator import FlowMutator as FlowMutator
38
42
  from . import cards as cards
39
- from . import tuple_util as tuple_util
40
43
  from . import metaflow_git as metaflow_git
44
+ from . import tuple_util as tuple_util
41
45
  from . import events as events
42
46
  from . import runner as runner
43
47
  from . import plugins as plugins
44
48
  from .mf_extensions.outerbounds.toplevel.global_aliases_for_metaflow_package import S3 as S3
45
49
  from . import includefile as includefile
46
50
  from .includefile import IncludeFile as IncludeFile
51
+ from .plugins.pypi.parsers import conda_environment_yml_parser as conda_environment_yml_parser
47
52
  from .plugins.pypi.parsers import pyproject_toml_parser as pyproject_toml_parser
48
53
  from .plugins.pypi.parsers import requirements_txt_parser as requirements_txt_parser
49
- from .plugins.pypi.parsers import conda_environment_yml_parser as conda_environment_yml_parser
50
54
  from . import client as client
51
55
  from .client.core import namespace as namespace
52
56
  from .client.core import get_namespace as get_namespace
@@ -68,6 +72,11 @@ from .runner.nbdeploy import NBDeployer as NBDeployer
68
72
  from .mf_extensions.obcheckpoint.plugins.machine_learning_utilities.checkpoints.final_api import Checkpoint as Checkpoint
69
73
  from .mf_extensions.obcheckpoint.plugins.machine_learning_utilities.datastructures import load_model as load_model
70
74
  from .mf_extensions.obcheckpoint.plugins.machine_learning_utilities.datastore.context import artifact_store_from as artifact_store_from
75
+ from .mf_extensions.outerbounds.toplevel.s3_proxy import get_aws_client_with_s3_proxy as get_aws_client_with_s3_proxy
76
+ from .mf_extensions.outerbounds.toplevel.s3_proxy import get_S3_with_s3_proxy as get_S3_with_s3_proxy
77
+ from .mf_extensions.outerbounds.toplevel.global_aliases_for_metaflow_package import set_s3_proxy_config as set_s3_proxy_config
78
+ from .mf_extensions.outerbounds.toplevel.global_aliases_for_metaflow_package import clear_s3_proxy_config as clear_s3_proxy_config
79
+ from .mf_extensions.outerbounds.toplevel.global_aliases_for_metaflow_package import get_s3_proxy_config as get_s3_proxy_config
71
80
  from .mf_extensions.outerbounds.toplevel.global_aliases_for_metaflow_package import get_aws_client as get_aws_client
72
81
  from .mf_extensions.outerbounds.plugins.snowflake.snowflake import Snowflake as Snowflake
73
82
  from .mf_extensions.outerbounds.plugins.checkpoint_datastores.nebius import nebius_checkpoints as nebius_checkpoints
@@ -83,6 +92,8 @@ from . import ob_internal as ob_internal
83
92
 
84
93
  EXT_PKG: str
85
94
 
95
+ USER_SKIP_STEP: dict
96
+
86
97
  @typing.overload
87
98
  def step(f: typing.Callable[[FlowSpecDerived], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
88
99
  """
@@ -156,157 +167,6 @@ def step(f: typing.Union[typing.Callable[[FlowSpecDerived], None], typing.Callab
156
167
  """
157
168
  ...
158
169
 
159
- @typing.overload
160
- def parallel(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
161
- """
162
- Decorator prototype for all step decorators. This function gets specialized
163
- and imported for all decorators types by _import_plugin_decorators().
164
- """
165
- ...
166
-
167
- @typing.overload
168
- def parallel(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
169
- ...
170
-
171
- def parallel(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
172
- """
173
- Decorator prototype for all step decorators. This function gets specialized
174
- and imported for all decorators types by _import_plugin_decorators().
175
- """
176
- ...
177
-
178
- def ollama(*, models: list, backend: str, force_pull: bool, cache_update_policy: str, force_cache_update: bool, debug: bool, circuit_breaker_config: dict, timeout_config: dict) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
179
- """
180
- This decorator is used to run Ollama APIs as Metaflow task sidecars.
181
-
182
- User code call
183
- --------------
184
- @ollama(
185
- models=[...],
186
- ...
187
- )
188
-
189
- Valid backend options
190
- ---------------------
191
- - 'local': Run as a separate process on the local task machine.
192
- - (TODO) 'managed': Outerbounds hosts and selects compute provider.
193
- - (TODO) 'remote': Spin up separate instance to serve Ollama models.
194
-
195
- Valid model options
196
- -------------------
197
- Any model here https://ollama.com/search, e.g. 'llama3.2', 'llama3.3'
198
-
199
-
200
- Parameters
201
- ----------
202
- models: list[str]
203
- List of Ollama containers running models in sidecars.
204
- backend: str
205
- Determines where and how to run the Ollama process.
206
- force_pull: bool
207
- Whether to run `ollama pull` no matter what, or first check the remote cache in Metaflow datastore for this model key.
208
- cache_update_policy: str
209
- Cache update policy: "auto", "force", or "never".
210
- force_cache_update: bool
211
- Simple override for "force" cache update policy.
212
- debug: bool
213
- Whether to turn on verbose debugging logs.
214
- circuit_breaker_config: dict
215
- Configuration for circuit breaker protection. Keys: failure_threshold, recovery_timeout, reset_timeout.
216
- timeout_config: dict
217
- Configuration for various operation timeouts. Keys: pull, stop, health_check, install, server_startup.
218
- """
219
- ...
220
-
221
- def kubernetes(*, cpu: int = 1, memory: int = 4096, disk: int = 10240, image: typing.Optional[str] = None, image_pull_policy: str = 'KUBERNETES_IMAGE_PULL_POLICY', image_pull_secrets: typing.List[str] = [], service_account: str = 'METAFLOW_KUBERNETES_SERVICE_ACCOUNT', secrets: typing.Optional[typing.List[str]] = None, node_selector: typing.Union[typing.Dict[str, str], str, None] = None, namespace: str = 'METAFLOW_KUBERNETES_NAMESPACE', gpu: typing.Optional[int] = None, gpu_vendor: str = 'KUBERNETES_GPU_VENDOR', tolerations: typing.List[str] = [], labels: typing.Dict[str, str] = 'METAFLOW_KUBERNETES_LABELS', annotations: typing.Dict[str, str] = 'METAFLOW_KUBERNETES_ANNOTATIONS', use_tmpfs: bool = False, tmpfs_tempdir: bool = True, tmpfs_size: typing.Optional[int] = None, tmpfs_path: typing.Optional[str] = '/metaflow_temp', persistent_volume_claims: typing.Optional[typing.Dict[str, str]] = None, shared_memory: typing.Optional[int] = None, port: typing.Optional[int] = None, compute_pool: typing.Optional[str] = None, hostname_resolution_timeout: int = 600, qos: str = 'Burstable', security_context: typing.Optional[typing.Dict[str, typing.Any]] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
222
- """
223
- Specifies that this step should execute on Kubernetes.
224
-
225
-
226
- Parameters
227
- ----------
228
- cpu : int, default 1
229
- Number of CPUs required for this step. If `@resources` is
230
- also present, the maximum value from all decorators is used.
231
- memory : int, default 4096
232
- Memory size (in MB) required for this step. If
233
- `@resources` is also present, the maximum value from all decorators is
234
- used.
235
- disk : int, default 10240
236
- Disk size (in MB) required for this step. If
237
- `@resources` is also present, the maximum value from all decorators is
238
- used.
239
- image : str, optional, default None
240
- Docker image to use when launching on Kubernetes. If not specified, and
241
- METAFLOW_KUBERNETES_CONTAINER_IMAGE is specified, that image is used. If
242
- not, a default Docker image mapping to the current version of Python is used.
243
- image_pull_policy: str, default KUBERNETES_IMAGE_PULL_POLICY
244
- If given, the imagePullPolicy to be applied to the Docker image of the step.
245
- image_pull_secrets: List[str], default []
246
- The default is extracted from METAFLOW_KUBERNETES_IMAGE_PULL_SECRETS.
247
- Kubernetes image pull secrets to use when pulling container images
248
- in Kubernetes.
249
- service_account : str, default METAFLOW_KUBERNETES_SERVICE_ACCOUNT
250
- Kubernetes service account to use when launching pod in Kubernetes.
251
- secrets : List[str], optional, default None
252
- Kubernetes secrets to use when launching pod in Kubernetes. These
253
- secrets are in addition to the ones defined in `METAFLOW_KUBERNETES_SECRETS`
254
- in Metaflow configuration.
255
- node_selector: Union[Dict[str,str], str], optional, default None
256
- Kubernetes node selector(s) to apply to the pod running the task.
257
- Can be passed in as a comma separated string of values e.g.
258
- 'kubernetes.io/os=linux,kubernetes.io/arch=amd64' or as a dictionary
259
- {'kubernetes.io/os': 'linux', 'kubernetes.io/arch': 'amd64'}
260
- namespace : str, default METAFLOW_KUBERNETES_NAMESPACE
261
- Kubernetes namespace to use when launching pod in Kubernetes.
262
- gpu : int, optional, default None
263
- Number of GPUs required for this step. A value of zero implies that
264
- the scheduled node should not have GPUs.
265
- gpu_vendor : str, default KUBERNETES_GPU_VENDOR
266
- The vendor of the GPUs to be used for this step.
267
- tolerations : List[str], default []
268
- The default is extracted from METAFLOW_KUBERNETES_TOLERATIONS.
269
- Kubernetes tolerations to use when launching pod in Kubernetes.
270
- labels: Dict[str, str], default: METAFLOW_KUBERNETES_LABELS
271
- Kubernetes labels to use when launching pod in Kubernetes.
272
- annotations: Dict[str, str], default: METAFLOW_KUBERNETES_ANNOTATIONS
273
- Kubernetes annotations to use when launching pod in Kubernetes.
274
- use_tmpfs : bool, default False
275
- This enables an explicit tmpfs mount for this step.
276
- tmpfs_tempdir : bool, default True
277
- sets METAFLOW_TEMPDIR to tmpfs_path if set for this step.
278
- tmpfs_size : int, optional, default: None
279
- The value for the size (in MiB) of the tmpfs mount for this step.
280
- This parameter maps to the `--tmpfs` option in Docker. Defaults to 50% of the
281
- memory allocated for this step.
282
- tmpfs_path : str, optional, default /metaflow_temp
283
- Path to tmpfs mount for this step.
284
- persistent_volume_claims : Dict[str, str], optional, default None
285
- A map (dictionary) of persistent volumes to be mounted to the pod for this step. The map is from persistent
286
- volumes to the path to which the volume is to be mounted, e.g., `{'pvc-name': '/path/to/mount/on'}`.
287
- shared_memory: int, optional
288
- Shared memory size (in MiB) required for this step
289
- port: int, optional
290
- Port number to specify in the Kubernetes job object
291
- compute_pool : str, optional, default None
292
- Compute pool to be used for for this step.
293
- If not specified, any accessible compute pool within the perimeter is used.
294
- hostname_resolution_timeout: int, default 10 * 60
295
- Timeout in seconds for the workers tasks in the gang scheduled cluster to resolve the hostname of control task.
296
- Only applicable when @parallel is used.
297
- qos: str, default: Burstable
298
- Quality of Service class to assign to the pod. Supported values are: Guaranteed, Burstable, BestEffort
299
-
300
- security_context: Dict[str, Any], optional, default None
301
- Container security context. Applies to the task container. Allows the following keys:
302
- - privileged: bool, optional, default None
303
- - allow_privilege_escalation: bool, optional, default None
304
- - run_as_user: int, optional, default None
305
- - run_as_group: int, optional, default None
306
- - run_as_non_root: bool, optional, default None
307
- """
308
- ...
309
-
310
170
  @typing.overload
311
171
  def catch(*, var: typing.Optional[str] = None, print_exception: bool = True) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
312
172
  """
@@ -359,180 +219,128 @@ def catch(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], ty
359
219
  ...
360
220
 
361
221
  @typing.overload
362
- def model(*, load: typing.Union[typing.List[str], str, typing.List[typing.Tuple[str, typing.Optional[str]]]] = None, temp_dir_root: str = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
222
+ def secrets(*, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = [], role: typing.Optional[str] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
363
223
  """
364
- Enables loading / saving of models within a step.
365
-
366
- > Examples
367
- - Saving Models
368
- ```python
369
- @model
370
- @step
371
- def train(self):
372
- # current.model.save returns a dictionary reference to the model saved
373
- self.my_model = current.model.save(
374
- path_to_my_model,
375
- label="my_model",
376
- metadata={
377
- "epochs": 10,
378
- "batch-size": 32,
379
- "learning-rate": 0.001,
380
- }
381
- )
382
- self.next(self.test)
383
-
384
- @model(load="my_model")
385
- @step
386
- def test(self):
387
- # `current.model.loaded` returns a dictionary of the loaded models
388
- # where the key is the name of the artifact and the value is the path to the model
389
- print(os.listdir(current.model.loaded["my_model"]))
390
- self.next(self.end)
391
- ```
392
-
393
- - Loading models
394
- ```python
395
- @step
396
- def train(self):
397
- # current.model.load returns the path to the model loaded
398
- checkpoint_path = current.model.load(
399
- self.checkpoint_key,
400
- )
401
- model_path = current.model.load(
402
- self.model,
403
- )
404
- self.next(self.test)
405
- ```
224
+ Specifies secrets to be retrieved and injected as environment variables prior to
225
+ the execution of a step.
406
226
 
407
227
 
408
228
  Parameters
409
229
  ----------
410
- load : Union[List[str],str,List[Tuple[str,Union[str,None]]]], default: None
411
- Artifact name/s referencing the models/checkpoints to load. Artifact names refer to the names of the instance variables set to `self`.
412
- These artifact names give to `load` be reference objects or reference `key` string's from objects created by `current.checkpoint` / `current.model` / `current.huggingface_hub`.
413
- If a list of tuples is provided, the first element is the artifact name and the second element is the path the artifact needs be unpacked on
414
- the local filesystem. If the second element is None, the artifact will be unpacked in the current working directory.
415
- If a string is provided, then the artifact corresponding to that name will be loaded in the current working directory.
416
-
417
- temp_dir_root : str, default: None
418
- The root directory under which `current.model.loaded` will store loaded models
230
+ sources : List[Union[str, Dict[str, Any]]], default: []
231
+ List of secret specs, defining how the secrets are to be retrieved
232
+ role : str, optional, default: None
233
+ Role to use for fetching secrets
419
234
  """
420
235
  ...
421
236
 
422
237
  @typing.overload
423
- def model(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
238
+ def secrets(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
424
239
  ...
425
240
 
426
241
  @typing.overload
427
- def model(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
242
+ def secrets(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
428
243
  ...
429
244
 
430
- def model(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, load: typing.Union[typing.List[str], str, typing.List[typing.Tuple[str, typing.Optional[str]]]] = None, temp_dir_root: str = None):
245
+ def secrets(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = [], role: typing.Optional[str] = None):
431
246
  """
432
- Enables loading / saving of models within a step.
433
-
434
- > Examples
435
- - Saving Models
436
- ```python
437
- @model
438
- @step
439
- def train(self):
440
- # current.model.save returns a dictionary reference to the model saved
441
- self.my_model = current.model.save(
442
- path_to_my_model,
443
- label="my_model",
444
- metadata={
445
- "epochs": 10,
446
- "batch-size": 32,
447
- "learning-rate": 0.001,
448
- }
449
- )
450
- self.next(self.test)
451
-
452
- @model(load="my_model")
453
- @step
454
- def test(self):
455
- # `current.model.loaded` returns a dictionary of the loaded models
456
- # where the key is the name of the artifact and the value is the path to the model
457
- print(os.listdir(current.model.loaded["my_model"]))
458
- self.next(self.end)
459
- ```
460
-
461
- - Loading models
462
- ```python
463
- @step
464
- def train(self):
465
- # current.model.load returns the path to the model loaded
466
- checkpoint_path = current.model.load(
467
- self.checkpoint_key,
468
- )
469
- model_path = current.model.load(
470
- self.model,
471
- )
472
- self.next(self.test)
473
- ```
247
+ Specifies secrets to be retrieved and injected as environment variables prior to
248
+ the execution of a step.
474
249
 
475
250
 
476
251
  Parameters
477
252
  ----------
478
- load : Union[List[str],str,List[Tuple[str,Union[str,None]]]], default: None
479
- Artifact name/s referencing the models/checkpoints to load. Artifact names refer to the names of the instance variables set to `self`.
480
- These artifact names give to `load` be reference objects or reference `key` string's from objects created by `current.checkpoint` / `current.model` / `current.huggingface_hub`.
481
- If a list of tuples is provided, the first element is the artifact name and the second element is the path the artifact needs be unpacked on
482
- the local filesystem. If the second element is None, the artifact will be unpacked in the current working directory.
483
- If a string is provided, then the artifact corresponding to that name will be loaded in the current working directory.
484
-
485
- temp_dir_root : str, default: None
486
- The root directory under which `current.model.loaded` will store loaded models
253
+ sources : List[Union[str, Dict[str, Any]]], default: []
254
+ List of secret specs, defining how the secrets are to be retrieved
255
+ role : str, optional, default: None
256
+ Role to use for fetching secrets
487
257
  """
488
258
  ...
489
259
 
490
260
  @typing.overload
491
- def card(*, type: str = 'default', id: typing.Optional[str] = None, options: typing.Dict[str, typing.Any] = {}, timeout: int = 45) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
261
+ def fast_bakery_internal(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
492
262
  """
493
- Creates a human-readable report, a Metaflow Card, after this step completes.
263
+ Internal decorator to support Fast bakery
264
+ """
265
+ ...
266
+
267
+ @typing.overload
268
+ def fast_bakery_internal(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
269
+ ...
270
+
271
+ def fast_bakery_internal(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
272
+ """
273
+ Internal decorator to support Fast bakery
274
+ """
275
+ ...
276
+
277
+ @typing.overload
278
+ def test_append_card(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
279
+ """
280
+ A simple decorator that demonstrates using CardDecoratorInjector
281
+ to inject a card and render simple markdown content.
282
+ """
283
+ ...
284
+
285
+ @typing.overload
286
+ def test_append_card(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
287
+ ...
288
+
289
+ def test_append_card(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
290
+ """
291
+ A simple decorator that demonstrates using CardDecoratorInjector
292
+ to inject a card and render simple markdown content.
293
+ """
294
+ ...
295
+
296
+ @typing.overload
297
+ def pypi(*, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
298
+ """
299
+ Specifies the PyPI packages for the step.
494
300
 
495
- Note that you may add multiple `@card` decorators in a step with different parameters.
301
+ Information in this decorator will augment any
302
+ attributes set in the `@pypi_base` flow-level decorator. Hence,
303
+ you can use `@pypi_base` to set packages required by all
304
+ steps and use `@pypi` to specify step-specific overrides.
496
305
 
497
306
 
498
307
  Parameters
499
308
  ----------
500
- type : str, default 'default'
501
- Card type.
502
- id : str, optional, default None
503
- If multiple cards are present, use this id to identify this card.
504
- options : Dict[str, Any], default {}
505
- Options passed to the card. The contents depend on the card type.
506
- timeout : int, default 45
507
- Interrupt reporting if it takes more than this many seconds.
309
+ packages : Dict[str, str], default: {}
310
+ Packages to use for this step. The key is the name of the package
311
+ and the value is the version to use.
312
+ python : str, optional, default: None
313
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
314
+ that the version used will correspond to the version of the Python interpreter used to start the run.
508
315
  """
509
316
  ...
510
317
 
511
318
  @typing.overload
512
- def card(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
319
+ def pypi(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
513
320
  ...
514
321
 
515
322
  @typing.overload
516
- def card(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
323
+ def pypi(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
517
324
  ...
518
325
 
519
- def card(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, type: str = 'default', id: typing.Optional[str] = None, options: typing.Dict[str, typing.Any] = {}, timeout: int = 45):
326
+ def pypi(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None):
520
327
  """
521
- Creates a human-readable report, a Metaflow Card, after this step completes.
328
+ Specifies the PyPI packages for the step.
522
329
 
523
- Note that you may add multiple `@card` decorators in a step with different parameters.
330
+ Information in this decorator will augment any
331
+ attributes set in the `@pypi_base` flow-level decorator. Hence,
332
+ you can use `@pypi_base` to set packages required by all
333
+ steps and use `@pypi` to specify step-specific overrides.
524
334
 
525
335
 
526
336
  Parameters
527
337
  ----------
528
- type : str, default 'default'
529
- Card type.
530
- id : str, optional, default None
531
- If multiple cards are present, use this id to identify this card.
532
- options : Dict[str, Any], default {}
533
- Options passed to the card. The contents depend on the card type.
534
- timeout : int, default 45
535
- Interrupt reporting if it takes more than this many seconds.
338
+ packages : Dict[str, str], default: {}
339
+ Packages to use for this step. The key is the name of the package
340
+ and the value is the version to use.
341
+ python : str, optional, default: None
342
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
343
+ that the version used will correspond to the version of the Python interpreter used to start the run.
536
344
  """
537
345
  ...
538
346
 
@@ -587,324 +395,572 @@ def vllm(*, model: str, backend: str, openai_api_server: bool, debug: bool, card
587
395
  ...
588
396
 
589
397
  @typing.overload
590
- def timeout(*, seconds: int = 0, minutes: int = 0, hours: int = 0) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
398
+ def model(*, load: typing.Union[typing.List[str], str, typing.List[typing.Tuple[str, typing.Optional[str]]]] = None, temp_dir_root: str = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
591
399
  """
592
- Specifies a timeout for your step.
400
+ Enables loading / saving of models within a step.
593
401
 
594
- This decorator is useful if this step may hang indefinitely.
402
+ > Examples
403
+ - Saving Models
404
+ ```python
405
+ @model
406
+ @step
407
+ def train(self):
408
+ # current.model.save returns a dictionary reference to the model saved
409
+ self.my_model = current.model.save(
410
+ path_to_my_model,
411
+ label="my_model",
412
+ metadata={
413
+ "epochs": 10,
414
+ "batch-size": 32,
415
+ "learning-rate": 0.001,
416
+ }
417
+ )
418
+ self.next(self.test)
595
419
 
596
- This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
597
- A timeout is considered to be an exception thrown by the step. It will cause the step to be
598
- retried if needed and the exception will be caught by the `@catch` decorator, if present.
420
+ @model(load="my_model")
421
+ @step
422
+ def test(self):
423
+ # `current.model.loaded` returns a dictionary of the loaded models
424
+ # where the key is the name of the artifact and the value is the path to the model
425
+ print(os.listdir(current.model.loaded["my_model"]))
426
+ self.next(self.end)
427
+ ```
599
428
 
600
- Note that all the values specified in parameters are added together so if you specify
601
- 60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.
429
+ - Loading models
430
+ ```python
431
+ @step
432
+ def train(self):
433
+ # current.model.load returns the path to the model loaded
434
+ checkpoint_path = current.model.load(
435
+ self.checkpoint_key,
436
+ )
437
+ model_path = current.model.load(
438
+ self.model,
439
+ )
440
+ self.next(self.test)
441
+ ```
602
442
 
603
443
 
604
444
  Parameters
605
445
  ----------
606
- seconds : int, default 0
607
- Number of seconds to wait prior to timing out.
608
- minutes : int, default 0
609
- Number of minutes to wait prior to timing out.
610
- hours : int, default 0
611
- Number of hours to wait prior to timing out.
446
+ load : Union[List[str],str,List[Tuple[str,Union[str,None]]]], default: None
447
+ Artifact name/s referencing the models/checkpoints to load. Artifact names refer to the names of the instance variables set to `self`.
448
+ The artifact names given to `load` can be reference objects or reference `key` strings from objects created by `current.checkpoint` / `current.model` / `current.huggingface_hub`.
449
+ If a list of tuples is provided, the first element is the artifact name and the second element is the path the artifact needs to be unpacked on
450
+ the local filesystem. If the second element is None, the artifact will be unpacked in the current working directory.
451
+ If a string is provided, then the artifact corresponding to that name will be loaded in the current working directory.
452
+
453
+ temp_dir_root : str, default: None
454
+ The root directory under which `current.model.loaded` will store loaded models
612
455
  """
613
456
  ...
614
457
 
615
458
  @typing.overload
616
- def timeout(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
459
+ def model(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
617
460
  ...
618
461
 
619
462
  @typing.overload
620
- def timeout(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
463
+ def model(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
621
464
  ...
622
465
 
623
- def timeout(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, seconds: int = 0, minutes: int = 0, hours: int = 0):
466
+ def model(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, load: typing.Union[typing.List[str], str, typing.List[typing.Tuple[str, typing.Optional[str]]]] = None, temp_dir_root: str = None):
624
467
  """
625
- Specifies a timeout for your step.
468
+ Enables loading / saving of models within a step.
626
469
 
627
- This decorator is useful if this step may hang indefinitely.
470
+ > Examples
471
+ - Saving Models
472
+ ```python
473
+ @model
474
+ @step
475
+ def train(self):
476
+ # current.model.save returns a dictionary reference to the model saved
477
+ self.my_model = current.model.save(
478
+ path_to_my_model,
479
+ label="my_model",
480
+ metadata={
481
+ "epochs": 10,
482
+ "batch-size": 32,
483
+ "learning-rate": 0.001,
484
+ }
485
+ )
486
+ self.next(self.test)
628
487
 
629
- This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
630
- A timeout is considered to be an exception thrown by the step. It will cause the step to be
631
- retried if needed and the exception will be caught by the `@catch` decorator, if present.
488
+ @model(load="my_model")
489
+ @step
490
+ def test(self):
491
+ # `current.model.loaded` returns a dictionary of the loaded models
492
+ # where the key is the name of the artifact and the value is the path to the model
493
+ print(os.listdir(current.model.loaded["my_model"]))
494
+ self.next(self.end)
495
+ ```
632
496
 
633
- Note that all the values specified in parameters are added together so if you specify
634
- 60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.
497
+ - Loading models
498
+ ```python
499
+ @step
500
+ def train(self):
501
+ # current.model.load returns the path to the model loaded
502
+ checkpoint_path = current.model.load(
503
+ self.checkpoint_key,
504
+ )
505
+ model_path = current.model.load(
506
+ self.model,
507
+ )
508
+ self.next(self.test)
509
+ ```
635
510
 
636
511
 
637
512
  Parameters
638
513
  ----------
639
- seconds : int, default 0
640
- Number of seconds to wait prior to timing out.
641
- minutes : int, default 0
642
- Number of minutes to wait prior to timing out.
643
- hours : int, default 0
644
- Number of hours to wait prior to timing out.
645
- """
646
- ...
647
-
648
- @typing.overload
649
- def fast_bakery_internal(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
650
- """
651
- Internal decorator to support Fast bakery
652
- """
653
- ...
654
-
655
- @typing.overload
656
- def fast_bakery_internal(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
657
- ...
658
-
659
- def fast_bakery_internal(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
660
- """
661
- Internal decorator to support Fast bakery
514
+ load : Union[List[str],str,List[Tuple[str,Union[str,None]]]], default: None
515
+ Artifact name/s referencing the models/checkpoints to load. Artifact names refer to the names of the instance variables set to `self`.
516
+ The artifact names given to `load` can be reference objects or reference `key` strings from objects created by `current.checkpoint` / `current.model` / `current.huggingface_hub`.
517
+ If a list of tuples is provided, the first element is the artifact name and the second element is the path the artifact needs to be unpacked on
518
+ the local filesystem. If the second element is None, the artifact will be unpacked in the current working directory.
519
+ If a string is provided, then the artifact corresponding to that name will be loaded in the current working directory.
520
+
521
+ temp_dir_root : str, default: None
522
+ The root directory under which `current.model.loaded` will store loaded models
662
523
  """
663
524
  ...
664
525
 
665
- @typing.overload
666
- def resources(*, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
526
+ def huggingface_hub(*, temp_dir_root: typing.Optional[str] = None, load: typing.Union[typing.List[str], typing.List[typing.Tuple[typing.Dict, str]], typing.List[typing.Tuple[str, str]], typing.List[typing.Dict], None]) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
667
527
  """
668
- Specifies the resources needed when executing this step.
528
+ Decorator that helps cache, version and store models/datasets from huggingface hub.
669
529
 
670
- Use `@resources` to specify the resource requirements
671
- independently of the specific compute layer (`@batch`, `@kubernetes`).
530
+ > Examples
672
531
 
673
- You can choose the compute layer on the command line by executing e.g.
532
+ **Usage: creating references of models from huggingface that may be loaded in downstream steps**
533
+ ```python
534
+ @huggingface_hub
535
+ @step
536
+ def pull_model_from_huggingface(self):
537
+ # `current.huggingface_hub.snapshot_download` downloads the model from the Hugging Face Hub
538
+ # and saves it in the backend storage based on the model's `repo_id`. If there exists a model
539
+ # with the same `repo_id` in the backend storage, it will not download the model again. The return
540
+ # value of the function is a reference to the model in the backend storage.
541
+ # This reference can be used to load the model in the subsequent steps via `@model(load=["llama_model"])`
542
+
543
+ self.model_id = "mistralai/Mistral-7B-Instruct-v0.1"
544
+ self.llama_model = current.huggingface_hub.snapshot_download(
545
+ repo_id=self.model_id,
546
+ allow_patterns=["*.safetensors", "*.json", "tokenizer.*"],
547
+ )
548
+ self.next(self.train)
674
549
  ```
675
- python myflow.py run --with batch
550
+
551
+ **Usage: loading models directly from huggingface hub or from cache (from metaflow's datastore)**
552
+ ```python
553
+ @huggingface_hub(load=["mistralai/Mistral-7B-Instruct-v0.1"])
554
+ @step
555
+ def pull_model_from_huggingface(self):
556
+ path_to_model = current.huggingface_hub.loaded["mistralai/Mistral-7B-Instruct-v0.1"]
676
557
  ```
677
- or
558
+
559
+ ```python
560
+ @huggingface_hub(load=[("mistralai/Mistral-7B-Instruct-v0.1", "/my-directory"), ("myorg/mistral-lora", "/my-lora-directory")])
561
+ @step
562
+ def finetune_model(self):
563
+ path_to_model = current.huggingface_hub.loaded["mistralai/Mistral-7B-Instruct-v0.1"]
564
+ # path_to_model will be /my-directory
678
565
  ```
679
- python myflow.py run --with kubernetes
566
+
567
+ ```python
568
+ # Takes all the arguments passed to `snapshot_download`
569
+ # except for `local_dir`
570
+ @huggingface_hub(load=[
571
+ {
572
+ "repo_id": "mistralai/Mistral-7B-Instruct-v0.1",
573
+ },
574
+ {
575
+ "repo_id": "myorg/mistral-lora",
576
+ "repo_type": "model",
577
+ },
578
+ ])
579
+ @step
580
+ def finetune_model(self):
581
+ path_to_model = current.huggingface_hub.loaded["mistralai/Mistral-7B-Instruct-v0.1"]
582
+ # path_to_model will be /my-directory
680
583
  ```
681
- which executes the flow on the desired system using the
682
- requirements specified in `@resources`.
683
584
 
684
585
 
685
586
  Parameters
686
587
  ----------
687
- cpu : int, default 1
688
- Number of CPUs required for this step.
689
- gpu : int, optional, default None
690
- Number of GPUs required for this step.
691
- disk : int, optional, default None
692
- Disk size (in MB) required for this step. Only applies on Kubernetes.
693
- memory : int, default 4096
694
- Memory size (in MB) required for this step.
695
- shared_memory : int, optional, default None
696
- The value for the size (in MiB) of the /dev/shm volume for this step.
697
- This parameter maps to the `--shm-size` option in Docker.
588
+ temp_dir_root : str, optional
589
+ The root directory that will hold the temporary directory where objects will be downloaded.
590
+
591
+ load: Union[List[str], List[Tuple[Dict, str]], List[Tuple[str, str]], List[Dict], None]
592
+ The list of repos (models/datasets) to load.
593
+
594
+ Loaded repos can be accessed via `current.huggingface_hub.loaded`. If load is set, then the following happens:
595
+
596
+ - If repo (model/dataset) is not found in the datastore:
597
+ - Downloads the repo from Hugging Face Hub to a temporary directory (or uses specified path) for local access
598
+ - Stores it in Metaflow's datastore (s3/gcs/azure etc.) with a unique name based on repo_type/repo_id
599
+ - All HF models loaded for a `@step` will be cached separately under flow/step/namespace.
600
+
601
+ - If repo is found in the datastore:
602
+ - Loads it directly from datastore to local path (can be temporary directory or specified path)
698
603
  """
699
604
  ...
700
605
 
701
- @typing.overload
702
- def resources(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
606
+ def nvidia(*, gpu: int, gpu_type: str, queue_timeout: int) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
607
+ """
608
+ Specifies that this step should execute on DGX cloud.
609
+
610
+
611
+ Parameters
612
+ ----------
613
+ gpu : int
614
+ Number of GPUs to use.
615
+ gpu_type : str
616
+ Type of Nvidia GPU to use.
617
+ queue_timeout : int
618
+ Time to keep the job in NVCF's queue.
619
+ """
703
620
  ...
704
621
 
705
622
  @typing.overload
706
- def resources(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
707
- ...
708
-
709
- def resources(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None):
623
+ def checkpoint(*, load_policy: str = 'fresh', temp_dir_root: str = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
710
624
  """
711
- Specifies the resources needed when executing this step.
625
+ Enables checkpointing for a step.
712
626
 
713
- Use `@resources` to specify the resource requirements
714
- independently of the specific compute layer (`@batch`, `@kubernetes`).
627
+ > Examples
715
628
 
716
- You can choose the compute layer on the command line by executing e.g.
717
- ```
718
- python myflow.py run --with batch
719
- ```
720
- or
629
+ - Saving Checkpoints
630
+
631
+ ```python
632
+ @checkpoint
633
+ @step
634
+ def train(self):
635
+ model = create_model(self.parameters, checkpoint_path = None)
636
+ for i in range(self.epochs):
637
+ # some training logic
638
+ loss = model.train(self.dataset)
639
+ if i % 10 == 0:
640
+ model.save(
641
+ current.checkpoint.directory,
642
+ )
643
+ # saves the contents of the `current.checkpoint.directory` as a checkpoint
644
+ # and returns a reference dictionary to the checkpoint saved in the datastore
645
+ self.latest_checkpoint = current.checkpoint.save(
646
+ name="epoch_checkpoint",
647
+ metadata={
648
+ "epoch": i,
649
+ "loss": loss,
650
+ }
651
+ )
721
652
  ```
722
- python myflow.py run --with kubernetes
653
+
654
+ - Using Loaded Checkpoints
655
+
656
+ ```python
657
+ @retry(times=3)
658
+ @checkpoint
659
+ @step
660
+ def train(self):
661
+ # Assume that the task has restarted and the previous attempt of the task
662
+ # saved a checkpoint
663
+ checkpoint_path = None
664
+ if current.checkpoint.is_loaded: # Check if a checkpoint is loaded
665
+ print("Loaded checkpoint from the previous attempt")
666
+ checkpoint_path = current.checkpoint.directory
667
+
668
+ model = create_model(self.parameters, checkpoint_path = checkpoint_path)
669
+ for i in range(self.epochs):
670
+ ...
723
671
  ```
724
- which executes the flow on the desired system using the
725
- requirements specified in `@resources`.
726
672
 
727
673
 
728
674
  Parameters
729
675
  ----------
730
- cpu : int, default 1
731
- Number of CPUs required for this step.
732
- gpu : int, optional, default None
733
- Number of GPUs required for this step.
734
- disk : int, optional, default None
735
- Disk size (in MB) required for this step. Only applies on Kubernetes.
736
- memory : int, default 4096
737
- Memory size (in MB) required for this step.
738
- shared_memory : int, optional, default None
739
- The value for the size (in MiB) of the /dev/shm volume for this step.
740
- This parameter maps to the `--shm-size` option in Docker.
676
+ load_policy : str, default: "fresh"
677
+ The policy for loading the checkpoint. The following policies are supported:
678
+ - "eager": Loads the latest available checkpoint within the namespace.
679
+ With this mode, the latest checkpoint written by any previous task (can be even a different run) of the step
680
+ will be loaded at the start of the task.
681
+ - "none": Do not load any checkpoint
682
+ - "fresh": Loads the latest checkpoint created within the running Task.
683
+ This mode helps loading checkpoints across various retry attempts of the same task.
684
+ With this mode, no checkpoint will be loaded at the start of a task but any checkpoints
685
+ created within the task will be loaded when the task retries execution on failure.
686
+
687
+ temp_dir_root : str, default: None
688
+ The root directory under which `current.checkpoint.directory` will be created.
741
689
  """
742
690
  ...
743
691
 
744
692
  @typing.overload
745
- def conda(*, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
693
+ def checkpoint(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
694
+ ...
695
+
696
+ @typing.overload
697
+ def checkpoint(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
698
+ ...
699
+
700
+ def checkpoint(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, load_policy: str = 'fresh', temp_dir_root: str = None):
746
701
  """
747
- Specifies the Conda environment for the step.
702
+ Enables checkpointing for a step.
748
703
 
749
- Information in this decorator will augment any
750
- attributes set in the `@conda_base` flow-level decorator. Hence,
751
- you can use `@conda_base` to set packages required by all
752
- steps and use `@conda` to specify step-specific overrides.
704
+ > Examples
705
+
706
+ - Saving Checkpoints
707
+
708
+ ```python
709
+ @checkpoint
710
+ @step
711
+ def train(self):
712
+ model = create_model(self.parameters, checkpoint_path = None)
713
+ for i in range(self.epochs):
714
+ # some training logic
715
+ loss = model.train(self.dataset)
716
+ if i % 10 == 0:
717
+ model.save(
718
+ current.checkpoint.directory,
719
+ )
720
+ # saves the contents of the `current.checkpoint.directory` as a checkpoint
721
+ # and returns a reference dictionary to the checkpoint saved in the datastore
722
+ self.latest_checkpoint = current.checkpoint.save(
723
+ name="epoch_checkpoint",
724
+ metadata={
725
+ "epoch": i,
726
+ "loss": loss,
727
+ }
728
+ )
729
+ ```
730
+
731
+ - Using Loaded Checkpoints
732
+
733
+ ```python
734
+ @retry(times=3)
735
+ @checkpoint
736
+ @step
737
+ def train(self):
738
+ # Assume that the task has restarted and the previous attempt of the task
739
+ # saved a checkpoint
740
+ checkpoint_path = None
741
+ if current.checkpoint.is_loaded: # Check if a checkpoint is loaded
742
+ print("Loaded checkpoint from the previous attempt")
743
+ checkpoint_path = current.checkpoint.directory
744
+
745
+ model = create_model(self.parameters, checkpoint_path = checkpoint_path)
746
+ for i in range(self.epochs):
747
+ ...
748
+ ```
753
749
 
754
750
 
755
751
  Parameters
756
752
  ----------
757
- packages : Dict[str, str], default {}
758
- Packages to use for this step. The key is the name of the package
759
- and the value is the version to use.
760
- libraries : Dict[str, str], default {}
761
- Supported for backward compatibility. When used with packages, packages will take precedence.
762
- python : str, optional, default None
763
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
764
- that the version used will correspond to the version of the Python interpreter used to start the run.
765
- disabled : bool, default False
766
- If set to True, disables @conda.
753
+ load_policy : str, default: "fresh"
754
+ The policy for loading the checkpoint. The following policies are supported:
755
+ - "eager": Loads the latest available checkpoint within the namespace.
756
+ With this mode, the latest checkpoint written by any previous task (can be even a different run) of the step
757
+ will be loaded at the start of the task.
758
+ - "none": Do not load any checkpoint
759
+ - "fresh": Loads the latest checkpoint created within the running Task.
760
+ This mode helps loading checkpoints across various retry attempts of the same task.
761
+ With this mode, no checkpoint will be loaded at the start of a task but any checkpoints
762
+ created within the task will be loaded when the task retries execution on failure.
763
+
764
+ temp_dir_root : str, default: None
765
+ The root directory under which `current.checkpoint.directory` will be created.
767
766
  """
768
767
  ...
769
768
 
770
769
  @typing.overload
771
- def conda(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
770
+ def parallel(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
771
+ """
772
+ Decorator prototype for all step decorators. This function gets specialized
773
+ and imported for all decorator types by _import_plugin_decorators().
774
+ """
772
775
  ...
773
776
 
774
777
  @typing.overload
775
- def conda(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
778
+ def parallel(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
776
779
  ...
777
780
 
778
- def conda(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False):
781
+ def parallel(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
779
782
  """
780
- Specifies the Conda environment for the step.
781
-
782
- Information in this decorator will augment any
783
- attributes set in the `@conda_base` flow-level decorator. Hence,
784
- you can use `@conda_base` to set packages required by all
785
- steps and use `@conda` to specify step-specific overrides.
786
-
787
-
788
- Parameters
789
- ----------
790
- packages : Dict[str, str], default {}
791
- Packages to use for this step. The key is the name of the package
792
- and the value is the version to use.
793
- libraries : Dict[str, str], default {}
794
- Supported for backward compatibility. When used with packages, packages will take precedence.
795
- python : str, optional, default None
796
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
797
- that the version used will correspond to the version of the Python interpreter used to start the run.
798
- disabled : bool, default False
799
- If set to True, disables @conda.
783
+ Decorator prototype for all step decorators. This function gets specialized
784
+ and imported for all decorator types by _import_plugin_decorators().
800
785
  """
801
786
  ...
802
787
 
803
788
  @typing.overload
804
- def pypi(*, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
789
+ def environment(*, vars: typing.Dict[str, str] = {}) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
805
790
  """
806
- Specifies the PyPI packages for the step.
807
-
808
- Information in this decorator will augment any
809
- attributes set in the `@pyi_base` flow-level decorator. Hence,
810
- you can use `@pypi_base` to set packages required by all
811
- steps and use `@pypi` to specify step-specific overrides.
791
+ Specifies environment variables to be set prior to the execution of a step.
812
792
 
813
793
 
814
794
  Parameters
815
795
  ----------
816
- packages : Dict[str, str], default: {}
817
- Packages to use for this step. The key is the name of the package
818
- and the value is the version to use.
819
- python : str, optional, default: None
820
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
821
- that the version used will correspond to the version of the Python interpreter used to start the run.
796
+ vars : Dict[str, str], default {}
797
+ Dictionary of environment variables to set.
822
798
  """
823
799
  ...
824
800
 
825
801
  @typing.overload
826
- def pypi(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
802
+ def environment(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
827
803
  ...
828
804
 
829
805
  @typing.overload
830
- def pypi(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
806
+ def environment(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
831
807
  ...
832
808
 
833
- def pypi(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None):
809
+ def environment(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, vars: typing.Dict[str, str] = {}):
834
810
  """
835
- Specifies the PyPI packages for the step.
836
-
837
- Information in this decorator will augment any
838
- attributes set in the `@pyi_base` flow-level decorator. Hence,
839
- you can use `@pypi_base` to set packages required by all
840
- steps and use `@pypi` to specify step-specific overrides.
811
+ Specifies environment variables to be set prior to the execution of a step.
841
812
 
842
813
 
843
814
  Parameters
844
815
  ----------
845
- packages : Dict[str, str], default: {}
846
- Packages to use for this step. The key is the name of the package
847
- and the value is the version to use.
848
- python : str, optional, default: None
849
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
850
- that the version used will correspond to the version of the Python interpreter used to start the run.
816
+ vars : Dict[str, str], default {}
817
+ Dictionary of environment variables to set.
851
818
  """
852
819
  ...
853
820
 
854
- def nvct(*, gpu: int, gpu_type: str) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
821
+ def kubernetes(*, cpu: int = 1, memory: int = 4096, disk: int = 10240, image: typing.Optional[str] = None, image_pull_policy: str = 'KUBERNETES_IMAGE_PULL_POLICY', image_pull_secrets: typing.List[str] = [], service_account: str = 'METAFLOW_KUBERNETES_SERVICE_ACCOUNT', secrets: typing.Optional[typing.List[str]] = None, node_selector: typing.Union[typing.Dict[str, str], str, None] = None, namespace: str = 'METAFLOW_KUBERNETES_NAMESPACE', gpu: typing.Optional[int] = None, gpu_vendor: str = 'KUBERNETES_GPU_VENDOR', tolerations: typing.List[typing.Dict[str, str]] = [], labels: typing.Dict[str, str] = 'METAFLOW_KUBERNETES_LABELS', annotations: typing.Dict[str, str] = 'METAFLOW_KUBERNETES_ANNOTATIONS', use_tmpfs: bool = False, tmpfs_tempdir: bool = True, tmpfs_size: typing.Optional[int] = None, tmpfs_path: typing.Optional[str] = '/metaflow_temp', persistent_volume_claims: typing.Optional[typing.Dict[str, str]] = None, shared_memory: typing.Optional[int] = None, port: typing.Optional[int] = None, compute_pool: typing.Optional[str] = None, hostname_resolution_timeout: int = 600, qos: str = 'Burstable', security_context: typing.Optional[typing.Dict[str, typing.Any]] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
855
822
  """
856
- Specifies that this step should execute on DGX cloud.
823
+ Specifies that this step should execute on Kubernetes.
857
824
 
858
825
 
859
826
  Parameters
860
827
  ----------
861
- gpu : int
862
- Number of GPUs to use.
863
- gpu_type : str
864
- Type of Nvidia GPU to use.
828
+ cpu : int, default 1
829
+ Number of CPUs required for this step. If `@resources` is
830
+ also present, the maximum value from all decorators is used.
831
+ memory : int, default 4096
832
+ Memory size (in MB) required for this step. If
833
+ `@resources` is also present, the maximum value from all decorators is
834
+ used.
835
+ disk : int, default 10240
836
+ Disk size (in MB) required for this step. If
837
+ `@resources` is also present, the maximum value from all decorators is
838
+ used.
839
+ image : str, optional, default None
840
+ Docker image to use when launching on Kubernetes. If not specified, and
841
+ METAFLOW_KUBERNETES_CONTAINER_IMAGE is specified, that image is used. If
842
+ not, a default Docker image mapping to the current version of Python is used.
843
+ image_pull_policy: str, default KUBERNETES_IMAGE_PULL_POLICY
844
+ If given, the imagePullPolicy to be applied to the Docker image of the step.
845
+ image_pull_secrets: List[str], default []
846
+ The default is extracted from METAFLOW_KUBERNETES_IMAGE_PULL_SECRETS.
847
+ Kubernetes image pull secrets to use when pulling container images
848
+ in Kubernetes.
849
+ service_account : str, default METAFLOW_KUBERNETES_SERVICE_ACCOUNT
850
+ Kubernetes service account to use when launching pod in Kubernetes.
851
+ secrets : List[str], optional, default None
852
+ Kubernetes secrets to use when launching pod in Kubernetes. These
853
+ secrets are in addition to the ones defined in `METAFLOW_KUBERNETES_SECRETS`
854
+ in Metaflow configuration.
855
+ node_selector: Union[Dict[str,str], str], optional, default None
856
+ Kubernetes node selector(s) to apply to the pod running the task.
857
+ Can be passed in as a comma separated string of values e.g.
858
+ 'kubernetes.io/os=linux,kubernetes.io/arch=amd64' or as a dictionary
859
+ {'kubernetes.io/os': 'linux', 'kubernetes.io/arch': 'amd64'}
860
+ namespace : str, default METAFLOW_KUBERNETES_NAMESPACE
861
+ Kubernetes namespace to use when launching pod in Kubernetes.
862
+ gpu : int, optional, default None
863
+ Number of GPUs required for this step. A value of zero implies that
864
+ the scheduled node should not have GPUs.
865
+ gpu_vendor : str, default KUBERNETES_GPU_VENDOR
866
+ The vendor of the GPUs to be used for this step.
867
+ tolerations : List[Dict[str,str]], default []
868
+ The default is extracted from METAFLOW_KUBERNETES_TOLERATIONS.
869
+ Kubernetes tolerations to use when launching pod in Kubernetes.
870
+ labels: Dict[str, str], default: METAFLOW_KUBERNETES_LABELS
871
+ Kubernetes labels to use when launching pod in Kubernetes.
872
+ annotations: Dict[str, str], default: METAFLOW_KUBERNETES_ANNOTATIONS
873
+ Kubernetes annotations to use when launching pod in Kubernetes.
874
+ use_tmpfs : bool, default False
875
+ This enables an explicit tmpfs mount for this step.
876
+ tmpfs_tempdir : bool, default True
877
+ sets METAFLOW_TEMPDIR to tmpfs_path if set for this step.
878
+ tmpfs_size : int, optional, default: None
879
+ The value for the size (in MiB) of the tmpfs mount for this step.
880
+ This parameter maps to the `--tmpfs` option in Docker. Defaults to 50% of the
881
+ memory allocated for this step.
882
+ tmpfs_path : str, optional, default /metaflow_temp
883
+ Path to tmpfs mount for this step.
884
+ persistent_volume_claims : Dict[str, str], optional, default None
885
+ A map (dictionary) of persistent volumes to be mounted to the pod for this step. The map is from persistent
886
+ volumes to the path to which the volume is to be mounted, e.g., `{'pvc-name': '/path/to/mount/on'}`.
887
+ shared_memory: int, optional
888
+ Shared memory size (in MiB) required for this step
889
+ port: int, optional
890
+ Port number to specify in the Kubernetes job object
891
+ compute_pool : str, optional, default None
892
+ Compute pool to be used for this step.
893
+ If not specified, any accessible compute pool within the perimeter is used.
894
+ hostname_resolution_timeout: int, default 10 * 60
895
+ Timeout in seconds for the worker tasks in the gang-scheduled cluster to resolve the hostname of the control task.
896
+ Only applicable when @parallel is used.
897
+ qos: str, default: Burstable
898
+ Quality of Service class to assign to the pod. Supported values are: Guaranteed, Burstable, BestEffort
899
+
900
+ security_context: Dict[str, Any], optional, default None
901
+ Container security context. Applies to the task container. Allows the following keys:
902
+ - privileged: bool, optional, default None
903
+ - allow_privilege_escalation: bool, optional, default None
904
+ - run_as_user: int, optional, default None
905
+ - run_as_group: int, optional, default None
906
+ - run_as_non_root: bool, optional, default None
865
907
  """
866
908
  ...
867
909
 
868
910
  @typing.overload
869
- def test_append_card(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
911
+ def coreweave_s3_proxy(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
870
912
  """
871
- A simple decorator that demonstrates using CardDecoratorInjector
872
- to inject a card and render simple markdown content.
913
+ CoreWeave-specific S3 Proxy decorator for routing S3 requests through a local proxy service.
914
+ It exists to make it easier for users to know that this decorator should only be used with
915
+ a Neo Cloud like CoreWeave.
873
916
  """
874
917
  ...
875
918
 
876
919
  @typing.overload
877
- def test_append_card(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
920
+ def coreweave_s3_proxy(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
878
921
  ...
879
922
 
880
- def test_append_card(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
923
+ def coreweave_s3_proxy(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
881
924
  """
882
- A simple decorator that demonstrates using CardDecoratorInjector
883
- to inject a card and render simple markdown content.
925
+ CoreWeave-specific S3 Proxy decorator for routing S3 requests through a local proxy service.
926
+ It exists to make it easier for users to know that this decorator should only be used with
927
+ a Neo Cloud like CoreWeave.
884
928
  """
885
929
  ...
886
930
 
887
931
  @typing.overload
888
- def app_deploy(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
932
+ def retry(*, times: int = 3, minutes_between_retries: int = 2) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
889
933
  """
890
- Decorator prototype for all step decorators. This function gets specialized
891
- and imported for all decorators types by _import_plugin_decorators().
934
+ Specifies the number of times the task corresponding
935
+ to a step needs to be retried.
936
+
937
+ This decorator is useful for handling transient errors, such as networking issues.
938
+ If your task contains operations that can't be retried safely, e.g. database updates,
939
+ it is advisable to annotate it with `@retry(times=0)`.
940
+
941
+ This can be used in conjunction with the `@catch` decorator. The `@catch`
942
+ decorator will execute a no-op task after all retries have been exhausted,
943
+ ensuring that the flow execution can continue.
944
+
945
+
946
+ Parameters
947
+ ----------
948
+ times : int, default 3
949
+ Number of times to retry this task.
950
+ minutes_between_retries : int, default 2
951
+ Number of minutes between retries.
892
952
  """
893
953
  ...
894
954
 
895
955
  @typing.overload
896
- def app_deploy(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
956
+ def retry(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
897
957
  ...
898
958
 
899
- def app_deploy(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
900
- """
901
- Decorator prototype for all step decorators. This function gets specialized
902
- and imported for all decorators types by _import_plugin_decorators().
903
- """
959
+ @typing.overload
960
+ def retry(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
904
961
  ...
905
962
 
906
- @typing.overload
907
- def retry(*, times: int = 3, minutes_between_retries: int = 2) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
963
+ def retry(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, times: int = 3, minutes_between_retries: int = 2):
908
964
  """
909
965
  Specifies the number of times the task corresponding
910
966
  to a step needs to be retried.
@@ -920,494 +976,433 @@ def retry(*, times: int = 3, minutes_between_retries: int = 2) -> typing.Callabl
920
976
 
921
977
  Parameters
922
978
  ----------
923
- times : int, default 3
924
- Number of times to retry this task.
925
- minutes_between_retries : int, default 2
926
- Number of minutes between retries.
979
+ times : int, default 3
980
+ Number of times to retry this task.
981
+ minutes_between_retries : int, default 2
982
+ Number of minutes between retries.
983
+ """
984
+ ...
985
+
986
+ def nvct(*, gpu: int, gpu_type: str) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
987
+ """
988
+ Specifies that this step should execute on DGX cloud.
989
+
990
+
991
+ Parameters
992
+ ----------
993
+ gpu : int
994
+ Number of GPUs to use.
995
+ gpu_type : str
996
+ Type of Nvidia GPU to use.
997
+ """
998
+ ...
999
+
1000
+ @typing.overload
1001
+ def card(*, type: str = 'default', id: typing.Optional[str] = None, options: typing.Dict[str, typing.Any] = {}, timeout: int = 45) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1002
+ """
1003
+ Creates a human-readable report, a Metaflow Card, after this step completes.
1004
+
1005
+ Note that you may add multiple `@card` decorators in a step with different parameters.
1006
+
1007
+
1008
+ Parameters
1009
+ ----------
1010
+ type : str, default 'default'
1011
+ Card type.
1012
+ id : str, optional, default None
1013
+ If multiple cards are present, use this id to identify this card.
1014
+ options : Dict[str, Any], default {}
1015
+ Options passed to the card. The contents depend on the card type.
1016
+ timeout : int, default 45
1017
+ Interrupt reporting if it takes more than this many seconds.
1018
+ """
1019
+ ...
1020
+
1021
+ @typing.overload
1022
+ def card(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1023
+ ...
1024
+
1025
+ @typing.overload
1026
+ def card(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1027
+ ...
1028
+
1029
+ def card(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, type: str = 'default', id: typing.Optional[str] = None, options: typing.Dict[str, typing.Any] = {}, timeout: int = 45):
1030
+ """
1031
+ Creates a human-readable report, a Metaflow Card, after this step completes.
1032
+
1033
+ Note that you may add multiple `@card` decorators in a step with different parameters.
1034
+
1035
+
1036
+ Parameters
1037
+ ----------
1038
+ type : str, default 'default'
1039
+ Card type.
1040
+ id : str, optional, default None
1041
+ If multiple cards are present, use this id to identify this card.
1042
+ options : Dict[str, Any], default {}
1043
+ Options passed to the card. The contents depend on the card type.
1044
+ timeout : int, default 45
1045
+ Interrupt reporting if it takes more than this many seconds.
927
1046
  """
928
1047
  ...
929
1048
 
930
1049
  @typing.overload
931
- def retry(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1050
+ def nebius_s3_proxy(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1051
+ """
1052
+ Nebius-specific S3 Proxy decorator for routing S3 requests through a local proxy service.
1053
+ It exists to make it easier for users to know that this decorator should only be used with
1054
+ a Neo Cloud like Nebius.
1055
+ """
932
1056
  ...
933
1057
 
934
1058
  @typing.overload
935
- def retry(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1059
+ def nebius_s3_proxy(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
936
1060
  ...
937
1061
 
938
- def retry(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, times: int = 3, minutes_between_retries: int = 2):
1062
+ def nebius_s3_proxy(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
939
1063
  """
940
- Specifies the number of times the task corresponding
941
- to a step needs to be retried.
942
-
943
- This decorator is useful for handling transient errors, such as networking issues.
944
- If your task contains operations that can't be retried safely, e.g. database updates,
945
- it is advisable to annotate it with `@retry(times=0)`.
946
-
947
- This can be used in conjunction with the `@catch` decorator. The `@catch`
948
- decorator will execute a no-op task after all retries have been exhausted,
949
- ensuring that the flow execution can continue.
950
-
951
-
952
- Parameters
953
- ----------
954
- times : int, default 3
955
- Number of times to retry this task.
956
- minutes_between_retries : int, default 2
957
- Number of minutes between retries.
1064
+ Nebius-specific S3 Proxy decorator for routing S3 requests through a local proxy service.
1065
+ It exists to make it easier for users to know that this decorator should only be used with
1066
+ a Neo Cloud like Nebius.
958
1067
  """
959
1068
  ...
960
1069
 
961
1070
  @typing.overload
962
- def environment(*, vars: typing.Dict[str, str] = {}) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1071
+ def resources(*, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
963
1072
  """
964
- Specifies environment variables to be set prior to the execution of a step.
1073
+ Specifies the resources needed when executing this step.
1074
+
1075
+ Use `@resources` to specify the resource requirements
1076
+ independently of the specific compute layer (`@batch`, `@kubernetes`).
1077
+
1078
+ You can choose the compute layer on the command line by executing e.g.
1079
+ ```
1080
+ python myflow.py run --with batch
1081
+ ```
1082
+ or
1083
+ ```
1084
+ python myflow.py run --with kubernetes
1085
+ ```
1086
+ which executes the flow on the desired system using the
1087
+ requirements specified in `@resources`.
965
1088
 
966
1089
 
967
1090
  Parameters
968
1091
  ----------
969
- vars : Dict[str, str], default {}
970
- Dictionary of environment variables to set.
1092
+ cpu : int, default 1
1093
+ Number of CPUs required for this step.
1094
+ gpu : int, optional, default None
1095
+ Number of GPUs required for this step.
1096
+ disk : int, optional, default None
1097
+ Disk size (in MB) required for this step. Only applies on Kubernetes.
1098
+ memory : int, default 4096
1099
+ Memory size (in MB) required for this step.
1100
+ shared_memory : int, optional, default None
1101
+ The value for the size (in MiB) of the /dev/shm volume for this step.
1102
+ This parameter maps to the `--shm-size` option in Docker.
971
1103
  """
972
1104
  ...
973
1105
 
974
1106
  @typing.overload
975
- def environment(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1107
+ def resources(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
976
1108
  ...
977
1109
 
978
1110
  @typing.overload
979
- def environment(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1111
+ def resources(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
980
1112
  ...
981
1113
 
982
- def environment(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, vars: typing.Dict[str, str] = {}):
1114
+ def resources(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None):
983
1115
  """
984
- Specifies environment variables to be set prior to the execution of a step.
1116
+ Specifies the resources needed when executing this step.
1117
+
1118
+ Use `@resources` to specify the resource requirements
1119
+ independently of the specific compute layer (`@batch`, `@kubernetes`).
1120
+
1121
+ You can choose the compute layer on the command line by executing e.g.
1122
+ ```
1123
+ python myflow.py run --with batch
1124
+ ```
1125
+ or
1126
+ ```
1127
+ python myflow.py run --with kubernetes
1128
+ ```
1129
+ which executes the flow on the desired system using the
1130
+ requirements specified in `@resources`.
985
1131
 
986
1132
 
987
1133
  Parameters
988
1134
  ----------
989
- vars : Dict[str, str], default {}
990
- Dictionary of environment variables to set.
1135
+ cpu : int, default 1
1136
+ Number of CPUs required for this step.
1137
+ gpu : int, optional, default None
1138
+ Number of GPUs required for this step.
1139
+ disk : int, optional, default None
1140
+ Disk size (in MB) required for this step. Only applies on Kubernetes.
1141
+ memory : int, default 4096
1142
+ Memory size (in MB) required for this step.
1143
+ shared_memory : int, optional, default None
1144
+ The value for the size (in MiB) of the /dev/shm volume for this step.
1145
+ This parameter maps to the `--shm-size` option in Docker.
991
1146
  """
992
1147
  ...
993
1148
 
994
1149
  @typing.overload
995
- def checkpoint(*, load_policy: str = 'fresh', temp_dir_root: str = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1150
+ def timeout(*, seconds: int = 0, minutes: int = 0, hours: int = 0) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
996
1151
  """
997
- Enables checkpointing for a step.
998
-
999
- > Examples
1000
-
1001
- - Saving Checkpoints
1002
-
1003
- ```python
1004
- @checkpoint
1005
- @step
1006
- def train(self):
1007
- model = create_model(self.parameters, checkpoint_path = None)
1008
- for i in range(self.epochs):
1009
- # some training logic
1010
- loss = model.train(self.dataset)
1011
- if i % 10 == 0:
1012
- model.save(
1013
- current.checkpoint.directory,
1014
- )
1015
- # saves the contents of the `current.checkpoint.directory` as a checkpoint
1016
- # and returns a reference dictionary to the checkpoint saved in the datastore
1017
- self.latest_checkpoint = current.checkpoint.save(
1018
- name="epoch_checkpoint",
1019
- metadata={
1020
- "epoch": i,
1021
- "loss": loss,
1022
- }
1023
- )
1024
- ```
1152
+ Specifies a timeout for your step.
1025
1153
 
1026
- - Using Loaded Checkpoints
1154
+ This decorator is useful if this step may hang indefinitely.
1027
1155
 
1028
- ```python
1029
- @retry(times=3)
1030
- @checkpoint
1031
- @step
1032
- def train(self):
1033
- # Assume that the task has restarted and the previous attempt of the task
1034
- # saved a checkpoint
1035
- checkpoint_path = None
1036
- if current.checkpoint.is_loaded: # Check if a checkpoint is loaded
1037
- print("Loaded checkpoint from the previous attempt")
1038
- checkpoint_path = current.checkpoint.directory
1156
+ This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
1157
+ A timeout is considered to be an exception thrown by the step. It will cause the step to be
1158
+ retried if needed and the exception will be caught by the `@catch` decorator, if present.
1039
1159
 
1040
- model = create_model(self.parameters, checkpoint_path = checkpoint_path)
1041
- for i in range(self.epochs):
1042
- ...
1043
- ```
1160
+ Note that all the values specified in parameters are added together so if you specify
1161
+ 60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.
1044
1162
 
1045
1163
 
1046
1164
  Parameters
1047
1165
  ----------
1048
- load_policy : str, default: "fresh"
1049
- The policy for loading the checkpoint. The following policies are supported:
1050
- - "eager": Loads the the latest available checkpoint within the namespace.
1051
- With this mode, the latest checkpoint written by any previous task (can be even a different run) of the step
1052
- will be loaded at the start of the task.
1053
- - "none": Do not load any checkpoint
1054
- - "fresh": Loads the lastest checkpoint created within the running Task.
1055
- This mode helps loading checkpoints across various retry attempts of the same task.
1056
- With this mode, no checkpoint will be loaded at the start of a task but any checkpoints
1057
- created within the task will be loaded when the task is retries execution on failure.
1058
-
1059
- temp_dir_root : str, default: None
1060
- The root directory under which `current.checkpoint.directory` will be created.
1166
+ seconds : int, default 0
1167
+ Number of seconds to wait prior to timing out.
1168
+ minutes : int, default 0
1169
+ Number of minutes to wait prior to timing out.
1170
+ hours : int, default 0
1171
+ Number of hours to wait prior to timing out.
1061
1172
  """
1062
1173
  ...
1063
1174
 
1064
1175
  @typing.overload
1065
- def checkpoint(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1176
+ def timeout(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1066
1177
  ...
1067
1178
 
1068
1179
  @typing.overload
1069
- def checkpoint(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1180
+ def timeout(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1070
1181
  ...
1071
1182
 
1072
- def checkpoint(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, load_policy: str = 'fresh', temp_dir_root: str = None):
1183
+ def timeout(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, seconds: int = 0, minutes: int = 0, hours: int = 0):
1073
1184
  """
1074
- Enables checkpointing for a step.
1075
-
1076
- > Examples
1077
-
1078
- - Saving Checkpoints
1079
-
1080
- ```python
1081
- @checkpoint
1082
- @step
1083
- def train(self):
1084
- model = create_model(self.parameters, checkpoint_path = None)
1085
- for i in range(self.epochs):
1086
- # some training logic
1087
- loss = model.train(self.dataset)
1088
- if i % 10 == 0:
1089
- model.save(
1090
- current.checkpoint.directory,
1091
- )
1092
- # saves the contents of the `current.checkpoint.directory` as a checkpoint
1093
- # and returns a reference dictionary to the checkpoint saved in the datastore
1094
- self.latest_checkpoint = current.checkpoint.save(
1095
- name="epoch_checkpoint",
1096
- metadata={
1097
- "epoch": i,
1098
- "loss": loss,
1099
- }
1100
- )
1101
- ```
1185
+ Specifies a timeout for your step.
1102
1186
 
1103
- - Using Loaded Checkpoints
1187
+ This decorator is useful if this step may hang indefinitely.
1104
1188
 
1105
- ```python
1106
- @retry(times=3)
1107
- @checkpoint
1108
- @step
1109
- def train(self):
1110
- # Assume that the task has restarted and the previous attempt of the task
1111
- # saved a checkpoint
1112
- checkpoint_path = None
1113
- if current.checkpoint.is_loaded: # Check if a checkpoint is loaded
1114
- print("Loaded checkpoint from the previous attempt")
1115
- checkpoint_path = current.checkpoint.directory
1189
+ This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
1190
+ A timeout is considered to be an exception thrown by the step. It will cause the step to be
1191
+ retried if needed and the exception will be caught by the `@catch` decorator, if present.
1116
1192
 
1117
- model = create_model(self.parameters, checkpoint_path = checkpoint_path)
1118
- for i in range(self.epochs):
1119
- ...
1120
- ```
1193
+ Note that all the values specified in parameters are added together so if you specify
1194
+ 60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.
1121
1195
 
1122
1196
 
1123
1197
  Parameters
1124
1198
  ----------
1125
- load_policy : str, default: "fresh"
1126
- The policy for loading the checkpoint. The following policies are supported:
1127
- - "eager": Loads the the latest available checkpoint within the namespace.
1128
- With this mode, the latest checkpoint written by any previous task (can be even a different run) of the step
1129
- will be loaded at the start of the task.
1130
- - "none": Do not load any checkpoint
1131
- - "fresh": Loads the lastest checkpoint created within the running Task.
1132
- This mode helps loading checkpoints across various retry attempts of the same task.
1133
- With this mode, no checkpoint will be loaded at the start of a task but any checkpoints
1134
- created within the task will be loaded when the task is retries execution on failure.
1135
-
1136
- temp_dir_root : str, default: None
1137
- The root directory under which `current.checkpoint.directory` will be created.
1199
+ seconds : int, default 0
1200
+ Number of seconds to wait prior to timing out.
1201
+ minutes : int, default 0
1202
+ Number of minutes to wait prior to timing out.
1203
+ hours : int, default 0
1204
+ Number of hours to wait prior to timing out.
1205
+ """
1206
+ ...
1207
+
1208
+ @typing.overload
1209
+ def app_deploy(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1210
+ """
1211
+ Decorator prototype for all step decorators. This function gets specialized
1212
+ and imported for all decorators types by _import_plugin_decorators().
1138
1213
  """
1139
1214
  ...
1140
1215
 
1141
- def nvidia(*, gpu: int, gpu_type: str, queue_timeout: int) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1216
+ @typing.overload
1217
+ def app_deploy(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1218
+ ...
1219
+
1220
+ def app_deploy(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
1142
1221
  """
1143
- Specifies that this step should execute on DGX cloud.
1144
-
1145
-
1146
- Parameters
1147
- ----------
1148
- gpu : int
1149
- Number of GPUs to use.
1150
- gpu_type : str
1151
- Type of Nvidia GPU to use.
1152
- queue_timeout : int
1153
- Time to keep the job in NVCF's queue.
1222
+ Decorator prototype for all step decorators. This function gets specialized
1223
+ and imported for all decorators types by _import_plugin_decorators().
1154
1224
  """
1155
1225
  ...
1156
1226
 
1157
- def huggingface_hub(*, temp_dir_root: typing.Optional[str] = None, load: typing.Union[typing.List[str], typing.List[typing.Tuple[typing.Dict, str]], typing.List[typing.Tuple[str, str]], typing.List[typing.Dict], None]) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1227
+ def s3_proxy(*, integration_name: typing.Optional[str] = None, write_mode: typing.Optional[str] = None, debug: typing.Optional[bool] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1158
1228
  """
1159
- Decorator that helps cache, version and store models/datasets from huggingface hub.
1160
-
1161
- > Examples
1162
-
1163
- **Usage: creating references of models from huggingface that may be loaded in downstream steps**
1164
- ```python
1165
- @huggingface_hub
1166
- @step
1167
- def pull_model_from_huggingface(self):
1168
- # `current.huggingface_hub.snapshot_download` downloads the model from the Hugging Face Hub
1169
- # and saves it in the backend storage based on the model's `repo_id`. If there exists a model
1170
- # with the same `repo_id` in the backend storage, it will not download the model again. The return
1171
- # value of the function is a reference to the model in the backend storage.
1172
- # This reference can be used to load the model in the subsequent steps via `@model(load=["llama_model"])`
1173
-
1174
- self.model_id = "mistralai/Mistral-7B-Instruct-v0.1"
1175
- self.llama_model = current.huggingface_hub.snapshot_download(
1176
- repo_id=self.model_id,
1177
- allow_patterns=["*.safetensors", "*.json", "tokenizer.*"],
1178
- )
1179
- self.next(self.train)
1180
- ```
1181
-
1182
- **Usage: loading models directly from huggingface hub or from cache (from metaflow's datastore)**
1183
- ```python
1184
- @huggingface_hub(load=["mistralai/Mistral-7B-Instruct-v0.1"])
1185
- @step
1186
- def pull_model_from_huggingface(self):
1187
- path_to_model = current.huggingface_hub.loaded["mistralai/Mistral-7B-Instruct-v0.1"]
1188
- ```
1189
-
1190
- ```python
1191
- @huggingface_hub(load=[("mistralai/Mistral-7B-Instruct-v0.1", "/my-directory"), ("myorg/mistral-lora, "/my-lora-directory")])
1192
- @step
1193
- def finetune_model(self):
1194
- path_to_model = current.huggingface_hub.loaded["mistralai/Mistral-7B-Instruct-v0.1"]
1195
- # path_to_model will be /my-directory
1196
- ```
1197
-
1198
- ```python
1199
- # Takes all the arguments passed to `snapshot_download`
1200
- # except for `local_dir`
1201
- @huggingface_hub(load=[
1202
- {
1203
- "repo_id": "mistralai/Mistral-7B-Instruct-v0.1",
1204
- },
1205
- {
1206
- "repo_id": "myorg/mistral-lora",
1207
- "repo_type": "model",
1208
- },
1209
- ])
1210
- @step
1211
- def finetune_model(self):
1212
- path_to_model = current.huggingface_hub.loaded["mistralai/Mistral-7B-Instruct-v0.1"]
1213
- # path_to_model will be /my-directory
1214
- ```
1229
+ S3 Proxy decorator for routing S3 requests through a local proxy service.
1215
1230
 
1216
1231
 
1217
1232
  Parameters
1218
1233
  ----------
1219
- temp_dir_root : str, optional
1220
- The root directory that will hold the temporary directory where objects will be downloaded.
1221
-
1222
- load: Union[List[str], List[Tuple[Dict, str]], List[Tuple[str, str]], List[Dict], None]
1223
- The list of repos (models/datasets) to load.
1224
-
1225
- Loaded repos can be accessed via `current.huggingface_hub.loaded`. If load is set, then the following happens:
1226
-
1227
- - If repo (model/dataset) is not found in the datastore:
1228
- - Downloads the repo from Hugging Face Hub to a temporary directory (or uses specified path) for local access
1229
- - Stores it in Metaflow's datastore (s3/gcs/azure etc.) with a unique name based on repo_type/repo_id
1230
- - All HF models loaded for a `@step` will be cached separately under flow/step/namespace.
1231
-
1232
- - If repo is found in the datastore:
1233
- - Loads it directly from datastore to local path (can be temporary directory or specified path)
1234
+ integration_name : str, optional
1235
+ Name of the S3 proxy integration. If not specified, will use the only
1236
+ available S3 proxy integration in the namespace (fails if multiple exist).
1237
+ write_mode : str, optional
1238
+ The desired behavior during write operations to target (origin) S3 bucket.
1239
+ Allowed options are:
1240
+ "origin-and-cache" -> write to both the target S3 bucket and local object
1241
+ storage
1242
+ "origin" -> only write to the target S3 bucket
1243
+ "cache" -> only write to the object storage service used for caching
1244
+ debug : bool, optional
1245
+ Enable debug logging for proxy operations.
1234
1246
  """
1235
1247
  ...
1236
1248
 
1237
1249
  @typing.overload
1238
- def secrets(*, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = [], role: typing.Optional[str] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1250
+ def conda(*, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1239
1251
  """
1240
- Specifies secrets to be retrieved and injected as environment variables prior to
1241
- the execution of a step.
1252
+ Specifies the Conda environment for the step.
1253
+
1254
+ Information in this decorator will augment any
1255
+ attributes set in the `@conda_base` flow-level decorator. Hence,
1256
+ you can use `@conda_base` to set packages required by all
1257
+ steps and use `@conda` to specify step-specific overrides.
1242
1258
 
1243
1259
 
1244
1260
  Parameters
1245
1261
  ----------
1246
- sources : List[Union[str, Dict[str, Any]]], default: []
1247
- List of secret specs, defining how the secrets are to be retrieved
1248
- role : str, optional, default: None
1249
- Role to use for fetching secrets
1262
+ packages : Dict[str, str], default {}
1263
+ Packages to use for this step. The key is the name of the package
1264
+ and the value is the version to use.
1265
+ libraries : Dict[str, str], default {}
1266
+ Supported for backward compatibility. When used with packages, packages will take precedence.
1267
+ python : str, optional, default None
1268
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
1269
+ that the version used will correspond to the version of the Python interpreter used to start the run.
1270
+ disabled : bool, default False
1271
+ If set to True, disables @conda.
1250
1272
  """
1251
1273
  ...
1252
1274
 
1253
1275
  @typing.overload
1254
- def secrets(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1276
+ def conda(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1255
1277
  ...
1256
1278
 
1257
1279
  @typing.overload
1258
- def secrets(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1280
+ def conda(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1259
1281
  ...
1260
1282
 
1261
- def secrets(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = [], role: typing.Optional[str] = None):
1283
+ def conda(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False):
1262
1284
  """
1263
- Specifies secrets to be retrieved and injected as environment variables prior to
1264
- the execution of a step.
1285
+ Specifies the Conda environment for the step.
1286
+
1287
+ Information in this decorator will augment any
1288
+ attributes set in the `@conda_base` flow-level decorator. Hence,
1289
+ you can use `@conda_base` to set packages required by all
1290
+ steps and use `@conda` to specify step-specific overrides.
1265
1291
 
1266
1292
 
1267
1293
  Parameters
1268
1294
  ----------
1269
- sources : List[Union[str, Dict[str, Any]]], default: []
1270
- List of secret specs, defining how the secrets are to be retrieved
1271
- role : str, optional, default: None
1272
- Role to use for fetching secrets
1295
+ packages : Dict[str, str], default {}
1296
+ Packages to use for this step. The key is the name of the package
1297
+ and the value is the version to use.
1298
+ libraries : Dict[str, str], default {}
1299
+ Supported for backward compatibility. When used with packages, packages will take precedence.
1300
+ python : str, optional, default None
1301
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
1302
+ that the version used will correspond to the version of the Python interpreter used to start the run.
1303
+ disabled : bool, default False
1304
+ If set to True, disables @conda.
1273
1305
  """
1274
1306
  ...
1275
1307
 
1276
- @typing.overload
1277
- def trigger(*, event: typing.Union[str, typing.Dict[str, typing.Any], None] = None, events: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = [], options: typing.Dict[str, typing.Any] = {}) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1308
+ def ollama(*, models: list, backend: str, force_pull: bool, cache_update_policy: str, force_cache_update: bool, debug: bool, circuit_breaker_config: dict, timeout_config: dict) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1278
1309
  """
1279
- Specifies the event(s) that this flow depends on.
1310
+ This decorator is used to run Ollama APIs as Metaflow task sidecars.
1280
1311
 
1281
- ```
1282
- @trigger(event='foo')
1283
- ```
1284
- or
1285
- ```
1286
- @trigger(events=['foo', 'bar'])
1287
- ```
1312
+ User code call
1313
+ --------------
1314
+ @ollama(
1315
+ models=[...],
1316
+ ...
1317
+ )
1288
1318
 
1289
- Additionally, you can specify the parameter mappings
1290
- to map event payload to Metaflow parameters for the flow.
1291
- ```
1292
- @trigger(event={'name':'foo', 'parameters':{'flow_param': 'event_field'}})
1293
- ```
1294
- or
1295
- ```
1296
- @trigger(events=[{'name':'foo', 'parameters':{'flow_param_1': 'event_field_1'},
1297
- {'name':'bar', 'parameters':{'flow_param_2': 'event_field_2'}])
1298
- ```
1319
+ Valid backend options
1320
+ ---------------------
1321
+ - 'local': Run as a separate process on the local task machine.
1322
+ - (TODO) 'managed': Outerbounds hosts and selects compute provider.
1323
+ - (TODO) 'remote': Spin up separate instance to serve Ollama models.
1299
1324
 
1300
- 'parameters' can also be a list of strings and tuples like so:
1301
- ```
1302
- @trigger(event={'name':'foo', 'parameters':['common_name', ('flow_param', 'event_field')]})
1303
- ```
1304
- This is equivalent to:
1305
- ```
1306
- @trigger(event={'name':'foo', 'parameters':{'common_name': 'common_name', 'flow_param': 'event_field'}})
1307
- ```
1325
+ Valid model options
1326
+ -------------------
1327
+ Any model here https://ollama.com/search, e.g. 'llama3.2', 'llama3.3'
1308
1328
 
1309
1329
 
1310
1330
  Parameters
1311
1331
  ----------
1312
- event : Union[str, Dict[str, Any]], optional, default None
1313
- Event dependency for this flow.
1314
- events : List[Union[str, Dict[str, Any]]], default []
1315
- Events dependency for this flow.
1316
- options : Dict[str, Any], default {}
1317
- Backend-specific configuration for tuning eventing behavior.
1332
+ models: list[str]
1333
+ List of Ollama containers running models in sidecars.
1334
+ backend: str
1335
+ Determines where and how to run the Ollama process.
1336
+ force_pull: bool
1337
+ Whether to run `ollama pull` no matter what, or first check the remote cache in Metaflow datastore for this model key.
1338
+ cache_update_policy: str
1339
+ Cache update policy: "auto", "force", or "never".
1340
+ force_cache_update: bool
1341
+ Simple override for "force" cache update policy.
1342
+ debug: bool
1343
+ Whether to turn on verbose debugging logs.
1344
+ circuit_breaker_config: dict
1345
+ Configuration for circuit breaker protection. Keys: failure_threshold, recovery_timeout, reset_timeout.
1346
+ timeout_config: dict
1347
+ Configuration for various operation timeouts. Keys: pull, stop, health_check, install, server_startup.
1318
1348
  """
1319
1349
  ...
1320
1350
 
1321
1351
  @typing.overload
1322
- def trigger(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1323
- ...
1324
-
1325
- def trigger(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, event: typing.Union[str, typing.Dict[str, typing.Any], None] = None, events: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = [], options: typing.Dict[str, typing.Any] = {}):
1352
+ def conda_base(*, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1326
1353
  """
1327
- Specifies the event(s) that this flow depends on.
1328
-
1329
- ```
1330
- @trigger(event='foo')
1331
- ```
1332
- or
1333
- ```
1334
- @trigger(events=['foo', 'bar'])
1335
- ```
1336
-
1337
- Additionally, you can specify the parameter mappings
1338
- to map event payload to Metaflow parameters for the flow.
1339
- ```
1340
- @trigger(event={'name':'foo', 'parameters':{'flow_param': 'event_field'}})
1341
- ```
1342
- or
1343
- ```
1344
- @trigger(events=[{'name':'foo', 'parameters':{'flow_param_1': 'event_field_1'},
1345
- {'name':'bar', 'parameters':{'flow_param_2': 'event_field_2'}])
1346
- ```
1354
+ Specifies the Conda environment for all steps of the flow.
1347
1355
 
1348
- 'parameters' can also be a list of strings and tuples like so:
1349
- ```
1350
- @trigger(event={'name':'foo', 'parameters':['common_name', ('flow_param', 'event_field')]})
1351
- ```
1352
- This is equivalent to:
1353
- ```
1354
- @trigger(event={'name':'foo', 'parameters':{'common_name': 'common_name', 'flow_param': 'event_field'}})
1355
- ```
1356
+ Use `@conda_base` to set common libraries required by all
1357
+ steps and use `@conda` to specify step-specific additions.
1356
1358
 
1357
1359
 
1358
1360
  Parameters
1359
1361
  ----------
1360
- event : Union[str, Dict[str, Any]], optional, default None
1361
- Event dependency for this flow.
1362
- events : List[Union[str, Dict[str, Any]]], default []
1363
- Events dependency for this flow.
1364
- options : Dict[str, Any], default {}
1365
- Backend-specific configuration for tuning eventing behavior.
1362
+ packages : Dict[str, str], default {}
1363
+ Packages to use for this flow. The key is the name of the package
1364
+ and the value is the version to use.
1365
+ libraries : Dict[str, str], default {}
1366
+ Supported for backward compatibility. When used with packages, packages will take precedence.
1367
+ python : str, optional, default None
1368
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
1369
+ that the version used will correspond to the version of the Python interpreter used to start the run.
1370
+ disabled : bool, default False
1371
+ If set to True, disables Conda.
1366
1372
  """
1367
1373
  ...
1368
1374
 
1369
- def project(*, name: str, branch: typing.Optional[str] = None, production: bool = False) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1375
+ @typing.overload
1376
+ def conda_base(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1377
+ ...
1378
+
1379
+ def conda_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False):
1370
1380
  """
1371
- Specifies what flows belong to the same project.
1381
+ Specifies the Conda environment for all steps of the flow.
1372
1382
 
1373
- A project-specific namespace is created for all flows that
1374
- use the same `@project(name)`.
1383
+ Use `@conda_base` to set common libraries required by all
1384
+ steps and use `@conda` to specify step-specific additions.
1375
1385
 
1376
1386
 
1377
1387
  Parameters
1378
1388
  ----------
1379
- name : str
1380
- Project name. Make sure that the name is unique amongst all
1381
- projects that use the same production scheduler. The name may
1382
- contain only lowercase alphanumeric characters and underscores.
1383
-
1384
- branch : Optional[str], default None
1385
- The branch to use. If not specified, the branch is set to
1386
- `user.<username>` unless `production` is set to `True`. This can
1387
- also be set on the command line using `--branch` as a top-level option.
1388
- It is an error to specify `branch` in the decorator and on the command line.
1389
-
1390
- production : bool, default False
1391
- Whether or not the branch is the production branch. This can also be set on the
1392
- command line using `--production` as a top-level option. It is an error to specify
1393
- `production` in the decorator and on the command line.
1394
- The project branch name will be:
1395
- - if `branch` is specified:
1396
- - if `production` is True: `prod.<branch>`
1397
- - if `production` is False: `test.<branch>`
1398
- - if `branch` is not specified:
1399
- - if `production` is True: `prod`
1400
- - if `production` is False: `user.<username>`
1389
+ packages : Dict[str, str], default {}
1390
+ Packages to use for this flow. The key is the name of the package
1391
+ and the value is the version to use.
1392
+ libraries : Dict[str, str], default {}
1393
+ Supported for backward compatibility. When used with packages, packages will take precedence.
1394
+ python : str, optional, default None
1395
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
1396
+ that the version used will correspond to the version of the Python interpreter used to start the run.
1397
+ disabled : bool, default False
1398
+ If set to True, disables Conda.
1401
1399
  """
1402
1400
  ...
1403
1401
 
1404
- def airflow_s3_key_sensor(*, timeout: int, poke_interval: int, mode: str, exponential_backoff: bool, pool: str, soft_fail: bool, name: str, description: str, bucket_key: typing.Union[str, typing.List[str]], bucket_name: str, wildcard_match: bool, aws_conn_id: str, verify: bool) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1402
+ def airflow_external_task_sensor(*, timeout: int, poke_interval: int, mode: str, exponential_backoff: bool, pool: str, soft_fail: bool, name: str, description: str, external_dag_id: str, external_task_ids: typing.List[str], allowed_states: typing.List[str], failed_states: typing.List[str], execution_delta: "datetime.timedelta", check_existence: bool) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1405
1403
  """
1406
- The `@airflow_s3_key_sensor` decorator attaches a Airflow [S3KeySensor](https://airflow.apache.org/docs/apache-airflow-providers-amazon/stable/_api/airflow/providers/amazon/aws/sensors/s3/index.html#airflow.providers.amazon.aws.sensors.s3.S3KeySensor)
1407
- before the start step of the flow. This decorator only works when a flow is scheduled on Airflow
1408
- and is compiled using `airflow create`. More than one `@airflow_s3_key_sensor` can be
1409
- added as a flow decorators. Adding more than one decorator will ensure that `start` step
1410
- starts only after all sensors finish.
1404
+ The `@airflow_external_task_sensor` decorator attaches an Airflow [ExternalTaskSensor](https://airflow.apache.org/docs/apache-airflow/stable/_api/airflow/sensors/external_task/index.html#airflow.sensors.external_task.ExternalTaskSensor) before the start step of the flow.
1405
+ This decorator only works when a flow is scheduled on Airflow and is compiled using `airflow create`. More than one `@airflow_external_task_sensor` can be added as flow decorators. Adding more than one decorator will ensure that the `start` step starts only after all sensors finish.
1411
1406
 
1412
1407
 
1413
1408
  Parameters
@@ -1429,18 +1424,21 @@ def airflow_s3_key_sensor(*, timeout: int, poke_interval: int, mode: str, expone
1429
1424
  Name of the sensor on Airflow
1430
1425
  description : str
1431
1426
  Description of sensor in the Airflow UI
1432
- bucket_key : Union[str, List[str]]
1433
- The key(s) being waited on. Supports full s3:// style url or relative path from root level.
1434
- When it's specified as a full s3:// url, please leave `bucket_name` as None
1435
- bucket_name : str
1436
- Name of the S3 bucket. Only needed when bucket_key is not provided as a full s3:// url.
1437
- When specified, all the keys passed to bucket_key refers to this bucket. (Default:None)
1438
- wildcard_match : bool
1439
- whether the bucket_key should be interpreted as a Unix wildcard pattern. (Default: False)
1440
- aws_conn_id : str
1441
- a reference to the s3 connection on Airflow. (Default: None)
1442
- verify : bool
1443
- Whether or not to verify SSL certificates for S3 connection. (Default: None)
1427
+ external_dag_id : str
1428
+ The dag_id that contains the task you want to wait for.
1429
+ external_task_ids : List[str]
1430
+ The list of task_ids that you want to wait for.
1431
+ If None (default value) the sensor waits for the DAG. (Default: None)
1432
+ allowed_states : List[str]
1433
+ Iterable of allowed states, (Default: ['success'])
1434
+ failed_states : List[str]
1435
+ Iterable of failed or dis-allowed states. (Default: None)
1436
+ execution_delta : datetime.timedelta
1437
+ time difference with the previous execution to look at,
1438
+ the default is the same logical date as the current task or DAG. (Default: None)
1439
+ check_existence: bool
1440
+ Set to True to check if the external task exists or check if
1441
+ the DAG to wait for exists. (Default: True)
1444
1442
  """
1445
1443
  ...
1446
1444
 
@@ -1495,97 +1493,160 @@ def schedule(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, hourly:
1495
1493
  """
1496
1494
  ...
1497
1495
 
1498
- def airflow_external_task_sensor(*, timeout: int, poke_interval: int, mode: str, exponential_backoff: bool, pool: str, soft_fail: bool, name: str, description: str, external_dag_id: str, external_task_ids: typing.List[str], allowed_states: typing.List[str], failed_states: typing.List[str], execution_delta: "datetime.timedelta", check_existence: bool) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1496
+ def with_artifact_store(f: typing.Optional[typing.Type[FlowSpecDerived]] = None):
1499
1497
  """
1500
- The `@airflow_external_task_sensor` decorator attaches a Airflow [ExternalTaskSensor](https://airflow.apache.org/docs/apache-airflow/stable/_api/airflow/sensors/external_task/index.html#airflow.sensors.external_task.ExternalTaskSensor) before the start step of the flow.
1501
- This decorator only works when a flow is scheduled on Airflow and is compiled using `airflow create`. More than one `@airflow_external_task_sensor` can be added as a flow decorators. Adding more than one decorator will ensure that `start` step starts only after all sensors finish.
1498
+ Allows setting external datastores to save data for the
1499
+ `@checkpoint`/`@model`/`@huggingface_hub` decorators.
1502
1500
 
1501
+ This decorator is useful when users wish to save data to a different datastore
1502
+ than what is configured in Metaflow. This can be for variety of reasons:
1503
1503
 
1504
- Parameters
1504
+ 1. Data security: The objects needs to be stored in a bucket (object storage) that is not accessible by other flows.
1505
+ 2. Data Locality: The location where the task is executing is not located in the same region as the datastore.
1506
+ - Example: Metaflow datastore lives in US East, but the task is executing in Finland datacenters.
1507
+ 3. Data Lifecycle Policies: The objects need to be archived / managed separately from the Metaflow managed objects.
1508
+ - Example: Flow is training very large models that need to be stored separately and will be deleted more aggressively than the Metaflow managed objects.
1509
+
1510
+ Usage:
1505
1511
  ----------
1506
- timeout : int
1507
- Time, in seconds before the task times out and fails. (Default: 3600)
1508
- poke_interval : int
1509
- Time in seconds that the job should wait in between each try. (Default: 60)
1510
- mode : str
1511
- How the sensor operates. Options are: { poke | reschedule }. (Default: "poke")
1512
- exponential_backoff : bool
1513
- allow progressive longer waits between pokes by using exponential backoff algorithm. (Default: True)
1514
- pool : str
1515
- the slot pool this task should run in,
1516
- slot pools are a way to limit concurrency for certain tasks. (Default:None)
1517
- soft_fail : bool
1518
- Set to true to mark the task as SKIPPED on failure. (Default: False)
1519
- name : str
1520
- Name of the sensor on Airflow
1521
- description : str
1522
- Description of sensor in the Airflow UI
1523
- external_dag_id : str
1524
- The dag_id that contains the task you want to wait for.
1525
- external_task_ids : List[str]
1526
- The list of task_ids that you want to wait for.
1527
- If None (default value) the sensor waits for the DAG. (Default: None)
1528
- allowed_states : List[str]
1529
- Iterable of allowed states, (Default: ['success'])
1530
- failed_states : List[str]
1531
- Iterable of failed or dis-allowed states. (Default: None)
1532
- execution_delta : datetime.timedelta
1533
- time difference with the previous execution to look at,
1534
- the default is the same logical date as the current task or DAG. (Default: None)
1535
- check_existence: bool
1536
- Set to True to check if the external task exists or check if
1537
- the DAG to wait for exists. (Default: True)
1538
- """
1539
- ...
1540
-
1541
- @typing.overload
1542
- def conda_base(*, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1543
- """
1544
- Specifies the Conda environment for all steps of the flow.
1545
1512
 
1546
- Use `@conda_base` to set common libraries required by all
1547
- steps and use `@conda` to specify step-specific additions.
1513
+ - Using a custom IAM role to access the datastore.
1548
1514
 
1515
+ ```python
1516
+ @with_artifact_store(
1517
+ type="s3",
1518
+ config=lambda: {
1519
+ "root": "s3://my-bucket-foo/path/to/root",
1520
+ "role_arn": ROLE,
1521
+ },
1522
+ )
1523
+ class MyFlow(FlowSpec):
1549
1524
 
1550
- Parameters
1525
+ @checkpoint
1526
+ @step
1527
+ def start(self):
1528
+ with open("my_file.txt", "w") as f:
1529
+ f.write("Hello, World!")
1530
+ self.external_bucket_checkpoint = current.checkpoint.save("my_file.txt")
1531
+ self.next(self.end)
1532
+
1533
+ ```
1534
+
1535
+ - Using credentials to access the s3-compatible datastore.
1536
+
1537
+ ```python
1538
+ @with_artifact_store(
1539
+ type="s3",
1540
+ config=lambda: {
1541
+ "root": "s3://my-bucket-foo/path/to/root",
1542
+ "client_params": {
1543
+ "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1544
+ "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1545
+ },
1546
+ },
1547
+ )
1548
+ class MyFlow(FlowSpec):
1549
+
1550
+ @checkpoint
1551
+ @step
1552
+ def start(self):
1553
+ with open("my_file.txt", "w") as f:
1554
+ f.write("Hello, World!")
1555
+ self.external_bucket_checkpoint = current.checkpoint.save("my_file.txt")
1556
+ self.next(self.end)
1557
+
1558
+ ```
1559
+
1560
+ - Accessing objects stored in external datastores after task execution.
1561
+
1562
+ ```python
1563
+ run = Run("CheckpointsTestsFlow/8992")
1564
+ with artifact_store_from(run=run, config={
1565
+ "client_params": {
1566
+ "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1567
+ "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1568
+ },
1569
+ }):
1570
+ with Checkpoint() as cp:
1571
+ latest = cp.list(
1572
+ task=run["start"].task
1573
+ )[0]
1574
+ print(latest)
1575
+ cp.load(
1576
+ latest,
1577
+ "test-checkpoints"
1578
+ )
1579
+
1580
+ task = Task("TorchTuneFlow/8484/train/53673")
1581
+ with artifact_store_from(run=run, config={
1582
+ "client_params": {
1583
+ "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1584
+ "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1585
+ },
1586
+ }):
1587
+ load_model(
1588
+ task.data.model_ref,
1589
+ "test-models"
1590
+ )
1591
+ ```
1592
+ Parameters:
1551
1593
  ----------
1552
- packages : Dict[str, str], default {}
1553
- Packages to use for this flow. The key is the name of the package
1554
- and the value is the version to use.
1555
- libraries : Dict[str, str], default {}
1556
- Supported for backward compatibility. When used with packages, packages will take precedence.
1557
- python : str, optional, default None
1558
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
1559
- that the version used will correspond to the version of the Python interpreter used to start the run.
1560
- disabled : bool, default False
1561
- If set to True, disables Conda.
1594
+
1595
+ type: str
1596
+ The type of the datastore. Can be one of 's3', 'gcs', 'azure' or any other supported metaflow Datastore.
1597
+
1598
+ config: dict or Callable
1599
+ Dictionary of configuration options for the datastore. The following keys are required:
1600
+ - root: The root path in the datastore where the data will be saved. (needs to be in the format expected by the datastore)
1601
+ - example: 's3://bucket-name/path/to/root'
1602
+ - example: 'gs://bucket-name/path/to/root'
1603
+ - example: 'https://myblockacc.blob.core.windows.net/metaflow/'
1604
+ - role_arn (optional): AWS IAM role to access s3 bucket (only when `type` is 's3')
1605
+ - session_vars (optional): AWS session variables to access s3 bucket (only when `type` is 's3')
1606
+ - client_params (optional): AWS client parameters to access s3 bucket (only when `type` is 's3')
1562
1607
  """
1563
1608
  ...
1564
1609
 
1565
- @typing.overload
1566
- def conda_base(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1567
- ...
1568
-
1569
- def conda_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False):
1610
+ def airflow_s3_key_sensor(*, timeout: int, poke_interval: int, mode: str, exponential_backoff: bool, pool: str, soft_fail: bool, name: str, description: str, bucket_key: typing.Union[str, typing.List[str]], bucket_name: str, wildcard_match: bool, aws_conn_id: str, verify: bool) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1570
1611
  """
1571
- Specifies the Conda environment for all steps of the flow.
1572
-
1573
- Use `@conda_base` to set common libraries required by all
1574
- steps and use `@conda` to specify step-specific additions.
1612
+ The `@airflow_s3_key_sensor` decorator attaches a Airflow [S3KeySensor](https://airflow.apache.org/docs/apache-airflow-providers-amazon/stable/_api/airflow/providers/amazon/aws/sensors/s3/index.html#airflow.providers.amazon.aws.sensors.s3.S3KeySensor)
1613
+ before the start step of the flow. This decorator only works when a flow is scheduled on Airflow
1614
+ and is compiled using `airflow create`. More than one `@airflow_s3_key_sensor` can be
1615
+ added as a flow decorators. Adding more than one decorator will ensure that `start` step
1616
+ starts only after all sensors finish.
1575
1617
 
1576
1618
 
1577
1619
  Parameters
1578
1620
  ----------
1579
- packages : Dict[str, str], default {}
1580
- Packages to use for this flow. The key is the name of the package
1581
- and the value is the version to use.
1582
- libraries : Dict[str, str], default {}
1583
- Supported for backward compatibility. When used with packages, packages will take precedence.
1584
- python : str, optional, default None
1585
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
1586
- that the version used will correspond to the version of the Python interpreter used to start the run.
1587
- disabled : bool, default False
1588
- If set to True, disables Conda.
1621
+ timeout : int
1622
+ Time, in seconds before the task times out and fails. (Default: 3600)
1623
+ poke_interval : int
1624
+ Time in seconds that the job should wait in between each try. (Default: 60)
1625
+ mode : str
1626
+ How the sensor operates. Options are: { poke | reschedule }. (Default: "poke")
1627
+ exponential_backoff : bool
1628
+ allow progressive longer waits between pokes by using exponential backoff algorithm. (Default: True)
1629
+ pool : str
1630
+ the slot pool this task should run in,
1631
+ slot pools are a way to limit concurrency for certain tasks. (Default:None)
1632
+ soft_fail : bool
1633
+ Set to true to mark the task as SKIPPED on failure. (Default: False)
1634
+ name : str
1635
+ Name of the sensor on Airflow
1636
+ description : str
1637
+ Description of sensor in the Airflow UI
1638
+ bucket_key : Union[str, List[str]]
1639
+ The key(s) being waited on. Supports full s3:// style url or relative path from root level.
1640
+ When it's specified as a full s3:// url, please leave `bucket_name` as None
1641
+ bucket_name : str
1642
+ Name of the S3 bucket. Only needed when bucket_key is not provided as a full s3:// url.
1643
+ When specified, all the keys passed to bucket_key refers to this bucket. (Default:None)
1644
+ wildcard_match : bool
1645
+ whether the bucket_key should be interpreted as a Unix wildcard pattern. (Default: False)
1646
+ aws_conn_id : str
1647
+ a reference to the s3 connection on Airflow. (Default: None)
1648
+ verify : bool
1649
+ Whether or not to verify SSL certificates for S3 connection. (Default: None)
1589
1650
  """
1590
1651
  ...
1591
1652
 
@@ -1690,117 +1751,96 @@ def trigger_on_finish(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *
1690
1751
  """
1691
1752
  ...
1692
1753
 
1693
- def with_artifact_store(f: typing.Optional[typing.Type[FlowSpecDerived]] = None):
1754
+ @typing.overload
1755
+ def trigger(*, event: typing.Union[str, typing.Dict[str, typing.Any], None] = None, events: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = [], options: typing.Dict[str, typing.Any] = {}) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1694
1756
  """
1695
- Allows setting external datastores to save data for the
1696
- `@checkpoint`/`@model`/`@huggingface_hub` decorators.
1697
-
1698
- This decorator is useful when users wish to save data to a different datastore
1699
- than what is configured in Metaflow. This can be for variety of reasons:
1700
-
1701
- 1. Data security: The objects needs to be stored in a bucket (object storage) that is not accessible by other flows.
1702
- 2. Data Locality: The location where the task is executing is not located in the same region as the datastore.
1703
- - Example: Metaflow datastore lives in US East, but the task is executing in Finland datacenters.
1704
- 3. Data Lifecycle Policies: The objects need to be archived / managed separately from the Metaflow managed objects.
1705
- - Example: Flow is training very large models that need to be stored separately and will be deleted more aggressively than the Metaflow managed objects.
1706
-
1707
- Usage:
1708
- ----------
1709
-
1710
- - Using a custom IAM role to access the datastore.
1757
+ Specifies the event(s) that this flow depends on.
1711
1758
 
1712
- ```python
1713
- @with_artifact_store(
1714
- type="s3",
1715
- config=lambda: {
1716
- "root": "s3://my-bucket-foo/path/to/root",
1717
- "role_arn": ROLE,
1718
- },
1719
- )
1720
- class MyFlow(FlowSpec):
1759
+ ```
1760
+ @trigger(event='foo')
1761
+ ```
1762
+ or
1763
+ ```
1764
+ @trigger(events=['foo', 'bar'])
1765
+ ```
1721
1766
 
1722
- @checkpoint
1723
- @step
1724
- def start(self):
1725
- with open("my_file.txt", "w") as f:
1726
- f.write("Hello, World!")
1727
- self.external_bucket_checkpoint = current.checkpoint.save("my_file.txt")
1728
- self.next(self.end)
1767
+ Additionally, you can specify the parameter mappings
1768
+ to map event payload to Metaflow parameters for the flow.
1769
+ ```
1770
+ @trigger(event={'name':'foo', 'parameters':{'flow_param': 'event_field'}})
1771
+ ```
1772
+ or
1773
+ ```
1774
+ @trigger(events=[{'name':'foo', 'parameters':{'flow_param_1': 'event_field_1'},
1775
+ {'name':'bar', 'parameters':{'flow_param_2': 'event_field_2'}])
1776
+ ```
1729
1777
 
1730
- ```
1778
+ 'parameters' can also be a list of strings and tuples like so:
1779
+ ```
1780
+ @trigger(event={'name':'foo', 'parameters':['common_name', ('flow_param', 'event_field')]})
1781
+ ```
1782
+ This is equivalent to:
1783
+ ```
1784
+ @trigger(event={'name':'foo', 'parameters':{'common_name': 'common_name', 'flow_param': 'event_field'}})
1785
+ ```
1731
1786
 
1732
- - Using credentials to access the s3-compatible datastore.
1733
1787
 
1734
- ```python
1735
- @with_artifact_store(
1736
- type="s3",
1737
- config=lambda: {
1738
- "root": "s3://my-bucket-foo/path/to/root",
1739
- "client_params": {
1740
- "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1741
- "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1742
- },
1743
- },
1744
- )
1745
- class MyFlow(FlowSpec):
1788
+ Parameters
1789
+ ----------
1790
+ event : Union[str, Dict[str, Any]], optional, default None
1791
+ Event dependency for this flow.
1792
+ events : List[Union[str, Dict[str, Any]]], default []
1793
+ Events dependency for this flow.
1794
+ options : Dict[str, Any], default {}
1795
+ Backend-specific configuration for tuning eventing behavior.
1796
+ """
1797
+ ...
1798
+
1799
+ @typing.overload
1800
+ def trigger(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1801
+ ...
1802
+
1803
+ def trigger(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, event: typing.Union[str, typing.Dict[str, typing.Any], None] = None, events: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = [], options: typing.Dict[str, typing.Any] = {}):
1804
+ """
1805
+ Specifies the event(s) that this flow depends on.
1746
1806
 
1747
- @checkpoint
1748
- @step
1749
- def start(self):
1750
- with open("my_file.txt", "w") as f:
1751
- f.write("Hello, World!")
1752
- self.external_bucket_checkpoint = current.checkpoint.save("my_file.txt")
1753
- self.next(self.end)
1807
+ ```
1808
+ @trigger(event='foo')
1809
+ ```
1810
+ or
1811
+ ```
1812
+ @trigger(events=['foo', 'bar'])
1813
+ ```
1754
1814
 
1755
- ```
1815
+ Additionally, you can specify the parameter mappings
1816
+ to map event payload to Metaflow parameters for the flow.
1817
+ ```
1818
+ @trigger(event={'name':'foo', 'parameters':{'flow_param': 'event_field'}})
1819
+ ```
1820
+ or
1821
+ ```
1822
+ @trigger(events=[{'name':'foo', 'parameters':{'flow_param_1': 'event_field_1'},
1823
+ {'name':'bar', 'parameters':{'flow_param_2': 'event_field_2'}])
1824
+ ```
1756
1825
 
1757
- - Accessing objects stored in external datastores after task execution.
1826
+ 'parameters' can also be a list of strings and tuples like so:
1827
+ ```
1828
+ @trigger(event={'name':'foo', 'parameters':['common_name', ('flow_param', 'event_field')]})
1829
+ ```
1830
+ This is equivalent to:
1831
+ ```
1832
+ @trigger(event={'name':'foo', 'parameters':{'common_name': 'common_name', 'flow_param': 'event_field'}})
1833
+ ```
1758
1834
 
1759
- ```python
1760
- run = Run("CheckpointsTestsFlow/8992")
1761
- with artifact_store_from(run=run, config={
1762
- "client_params": {
1763
- "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1764
- "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1765
- },
1766
- }):
1767
- with Checkpoint() as cp:
1768
- latest = cp.list(
1769
- task=run["start"].task
1770
- )[0]
1771
- print(latest)
1772
- cp.load(
1773
- latest,
1774
- "test-checkpoints"
1775
- )
1776
1835
 
1777
- task = Task("TorchTuneFlow/8484/train/53673")
1778
- with artifact_store_from(run=run, config={
1779
- "client_params": {
1780
- "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1781
- "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1782
- },
1783
- }):
1784
- load_model(
1785
- task.data.model_ref,
1786
- "test-models"
1787
- )
1788
- ```
1789
- Parameters:
1836
+ Parameters
1790
1837
  ----------
1791
-
1792
- type: str
1793
- The type of the datastore. Can be one of 's3', 'gcs', 'azure' or any other supported metaflow Datastore.
1794
-
1795
- config: dict or Callable
1796
- Dictionary of configuration options for the datastore. The following keys are required:
1797
- - root: The root path in the datastore where the data will be saved. (needs to be in the format expected by the datastore)
1798
- - example: 's3://bucket-name/path/to/root'
1799
- - example: 'gs://bucket-name/path/to/root'
1800
- - example: 'https://myblockacc.blob.core.windows.net/metaflow/'
1801
- - role_arn (optional): AWS IAM role to access s3 bucket (only when `type` is 's3')
1802
- - session_vars (optional): AWS session variables to access s3 bucket (only when `type` is 's3')
1803
- - client_params (optional): AWS client parameters to access s3 bucket (only when `type` is 's3')
1838
+ event : Union[str, Dict[str, Any]], optional, default None
1839
+ Event dependency for this flow.
1840
+ events : List[Union[str, Dict[str, Any]]], default []
1841
+ Events dependency for this flow.
1842
+ options : Dict[str, Any], default {}
1843
+ Backend-specific configuration for tuning eventing behavior.
1804
1844
  """
1805
1845
  ...
1806
1846
 
@@ -1845,5 +1885,40 @@ def pypi_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packag
1845
1885
  """
1846
1886
  ...
1847
1887
 
1888
+ def project(*, name: str, branch: typing.Optional[str] = None, production: bool = False) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1889
+ """
1890
+ Specifies what flows belong to the same project.
1891
+
1892
+ A project-specific namespace is created for all flows that
1893
+ use the same `@project(name)`.
1894
+
1895
+
1896
+ Parameters
1897
+ ----------
1898
+ name : str
1899
+ Project name. Make sure that the name is unique amongst all
1900
+ projects that use the same production scheduler. The name may
1901
+ contain only lowercase alphanumeric characters and underscores.
1902
+
1903
+ branch : Optional[str], default None
1904
+ The branch to use. If not specified, the branch is set to
1905
+ `user.<username>` unless `production` is set to `True`. This can
1906
+ also be set on the command line using `--branch` as a top-level option.
1907
+ It is an error to specify `branch` in the decorator and on the command line.
1908
+
1909
+ production : bool, default False
1910
+ Whether or not the branch is the production branch. This can also be set on the
1911
+ command line using `--production` as a top-level option. It is an error to specify
1912
+ `production` in the decorator and on the command line.
1913
+ The project branch name will be:
1914
+ - if `branch` is specified:
1915
+ - if `production` is True: `prod.<branch>`
1916
+ - if `production` is False: `test.<branch>`
1917
+ - if `branch` is not specified:
1918
+ - if `production` is True: `prod`
1919
+ - if `production` is False: `user.<username>`
1920
+ """
1921
+ ...
1922
+
1848
1923
  pkg_name: str
1849
1924