ob-metaflow-stubs 6.0.10.0__py2.py3-none-any.whl → 6.0.10.1__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (262)
  1. metaflow-stubs/__init__.pyi +1109 -1109
  2. metaflow-stubs/cards.pyi +2 -2
  3. metaflow-stubs/cli.pyi +2 -2
  4. metaflow-stubs/cli_components/__init__.pyi +2 -2
  5. metaflow-stubs/cli_components/utils.pyi +2 -2
  6. metaflow-stubs/client/__init__.pyi +2 -2
  7. metaflow-stubs/client/core.pyi +6 -6
  8. metaflow-stubs/client/filecache.pyi +3 -3
  9. metaflow-stubs/events.pyi +3 -3
  10. metaflow-stubs/exception.pyi +2 -2
  11. metaflow-stubs/flowspec.pyi +5 -5
  12. metaflow-stubs/generated_for.txt +1 -1
  13. metaflow-stubs/includefile.pyi +3 -3
  14. metaflow-stubs/meta_files.pyi +2 -2
  15. metaflow-stubs/metadata_provider/__init__.pyi +2 -2
  16. metaflow-stubs/metadata_provider/heartbeat.pyi +2 -2
  17. metaflow-stubs/metadata_provider/metadata.pyi +2 -2
  18. metaflow-stubs/metadata_provider/util.pyi +2 -2
  19. metaflow-stubs/metaflow_config.pyi +6 -2
  20. metaflow-stubs/metaflow_current.pyi +59 -59
  21. metaflow-stubs/metaflow_git.pyi +2 -2
  22. metaflow-stubs/mf_extensions/__init__.pyi +2 -2
  23. metaflow-stubs/mf_extensions/obcheckpoint/__init__.pyi +2 -2
  24. metaflow-stubs/mf_extensions/obcheckpoint/plugins/__init__.pyi +2 -2
  25. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/__init__.pyi +2 -2
  26. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/__init__.pyi +2 -2
  27. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/async_cards.pyi +2 -2
  28. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/deco_injection_mixin.pyi +2 -2
  29. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/card_utils/extra_components.pyi +2 -2
  30. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/__init__.pyi +2 -2
  31. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/cards/__init__.pyi +2 -2
  32. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/cards/checkpoint_lister.pyi +4 -4
  33. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/cards/lineage_card.pyi +2 -2
  34. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/checkpoint_storage.pyi +4 -4
  35. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/constructors.pyi +2 -2
  36. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/core.pyi +4 -4
  37. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/decorator.pyi +4 -4
  38. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/exceptions.pyi +2 -2
  39. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/final_api.pyi +3 -3
  40. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/checkpoints/lineage.pyi +2 -2
  41. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/__init__.pyi +2 -2
  42. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/context.pyi +3 -3
  43. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/core.pyi +3 -3
  44. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/decorator.pyi +2 -2
  45. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/exceptions.pyi +2 -2
  46. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/task_utils.pyi +2 -2
  47. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastore/utils.pyi +2 -2
  48. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/datastructures.pyi +2 -2
  49. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/exceptions.pyi +2 -2
  50. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/hf_hub/__init__.pyi +2 -2
  51. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/hf_hub/decorator.pyi +3 -3
  52. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/__init__.pyi +2 -2
  53. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/core.pyi +4 -4
  54. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/exceptions.pyi +2 -2
  55. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/modeling_utils/model_storage.pyi +3 -3
  56. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/__init__.pyi +2 -2
  57. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/flowspec_utils.pyi +2 -2
  58. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/general.pyi +2 -2
  59. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/identity_utils.pyi +2 -2
  60. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/serialization_handler/__init__.pyi +2 -2
  61. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/serialization_handler/base.pyi +2 -2
  62. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/serialization_handler/tar.pyi +2 -2
  63. metaflow-stubs/mf_extensions/obcheckpoint/plugins/machine_learning_utilities/utils/tar_utils.pyi +2 -2
  64. metaflow-stubs/mf_extensions/outerbounds/__init__.pyi +2 -2
  65. metaflow-stubs/mf_extensions/outerbounds/plugins/__init__.pyi +2 -2
  66. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/__init__.pyi +2 -2
  67. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/__init__.pyi +2 -2
  68. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/_state_machine.pyi +2 -2
  69. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/_vendor/__init__.pyi +2 -2
  70. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/_vendor/spinner/__init__.pyi +2 -2
  71. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/_vendor/spinner/spinners.pyi +2 -2
  72. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/app_cli.pyi +3 -3
  73. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/app_config.pyi +2 -2
  74. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/capsule.pyi +3 -3
  75. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/click_importer.pyi +2 -2
  76. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/code_package/__init__.pyi +2 -2
  77. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/code_package/code_packager.pyi +3 -3
  78. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/__init__.pyi +2 -2
  79. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/cli_generator.pyi +2 -2
  80. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/config_utils.pyi +3 -3
  81. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/schema_export.pyi +2 -2
  82. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/typed_configs.pyi +3 -3
  83. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/config/unified_config.pyi +2 -2
  84. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/dependencies.pyi +2 -2
  85. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/deployer.pyi +5 -5
  86. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/experimental/__init__.pyi +2 -2
  87. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/perimeters.pyi +2 -2
  88. metaflow-stubs/mf_extensions/outerbounds/plugins/apps/core/utils.pyi +4 -4
  89. metaflow-stubs/mf_extensions/outerbounds/plugins/aws/__init__.pyi +2 -2
  90. metaflow-stubs/mf_extensions/outerbounds/plugins/aws/assume_role_decorator.pyi +2 -2
  91. metaflow-stubs/mf_extensions/outerbounds/plugins/card_utilities/__init__.pyi +2 -2
  92. metaflow-stubs/mf_extensions/outerbounds/plugins/card_utilities/async_cards.pyi +3 -3
  93. metaflow-stubs/mf_extensions/outerbounds/plugins/card_utilities/injector.pyi +2 -2
  94. metaflow-stubs/mf_extensions/outerbounds/plugins/checkpoint_datastores/__init__.pyi +2 -2
  95. metaflow-stubs/mf_extensions/outerbounds/plugins/checkpoint_datastores/coreweave.pyi +4 -4
  96. metaflow-stubs/mf_extensions/outerbounds/plugins/checkpoint_datastores/nebius.pyi +4 -4
  97. metaflow-stubs/mf_extensions/outerbounds/plugins/fast_bakery/__init__.pyi +2 -2
  98. metaflow-stubs/mf_extensions/outerbounds/plugins/fast_bakery/baker.pyi +4 -4
  99. metaflow-stubs/mf_extensions/outerbounds/plugins/fast_bakery/docker_environment.pyi +3 -3
  100. metaflow-stubs/mf_extensions/outerbounds/plugins/fast_bakery/fast_bakery.pyi +2 -2
  101. metaflow-stubs/mf_extensions/outerbounds/plugins/kubernetes/__init__.pyi +2 -2
  102. metaflow-stubs/mf_extensions/outerbounds/plugins/kubernetes/pod_killer.pyi +2 -2
  103. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/__init__.pyi +2 -2
  104. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/constants.pyi +2 -2
  105. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/exceptions.pyi +2 -2
  106. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/ollama.pyi +2 -2
  107. metaflow-stubs/mf_extensions/outerbounds/plugins/ollama/status_card.pyi +2 -2
  108. metaflow-stubs/mf_extensions/outerbounds/plugins/snowflake/__init__.pyi +2 -2
  109. metaflow-stubs/mf_extensions/outerbounds/plugins/snowflake/snowflake.pyi +2 -2
  110. metaflow-stubs/mf_extensions/outerbounds/profilers/__init__.pyi +2 -2
  111. metaflow-stubs/mf_extensions/outerbounds/profilers/gpu.pyi +2 -2
  112. metaflow-stubs/mf_extensions/outerbounds/remote_config.pyi +3 -3
  113. metaflow-stubs/mf_extensions/outerbounds/toplevel/__init__.pyi +2 -2
  114. metaflow-stubs/mf_extensions/outerbounds/toplevel/global_aliases_for_metaflow_package.pyi +2 -2
  115. metaflow-stubs/mf_extensions/outerbounds/toplevel/s3_proxy.pyi +2 -2
  116. metaflow-stubs/multicore_utils.pyi +2 -2
  117. metaflow-stubs/ob_internal.pyi +2 -2
  118. metaflow-stubs/packaging_sys/__init__.pyi +8 -8
  119. metaflow-stubs/packaging_sys/backend.pyi +4 -4
  120. metaflow-stubs/packaging_sys/distribution_support.pyi +3 -3
  121. metaflow-stubs/packaging_sys/tar_backend.pyi +6 -6
  122. metaflow-stubs/packaging_sys/utils.pyi +2 -2
  123. metaflow-stubs/packaging_sys/v1.pyi +3 -3
  124. metaflow-stubs/parameters.pyi +3 -3
  125. metaflow-stubs/plugins/__init__.pyi +12 -12
  126. metaflow-stubs/plugins/airflow/__init__.pyi +2 -2
  127. metaflow-stubs/plugins/airflow/airflow_utils.pyi +2 -2
  128. metaflow-stubs/plugins/airflow/exception.pyi +2 -2
  129. metaflow-stubs/plugins/airflow/sensors/__init__.pyi +2 -2
  130. metaflow-stubs/plugins/airflow/sensors/base_sensor.pyi +2 -2
  131. metaflow-stubs/plugins/airflow/sensors/external_task_sensor.pyi +2 -2
  132. metaflow-stubs/plugins/airflow/sensors/s3_sensor.pyi +2 -2
  133. metaflow-stubs/plugins/argo/__init__.pyi +2 -2
  134. metaflow-stubs/plugins/argo/argo_client.pyi +6 -4
  135. metaflow-stubs/plugins/argo/argo_events.pyi +2 -2
  136. metaflow-stubs/plugins/argo/argo_workflows.pyi +10 -3
  137. metaflow-stubs/plugins/argo/argo_workflows_decorator.pyi +3 -3
  138. metaflow-stubs/plugins/argo/argo_workflows_deployer.pyi +4 -4
  139. metaflow-stubs/plugins/argo/argo_workflows_deployer_objects.pyi +2 -2
  140. metaflow-stubs/plugins/argo/exit_hooks.pyi +2 -2
  141. metaflow-stubs/plugins/aws/__init__.pyi +2 -2
  142. metaflow-stubs/plugins/aws/aws_client.pyi +2 -2
  143. metaflow-stubs/plugins/aws/aws_utils.pyi +2 -2
  144. metaflow-stubs/plugins/aws/batch/__init__.pyi +2 -2
  145. metaflow-stubs/plugins/aws/batch/batch.pyi +4 -4
  146. metaflow-stubs/plugins/aws/batch/batch_client.pyi +2 -2
  147. metaflow-stubs/plugins/aws/batch/batch_decorator.pyi +2 -2
  148. metaflow-stubs/plugins/aws/secrets_manager/__init__.pyi +2 -2
  149. metaflow-stubs/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.pyi +4 -4
  150. metaflow-stubs/plugins/aws/step_functions/__init__.pyi +2 -2
  151. metaflow-stubs/plugins/aws/step_functions/event_bridge_client.pyi +2 -2
  152. metaflow-stubs/plugins/aws/step_functions/schedule_decorator.pyi +2 -2
  153. metaflow-stubs/plugins/aws/step_functions/step_functions.pyi +3 -3
  154. metaflow-stubs/plugins/aws/step_functions/step_functions_client.pyi +2 -2
  155. metaflow-stubs/plugins/aws/step_functions/step_functions_deployer.pyi +6 -3
  156. metaflow-stubs/plugins/aws/step_functions/step_functions_deployer_objects.pyi +3 -3
  157. metaflow-stubs/plugins/azure/__init__.pyi +2 -2
  158. metaflow-stubs/plugins/azure/azure_credential.pyi +2 -2
  159. metaflow-stubs/plugins/azure/azure_exceptions.pyi +2 -2
  160. metaflow-stubs/plugins/azure/azure_secret_manager_secrets_provider.pyi +4 -4
  161. metaflow-stubs/plugins/azure/azure_utils.pyi +2 -2
  162. metaflow-stubs/plugins/azure/blob_service_client_factory.pyi +2 -2
  163. metaflow-stubs/plugins/azure/includefile_support.pyi +2 -2
  164. metaflow-stubs/plugins/cards/__init__.pyi +2 -2
  165. metaflow-stubs/plugins/cards/card_client.pyi +3 -3
  166. metaflow-stubs/plugins/cards/card_creator.pyi +2 -2
  167. metaflow-stubs/plugins/cards/card_datastore.pyi +2 -2
  168. metaflow-stubs/plugins/cards/card_decorator.pyi +2 -2
  169. metaflow-stubs/plugins/cards/card_modules/__init__.pyi +2 -2
  170. metaflow-stubs/plugins/cards/card_modules/basic.pyi +3 -3
  171. metaflow-stubs/plugins/cards/card_modules/card.pyi +2 -2
  172. metaflow-stubs/plugins/cards/card_modules/components.pyi +3 -3
  173. metaflow-stubs/plugins/cards/card_modules/convert_to_native_type.pyi +2 -2
  174. metaflow-stubs/plugins/cards/card_modules/renderer_tools.pyi +2 -2
  175. metaflow-stubs/plugins/cards/card_modules/test_cards.pyi +2 -2
  176. metaflow-stubs/plugins/cards/card_resolver.pyi +2 -2
  177. metaflow-stubs/plugins/cards/component_serializer.pyi +2 -2
  178. metaflow-stubs/plugins/cards/exception.pyi +2 -2
  179. metaflow-stubs/plugins/catch_decorator.pyi +2 -2
  180. metaflow-stubs/plugins/datatools/__init__.pyi +2 -2
  181. metaflow-stubs/plugins/datatools/local.pyi +2 -2
  182. metaflow-stubs/plugins/datatools/s3/__init__.pyi +2 -2
  183. metaflow-stubs/plugins/datatools/s3/s3.pyi +3 -3
  184. metaflow-stubs/plugins/datatools/s3/s3tail.pyi +2 -2
  185. metaflow-stubs/plugins/datatools/s3/s3util.pyi +2 -2
  186. metaflow-stubs/plugins/debug_logger.pyi +2 -2
  187. metaflow-stubs/plugins/debug_monitor.pyi +2 -2
  188. metaflow-stubs/plugins/environment_decorator.pyi +2 -2
  189. metaflow-stubs/plugins/events_decorator.pyi +2 -2
  190. metaflow-stubs/plugins/exit_hook/__init__.pyi +2 -2
  191. metaflow-stubs/plugins/exit_hook/exit_hook_decorator.pyi +2 -2
  192. metaflow-stubs/plugins/frameworks/__init__.pyi +2 -2
  193. metaflow-stubs/plugins/frameworks/pytorch.pyi +2 -2
  194. metaflow-stubs/plugins/gcp/__init__.pyi +2 -2
  195. metaflow-stubs/plugins/gcp/gcp_secret_manager_secrets_provider.pyi +4 -4
  196. metaflow-stubs/plugins/gcp/gs_exceptions.pyi +2 -2
  197. metaflow-stubs/plugins/gcp/gs_storage_client_factory.pyi +2 -2
  198. metaflow-stubs/plugins/gcp/gs_utils.pyi +2 -2
  199. metaflow-stubs/plugins/gcp/includefile_support.pyi +2 -2
  200. metaflow-stubs/plugins/kubernetes/__init__.pyi +2 -2
  201. metaflow-stubs/plugins/kubernetes/kube_utils.pyi +2 -2
  202. metaflow-stubs/plugins/kubernetes/kubernetes.pyi +2 -2
  203. metaflow-stubs/plugins/kubernetes/kubernetes_client.pyi +2 -2
  204. metaflow-stubs/plugins/kubernetes/kubernetes_decorator.pyi +2 -2
  205. metaflow-stubs/plugins/kubernetes/kubernetes_jobsets.pyi +2 -2
  206. metaflow-stubs/plugins/kubernetes/spot_monitor_sidecar.pyi +2 -2
  207. metaflow-stubs/plugins/ollama/__init__.pyi +2 -2
  208. metaflow-stubs/plugins/optuna/__init__.pyi +2 -2
  209. metaflow-stubs/plugins/parallel_decorator.pyi +2 -2
  210. metaflow-stubs/plugins/perimeters.pyi +2 -2
  211. metaflow-stubs/plugins/project_decorator.pyi +2 -2
  212. metaflow-stubs/plugins/pypi/__init__.pyi +3 -3
  213. metaflow-stubs/plugins/pypi/conda_decorator.pyi +2 -2
  214. metaflow-stubs/plugins/pypi/conda_environment.pyi +5 -5
  215. metaflow-stubs/plugins/pypi/parsers.pyi +2 -2
  216. metaflow-stubs/plugins/pypi/pypi_decorator.pyi +2 -2
  217. metaflow-stubs/plugins/pypi/pypi_environment.pyi +2 -2
  218. metaflow-stubs/plugins/pypi/utils.pyi +2 -2
  219. metaflow-stubs/plugins/resources_decorator.pyi +2 -2
  220. metaflow-stubs/plugins/retry_decorator.pyi +2 -2
  221. metaflow-stubs/plugins/secrets/__init__.pyi +3 -3
  222. metaflow-stubs/plugins/secrets/inline_secrets_provider.pyi +4 -4
  223. metaflow-stubs/plugins/secrets/secrets_decorator.pyi +2 -2
  224. metaflow-stubs/plugins/secrets/secrets_func.pyi +2 -2
  225. metaflow-stubs/plugins/secrets/secrets_spec.pyi +2 -2
  226. metaflow-stubs/plugins/secrets/utils.pyi +2 -2
  227. metaflow-stubs/plugins/snowflake/__init__.pyi +2 -2
  228. metaflow-stubs/plugins/storage_executor.pyi +2 -2
  229. metaflow-stubs/plugins/test_unbounded_foreach_decorator.pyi +2 -2
  230. metaflow-stubs/plugins/timeout_decorator.pyi +2 -2
  231. metaflow-stubs/plugins/torchtune/__init__.pyi +2 -2
  232. metaflow-stubs/plugins/uv/__init__.pyi +2 -2
  233. metaflow-stubs/plugins/uv/uv_environment.pyi +3 -3
  234. metaflow-stubs/profilers/__init__.pyi +2 -2
  235. metaflow-stubs/pylint_wrapper.pyi +2 -2
  236. metaflow-stubs/runner/__init__.pyi +2 -2
  237. metaflow-stubs/runner/deployer.pyi +33 -33
  238. metaflow-stubs/runner/deployer_impl.pyi +2 -2
  239. metaflow-stubs/runner/metaflow_runner.pyi +3 -3
  240. metaflow-stubs/runner/nbdeploy.pyi +2 -2
  241. metaflow-stubs/runner/nbrun.pyi +2 -2
  242. metaflow-stubs/runner/subprocess_manager.pyi +2 -2
  243. metaflow-stubs/runner/utils.pyi +3 -3
  244. metaflow-stubs/system/__init__.pyi +2 -2
  245. metaflow-stubs/system/system_logger.pyi +2 -2
  246. metaflow-stubs/system/system_monitor.pyi +2 -2
  247. metaflow-stubs/tagging_util.pyi +2 -2
  248. metaflow-stubs/tuple_util.pyi +2 -2
  249. metaflow-stubs/user_configs/__init__.pyi +2 -2
  250. metaflow-stubs/user_configs/config_options.pyi +3 -3
  251. metaflow-stubs/user_configs/config_parameters.pyi +7 -7
  252. metaflow-stubs/user_decorators/__init__.pyi +2 -2
  253. metaflow-stubs/user_decorators/common.pyi +2 -2
  254. metaflow-stubs/user_decorators/mutable_flow.pyi +5 -5
  255. metaflow-stubs/user_decorators/mutable_step.pyi +5 -5
  256. metaflow-stubs/user_decorators/user_flow_decorator.pyi +3 -3
  257. metaflow-stubs/user_decorators/user_step_decorator.pyi +5 -5
  258. {ob_metaflow_stubs-6.0.10.0.dist-info → ob_metaflow_stubs-6.0.10.1.dist-info}/METADATA +1 -1
  259. ob_metaflow_stubs-6.0.10.1.dist-info/RECORD +262 -0
  260. ob_metaflow_stubs-6.0.10.0.dist-info/RECORD +0 -262
  261. {ob_metaflow_stubs-6.0.10.0.dist-info → ob_metaflow_stubs-6.0.10.1.dist-info}/WHEEL +0 -0
  262. {ob_metaflow_stubs-6.0.10.0.dist-info → ob_metaflow_stubs-6.0.10.1.dist-info}/top_level.txt +0 -0
@@ -1,15 +1,15 @@
1
1
  ######################################################################################################
2
2
  # Auto-generated Metaflow stub file #
3
- # MF version: 2.18.2.1+obcheckpoint(0.2.4);ob(v1) #
4
- # Generated on 2025-09-08T21:00:14.553698 #
3
+ # MF version: 2.18.3.2+obcheckpoint(0.2.4);ob(v1) #
4
+ # Generated on 2025-09-09T09:20:35.730188 #
5
5
  ######################################################################################################
6
6
 
7
7
  from __future__ import annotations
8
8
 
9
9
  import typing
10
10
  if typing.TYPE_CHECKING:
11
- import datetime
12
11
  import typing
12
+ import datetime
13
13
  FlowSpecDerived = typing.TypeVar("FlowSpecDerived", bound="FlowSpec", contravariant=False, covariant=False)
14
14
  StepFlag = typing.NewType("StepFlag", bool)
15
15
 
@@ -40,17 +40,17 @@ from .user_decorators.user_step_decorator import StepMutator as StepMutator
40
40
  from .user_decorators.user_step_decorator import user_step_decorator as user_step_decorator
41
41
  from .user_decorators.user_flow_decorator import FlowMutator as FlowMutator
42
42
  from . import cards as cards
43
+ from . import tuple_util as tuple_util
43
44
  from . import metaflow_git as metaflow_git
44
45
  from . import events as events
45
- from . import tuple_util as tuple_util
46
46
  from . import runner as runner
47
47
  from . import plugins as plugins
48
48
  from .mf_extensions.outerbounds.toplevel.global_aliases_for_metaflow_package import S3 as S3
49
49
  from . import includefile as includefile
50
50
  from .includefile import IncludeFile as IncludeFile
51
- from .plugins.pypi.parsers import requirements_txt_parser as requirements_txt_parser
52
51
  from .plugins.pypi.parsers import pyproject_toml_parser as pyproject_toml_parser
53
52
  from .plugins.pypi.parsers import conda_environment_yml_parser as conda_environment_yml_parser
53
+ from .plugins.pypi.parsers import requirements_txt_parser as requirements_txt_parser
54
54
  from . import client as client
55
55
  from .client.core import namespace as namespace
56
56
  from .client.core import get_namespace as get_namespace
@@ -167,580 +167,536 @@ def step(f: typing.Union[typing.Callable[[FlowSpecDerived], None], typing.Callab
167
167
  """
168
168
  ...
169
169
 
170
- @typing.overload
171
- def resources(*, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
170
+ def vllm(*, model: str, backend: str, openai_api_server: bool, debug: bool, card_refresh_interval: int, max_retries: int, retry_alert_frequency: int, engine_args: dict) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
172
171
  """
173
- Specifies the resources needed when executing this step.
174
-
175
- Use `@resources` to specify the resource requirements
176
- independently of the specific compute layer (`@batch`, `@kubernetes`).
177
-
178
- You can choose the compute layer on the command line by executing e.g.
179
- ```
180
- python myflow.py run --with batch
181
- ```
182
- or
183
- ```
184
- python myflow.py run --with kubernetes
185
- ```
186
- which executes the flow on the desired system using the
187
- requirements specified in `@resources`.
172
+ This decorator is used to run vllm APIs as Metaflow task sidecars.
188
173
 
174
+ User code call
175
+ --------------
176
+ @vllm(
177
+ model="...",
178
+ ...
179
+ )
189
180
 
190
- Parameters
191
- ----------
192
- cpu : int, default 1
193
- Number of CPUs required for this step.
194
- gpu : int, optional, default None
195
- Number of GPUs required for this step.
196
- disk : int, optional, default None
197
- Disk size (in MB) required for this step. Only applies on Kubernetes.
198
- memory : int, default 4096
199
- Memory size (in MB) required for this step.
200
- shared_memory : int, optional, default None
201
- The value for the size (in MiB) of the /dev/shm volume for this step.
202
- This parameter maps to the `--shm-size` option in Docker.
203
- """
204
- ...
205
-
206
- @typing.overload
207
- def resources(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
208
- ...
209
-
210
- @typing.overload
211
- def resources(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
212
- ...
213
-
214
- def resources(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None):
215
- """
216
- Specifies the resources needed when executing this step.
181
+ Valid backend options
182
+ ---------------------
183
+ - 'local': Run as a separate process on the local task machine.
217
184
 
218
- Use `@resources` to specify the resource requirements
219
- independently of the specific compute layer (`@batch`, `@kubernetes`).
185
+ Valid model options
186
+ -------------------
187
+ Any HuggingFace model identifier, e.g. 'meta-llama/Llama-3.2-1B'
220
188
 
221
- You can choose the compute layer on the command line by executing e.g.
222
- ```
223
- python myflow.py run --with batch
224
- ```
225
- or
226
- ```
227
- python myflow.py run --with kubernetes
228
- ```
229
- which executes the flow on the desired system using the
230
- requirements specified in `@resources`.
189
+ NOTE: vLLM's OpenAI-compatible server serves ONE model per server instance.
190
+ If you need multiple models, you must create multiple @vllm decorators.
231
191
 
232
192
 
233
193
  Parameters
234
194
  ----------
235
- cpu : int, default 1
236
- Number of CPUs required for this step.
237
- gpu : int, optional, default None
238
- Number of GPUs required for this step.
239
- disk : int, optional, default None
240
- Disk size (in MB) required for this step. Only applies on Kubernetes.
241
- memory : int, default 4096
242
- Memory size (in MB) required for this step.
243
- shared_memory : int, optional, default None
244
- The value for the size (in MiB) of the /dev/shm volume for this step.
245
- This parameter maps to the `--shm-size` option in Docker.
246
- """
247
- ...
248
-
249
- @typing.overload
250
- def test_append_card(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
251
- """
252
- A simple decorator that demonstrates using CardDecoratorInjector
253
- to inject a card and render simple markdown content.
254
- """
255
- ...
256
-
257
- @typing.overload
258
- def test_append_card(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
259
- ...
260
-
261
- def test_append_card(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
262
- """
263
- A simple decorator that demonstrates using CardDecoratorInjector
264
- to inject a card and render simple markdown content.
195
+ model: str
196
+ HuggingFace model identifier to be served by vLLM.
197
+ backend: str
198
+ Determines where and how to run the vLLM process.
199
+ openai_api_server: bool
200
+ Whether to use OpenAI-compatible API server mode (subprocess) instead of native engine.
201
+ Default is False (uses native engine).
202
+ Set to True for backward compatibility with existing code.
203
+ debug: bool
204
+ Whether to turn on verbose debugging logs.
205
+ card_refresh_interval: int
206
+ Interval in seconds for refreshing the vLLM status card.
207
+ Only used when openai_api_server=True.
208
+ max_retries: int
209
+ Maximum number of retries checking for vLLM server startup.
210
+ Only used when openai_api_server=True.
211
+ retry_alert_frequency: int
212
+ Frequency of alert logs for vLLM server startup retries.
213
+ Only used when openai_api_server=True.
214
+ engine_args : dict
215
+ Additional keyword arguments to pass to the vLLM engine.
216
+ For example, `tensor_parallel_size=2`.
265
217
  """
266
218
  ...
267
219
 
268
220
  @typing.overload
269
- def environment(*, vars: typing.Dict[str, str] = {}) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
221
+ def catch(*, var: typing.Optional[str] = None, print_exception: bool = True) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
270
222
  """
271
- Specifies environment variables to be set prior to the execution of a step.
223
+ Specifies that the step will succeed under all circumstances.
224
+
225
+ The decorator will create an optional artifact, specified by `var`, which
226
+ contains the exception raised. You can use it to detect the presence
227
+ of errors, indicating that all happy-path artifacts produced by the step
228
+ are missing.
272
229
 
273
230
 
274
231
  Parameters
275
232
  ----------
276
- vars : Dict[str, str], default {}
277
- Dictionary of environment variables to set.
233
+ var : str, optional, default None
234
+ Name of the artifact in which to store the caught exception.
235
+ If not specified, the exception is not stored.
236
+ print_exception : bool, default True
237
+ Determines whether or not the exception is printed to
238
+ stdout when caught.
278
239
  """
279
240
  ...
280
241
 
281
242
  @typing.overload
282
- def environment(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
243
+ def catch(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
283
244
  ...
284
245
 
285
246
  @typing.overload
286
- def environment(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
287
- ...
288
-
289
- def environment(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, vars: typing.Dict[str, str] = {}):
290
- """
291
- Specifies environment variables to be set prior to the execution of a step.
292
-
293
-
294
- Parameters
295
- ----------
296
- vars : Dict[str, str], default {}
297
- Dictionary of environment variables to set.
298
- """
247
+ def catch(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
299
248
  ...
300
249
 
301
- @typing.overload
302
- def conda(*, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
250
+ def catch(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, var: typing.Optional[str] = None, print_exception: bool = True):
303
251
  """
304
- Specifies the Conda environment for the step.
252
+ Specifies that the step will succeed under all circumstances.
305
253
 
306
- Information in this decorator will augment any
307
- attributes set in the `@conda_base` flow-level decorator. Hence,
308
- you can use `@conda_base` to set packages required by all
309
- steps and use `@conda` to specify step-specific overrides.
254
+ The decorator will create an optional artifact, specified by `var`, which
255
+ contains the exception raised. You can use it to detect the presence
256
+ of errors, indicating that all happy-path artifacts produced by the step
257
+ are missing.
310
258
 
311
259
 
312
260
  Parameters
313
261
  ----------
314
- packages : Dict[str, str], default {}
315
- Packages to use for this step. The key is the name of the package
316
- and the value is the version to use.
317
- libraries : Dict[str, str], default {}
318
- Supported for backward compatibility. When used with packages, packages will take precedence.
319
- python : str, optional, default None
320
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
321
- that the version used will correspond to the version of the Python interpreter used to start the run.
322
- disabled : bool, default False
323
- If set to True, disables @conda.
262
+ var : str, optional, default None
263
+ Name of the artifact in which to store the caught exception.
264
+ If not specified, the exception is not stored.
265
+ print_exception : bool, default True
266
+ Determines whether or not the exception is printed to
267
+ stdout when caught.
324
268
  """
325
269
  ...
326
270
 
327
- @typing.overload
328
- def conda(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
329
- ...
330
-
331
- @typing.overload
332
- def conda(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
333
- ...
334
-
335
- def conda(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False):
271
+ def nvidia(*, gpu: int, gpu_type: str, queue_timeout: int) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
336
272
  """
337
- Specifies the Conda environment for the step.
338
-
339
- Information in this decorator will augment any
340
- attributes set in the `@conda_base` flow-level decorator. Hence,
341
- you can use `@conda_base` to set packages required by all
342
- steps and use `@conda` to specify step-specific overrides.
273
+ Specifies that this step should execute on DGX cloud.
343
274
 
344
275
 
345
276
  Parameters
346
277
  ----------
347
- packages : Dict[str, str], default {}
348
- Packages to use for this step. The key is the name of the package
349
- and the value is the version to use.
350
- libraries : Dict[str, str], default {}
351
- Supported for backward compatibility. When used with packages, packages will take precedence.
352
- python : str, optional, default None
353
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
354
- that the version used will correspond to the version of the Python interpreter used to start the run.
355
- disabled : bool, default False
356
- If set to True, disables @conda.
278
+ gpu : int
279
+ Number of GPUs to use.
280
+ gpu_type : str
281
+ Type of Nvidia GPU to use.
282
+ queue_timeout : int
283
+ Time to keep the job in NVCF's queue.
357
284
  """
358
285
  ...
359
286
 
360
- @typing.overload
361
- def retry(*, times: int = 3, minutes_between_retries: int = 2) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
287
+ def kubernetes(*, cpu: int = 1, memory: int = 4096, disk: int = 10240, image: typing.Optional[str] = None, image_pull_policy: str = 'KUBERNETES_IMAGE_PULL_POLICY', image_pull_secrets: typing.List[str] = [], service_account: str = 'METAFLOW_KUBERNETES_SERVICE_ACCOUNT', secrets: typing.Optional[typing.List[str]] = None, node_selector: typing.Union[typing.Dict[str, str], str, None] = None, namespace: str = 'METAFLOW_KUBERNETES_NAMESPACE', gpu: typing.Optional[int] = None, gpu_vendor: str = 'KUBERNETES_GPU_VENDOR', tolerations: typing.List[typing.Dict[str, str]] = [], labels: typing.Dict[str, str] = 'METAFLOW_KUBERNETES_LABELS', annotations: typing.Dict[str, str] = 'METAFLOW_KUBERNETES_ANNOTATIONS', use_tmpfs: bool = False, tmpfs_tempdir: bool = True, tmpfs_size: typing.Optional[int] = None, tmpfs_path: typing.Optional[str] = '/metaflow_temp', persistent_volume_claims: typing.Optional[typing.Dict[str, str]] = None, shared_memory: typing.Optional[int] = None, port: typing.Optional[int] = None, compute_pool: typing.Optional[str] = None, hostname_resolution_timeout: int = 600, qos: str = 'Burstable', security_context: typing.Optional[typing.Dict[str, typing.Any]] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
362
288
  """
363
- Specifies the number of times the task corresponding
364
- to a step needs to be retried.
365
-
366
- This decorator is useful for handling transient errors, such as networking issues.
367
- If your task contains operations that can't be retried safely, e.g. database updates,
368
- it is advisable to annotate it with `@retry(times=0)`.
369
-
370
- This can be used in conjunction with the `@catch` decorator. The `@catch`
371
- decorator will execute a no-op task after all retries have been exhausted,
372
- ensuring that the flow execution can continue.
289
+ Specifies that this step should execute on Kubernetes.
373
290
 
374
291
 
375
292
  Parameters
376
293
  ----------
377
- times : int, default 3
378
- Number of times to retry this task.
379
- minutes_between_retries : int, default 2
380
- Number of minutes between retries.
381
- """
382
- ...
383
-
384
- @typing.overload
385
- def retry(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
386
- ...
387
-
388
- @typing.overload
389
- def retry(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
390
- ...
391
-
392
- def retry(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, times: int = 3, minutes_between_retries: int = 2):
393
- """
394
- Specifies the number of times the task corresponding
395
- to a step needs to be retried.
396
-
397
- This decorator is useful for handling transient errors, such as networking issues.
398
- If your task contains operations that can't be retried safely, e.g. database updates,
399
- it is advisable to annotate it with `@retry(times=0)`.
400
-
401
- This can be used in conjunction with the `@catch` decorator. The `@catch`
402
- decorator will execute a no-op task after all retries have been exhausted,
403
- ensuring that the flow execution can continue.
404
-
294
+ cpu : int, default 1
295
+ Number of CPUs required for this step. If `@resources` is
296
+ also present, the maximum value from all decorators is used.
297
+ memory : int, default 4096
298
+ Memory size (in MB) required for this step. If
299
+ `@resources` is also present, the maximum value from all decorators is
300
+ used.
301
+ disk : int, default 10240
302
+ Disk size (in MB) required for this step. If
303
+ `@resources` is also present, the maximum value from all decorators is
304
+ used.
305
+ image : str, optional, default None
306
+ Docker image to use when launching on Kubernetes. If not specified, and
307
+ METAFLOW_KUBERNETES_CONTAINER_IMAGE is specified, that image is used. If
308
+ not, a default Docker image mapping to the current version of Python is used.
309
+ image_pull_policy: str, default KUBERNETES_IMAGE_PULL_POLICY
310
+ If given, the imagePullPolicy to be applied to the Docker image of the step.
311
+ image_pull_secrets: List[str], default []
312
+ The default is extracted from METAFLOW_KUBERNETES_IMAGE_PULL_SECRETS.
313
+ Kubernetes image pull secrets to use when pulling container images
314
+ in Kubernetes.
315
+ service_account : str, default METAFLOW_KUBERNETES_SERVICE_ACCOUNT
316
+ Kubernetes service account to use when launching pod in Kubernetes.
317
+ secrets : List[str], optional, default None
318
+ Kubernetes secrets to use when launching pod in Kubernetes. These
319
+ secrets are in addition to the ones defined in `METAFLOW_KUBERNETES_SECRETS`
320
+ in Metaflow configuration.
321
+ node_selector: Union[Dict[str,str], str], optional, default None
322
+ Kubernetes node selector(s) to apply to the pod running the task.
323
+ Can be passed in as a comma separated string of values e.g.
324
+ 'kubernetes.io/os=linux,kubernetes.io/arch=amd64' or as a dictionary
325
+ {'kubernetes.io/os': 'linux', 'kubernetes.io/arch': 'amd64'}
326
+ namespace : str, default METAFLOW_KUBERNETES_NAMESPACE
327
+ Kubernetes namespace to use when launching pod in Kubernetes.
328
+ gpu : int, optional, default None
329
+ Number of GPUs required for this step. A value of zero implies that
330
+ the scheduled node should not have GPUs.
331
+ gpu_vendor : str, default KUBERNETES_GPU_VENDOR
332
+ The vendor of the GPUs to be used for this step.
333
+ tolerations : List[Dict[str,str]], default []
334
+ The default is extracted from METAFLOW_KUBERNETES_TOLERATIONS.
335
+ Kubernetes tolerations to use when launching pod in Kubernetes.
336
+ labels: Dict[str, str], default: METAFLOW_KUBERNETES_LABELS
337
+ Kubernetes labels to use when launching pod in Kubernetes.
338
+ annotations: Dict[str, str], default: METAFLOW_KUBERNETES_ANNOTATIONS
339
+ Kubernetes annotations to use when launching pod in Kubernetes.
340
+ use_tmpfs : bool, default False
341
+ This enables an explicit tmpfs mount for this step.
342
+ tmpfs_tempdir : bool, default True
343
+ sets METAFLOW_TEMPDIR to tmpfs_path if set for this step.
344
+ tmpfs_size : int, optional, default: None
345
+ The value for the size (in MiB) of the tmpfs mount for this step.
346
+ This parameter maps to the `--tmpfs` option in Docker. Defaults to 50% of the
347
+ memory allocated for this step.
348
+ tmpfs_path : str, optional, default /metaflow_temp
349
+ Path to tmpfs mount for this step.
350
+ persistent_volume_claims : Dict[str, str], optional, default None
351
+ A map (dictionary) of persistent volumes to be mounted to the pod for this step. The map is from persistent
352
+ volumes to the path to which the volume is to be mounted, e.g., `{'pvc-name': '/path/to/mount/on'}`.
353
+ shared_memory: int, optional
354
+ Shared memory size (in MiB) required for this step
355
+ port: int, optional
356
+ Port number to specify in the Kubernetes job object
357
+ compute_pool : str, optional, default None
358
+ Compute pool to be used for this step.
359
+ If not specified, any accessible compute pool within the perimeter is used.
360
+ hostname_resolution_timeout: int, default 10 * 60
361
+ Timeout in seconds for the worker tasks in the gang-scheduled cluster to resolve the hostname of the control task.
362
+ Only applicable when @parallel is used.
363
+ qos: str, default: Burstable
364
+ Quality of Service class to assign to the pod. Supported values are: Guaranteed, Burstable, BestEffort
405
365
 
406
- Parameters
407
- ----------
408
- times : int, default 3
409
- Number of times to retry this task.
410
- minutes_between_retries : int, default 2
411
- Number of minutes between retries.
366
+ security_context: Dict[str, Any], optional, default None
367
+ Container security context. Applies to the task container. Allows the following keys:
368
+ - privileged: bool, optional, default None
369
+ - allow_privilege_escalation: bool, optional, default None
370
+ - run_as_user: int, optional, default None
371
+ - run_as_group: int, optional, default None
372
+ - run_as_non_root: bool, optional, default None
412
373
  """
413
374
  ...
414
375
 
415
- def nvct(*, gpu: int, gpu_type: str) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
376
+ @typing.overload
377
+ def model(*, load: typing.Union[typing.List[str], str, typing.List[typing.Tuple[str, typing.Optional[str]]]] = None, temp_dir_root: str = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
416
378
  """
417
- Specifies that this step should execute on DGX cloud.
379
+ Enables loading / saving of models within a step.
418
380
 
381
+ > Examples
382
+ - Saving Models
383
+ ```python
384
+ @model
385
+ @step
386
+ def train(self):
387
+ # current.model.save returns a dictionary reference to the model saved
388
+ self.my_model = current.model.save(
389
+ path_to_my_model,
390
+ label="my_model",
391
+ metadata={
392
+ "epochs": 10,
393
+ "batch-size": 32,
394
+ "learning-rate": 0.001,
395
+ }
396
+ )
397
+ self.next(self.test)
419
398
 
420
- Parameters
421
- ----------
422
- gpu : int
423
- Number of GPUs to use.
424
- gpu_type : str
425
- Type of Nvidia GPU to use.
426
- """
427
- ...
428
-
429
- def s3_proxy(*, integration_name: typing.Optional[str] = None, write_mode: typing.Optional[str] = None, debug: typing.Optional[bool] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
430
- """
431
- S3 Proxy decorator for routing S3 requests through a local proxy service.
399
+ @model(load="my_model")
400
+ @step
401
+ def test(self):
402
+ # `current.model.loaded` returns a dictionary of the loaded models
403
+ # where the key is the name of the artifact and the value is the path to the model
404
+ print(os.listdir(current.model.loaded["my_model"]))
405
+ self.next(self.end)
406
+ ```
407
+
408
+ - Loading models
409
+ ```python
410
+ @step
411
+ def train(self):
412
+ # current.model.load returns the path to the model loaded
413
+ checkpoint_path = current.model.load(
414
+ self.checkpoint_key,
415
+ )
416
+ model_path = current.model.load(
417
+ self.model,
418
+ )
419
+ self.next(self.test)
420
+ ```
432
421
 
433
422
 
434
423
  Parameters
435
424
  ----------
436
- integration_name : str, optional
437
- Name of the S3 proxy integration. If not specified, will use the only
438
- available S3 proxy integration in the namespace (fails if multiple exist).
439
- write_mode : str, optional
440
- The desired behavior during write operations to target (origin) S3 bucket.
441
- allowed options are:
442
- "origin-and-cache" -> write to both the target S3 bucket and local object
443
- storage
444
- "origin" -> only write to the target S3 bucket
445
- "cache" -> only write to the object storage service used for caching
446
- debug : bool, optional
447
- Enable debug logging for proxy operations.
425
+ load : Union[List[str],str,List[Tuple[str,Union[str,None]]]], default: None
426
+ Artifact name/s referencing the models/checkpoints to load. Artifact names refer to the names of the instance variables set to `self`.
427
+ The artifact names given to `load` can be reference objects or reference `key` strings from objects created by `current.checkpoint` / `current.model` / `current.huggingface_hub`.
428
+ If a list of tuples is provided, the first element is the artifact name and the second element is the path the artifact needs to be unpacked on
429
+ the local filesystem. If the second element is None, the artifact will be unpacked in the current working directory.
430
+ If a string is provided, then the artifact corresponding to that name will be loaded in the current working directory.
431
+
432
+ temp_dir_root : str, default: None
433
+ The root directory under which `current.model.loaded` will store loaded models
448
434
  """
449
435
  ...
450
436
 
451
- def ollama(*, models: list, backend: str, force_pull: bool, cache_update_policy: str, force_cache_update: bool, debug: bool, circuit_breaker_config: dict, timeout_config: dict) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
437
+ @typing.overload
438
+ def model(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
439
+ ...
440
+
441
+ @typing.overload
442
+ def model(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
443
+ ...
444
+
445
+ def model(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, load: typing.Union[typing.List[str], str, typing.List[typing.Tuple[str, typing.Optional[str]]]] = None, temp_dir_root: str = None):
452
446
  """
453
- This decorator is used to run Ollama APIs as Metaflow task sidecars.
447
+ Enables loading / saving of models within a step.
454
448
 
455
- User code call
456
- --------------
457
- @ollama(
458
- models=[...],
459
- ...
460
- )
449
+ > Examples
450
+ - Saving Models
451
+ ```python
452
+ @model
453
+ @step
454
+ def train(self):
455
+ # current.model.save returns a dictionary reference to the model saved
456
+ self.my_model = current.model.save(
457
+ path_to_my_model,
458
+ label="my_model",
459
+ metadata={
460
+ "epochs": 10,
461
+ "batch-size": 32,
462
+ "learning-rate": 0.001,
463
+ }
464
+ )
465
+ self.next(self.test)
461
466
 
462
- Valid backend options
463
- ---------------------
464
- - 'local': Run as a separate process on the local task machine.
465
- - (TODO) 'managed': Outerbounds hosts and selects compute provider.
466
- - (TODO) 'remote': Spin up separate instance to serve Ollama models.
467
+ @model(load="my_model")
468
+ @step
469
+ def test(self):
470
+ # `current.model.loaded` returns a dictionary of the loaded models
471
+ # where the key is the name of the artifact and the value is the path to the model
472
+ print(os.listdir(current.model.loaded["my_model"]))
473
+ self.next(self.end)
474
+ ```
467
475
 
468
- Valid model options
469
- -------------------
470
- Any model here https://ollama.com/search, e.g. 'llama3.2', 'llama3.3'
476
+ - Loading models
477
+ ```python
478
+ @step
479
+ def train(self):
480
+ # current.model.load returns the path to the model loaded
481
+ checkpoint_path = current.model.load(
482
+ self.checkpoint_key,
483
+ )
484
+ model_path = current.model.load(
485
+ self.model,
486
+ )
487
+ self.next(self.test)
488
+ ```
471
489
 
472
490
 
473
491
  Parameters
474
492
  ----------
475
- models: list[str]
476
- List of Ollama containers running models in sidecars.
477
- backend: str
478
- Determines where and how to run the Ollama process.
479
- force_pull: bool
480
- Whether to run `ollama pull` no matter what, or first check the remote cache in Metaflow datastore for this model key.
481
- cache_update_policy: str
482
- Cache update policy: "auto", "force", or "never".
483
- force_cache_update: bool
484
- Simple override for "force" cache update policy.
485
- debug: bool
486
- Whether to turn on verbose debugging logs.
487
- circuit_breaker_config: dict
488
- Configuration for circuit breaker protection. Keys: failure_threshold, recovery_timeout, reset_timeout.
489
- timeout_config: dict
490
- Configuration for various operation timeouts. Keys: pull, stop, health_check, install, server_startup.
493
+ load : Union[List[str],str,List[Tuple[str,Union[str,None]]]], default: None
494
+ Artifact name/s referencing the models/checkpoints to load. Artifact names refer to the names of the instance variables set to `self`.
495
+ The artifact names given to `load` can be reference objects or reference `key` strings from objects created by `current.checkpoint` / `current.model` / `current.huggingface_hub`.
496
+ If a list of tuples is provided, the first element is the artifact name and the second element is the path the artifact needs to be unpacked on
497
+ the local filesystem. If the second element is None, the artifact will be unpacked in the current working directory.
498
+ If a string is provided, then the artifact corresponding to that name will be loaded in the current working directory.
499
+
500
+ temp_dir_root : str, default: None
501
+ The root directory under which `current.model.loaded` will store loaded models
491
502
  """
492
503
  ...
493
504
 
494
505
  @typing.overload
495
- def secrets(*, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = [], role: typing.Optional[str] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
506
+ def resources(*, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
496
507
  """
497
- Specifies secrets to be retrieved and injected as environment variables prior to
498
- the execution of a step.
508
+ Specifies the resources needed when executing this step.
509
+
510
+ Use `@resources` to specify the resource requirements
511
+ independently of the specific compute layer (`@batch`, `@kubernetes`).
512
+
513
+ You can choose the compute layer on the command line by executing e.g.
514
+ ```
515
+ python myflow.py run --with batch
516
+ ```
517
+ or
518
+ ```
519
+ python myflow.py run --with kubernetes
520
+ ```
521
+ which executes the flow on the desired system using the
522
+ requirements specified in `@resources`.
499
523
 
500
524
 
501
525
  Parameters
502
526
  ----------
503
- sources : List[Union[str, Dict[str, Any]]], default: []
504
- List of secret specs, defining how the secrets are to be retrieved
505
- role : str, optional, default: None
506
- Role to use for fetching secrets
527
+ cpu : int, default 1
528
+ Number of CPUs required for this step.
529
+ gpu : int, optional, default None
530
+ Number of GPUs required for this step.
531
+ disk : int, optional, default None
532
+ Disk size (in MB) required for this step. Only applies on Kubernetes.
533
+ memory : int, default 4096
534
+ Memory size (in MB) required for this step.
535
+ shared_memory : int, optional, default None
536
+ The value for the size (in MiB) of the /dev/shm volume for this step.
537
+ This parameter maps to the `--shm-size` option in Docker.
507
538
  """
508
539
  ...
509
540
 
510
541
  @typing.overload
511
- def secrets(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
542
+ def resources(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
512
543
  ...
513
544
 
514
545
  @typing.overload
515
- def secrets(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
546
+ def resources(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
516
547
  ...
517
548
 
518
- def secrets(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = [], role: typing.Optional[str] = None):
549
+ def resources(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, cpu: int = 1, gpu: typing.Optional[int] = None, disk: typing.Optional[int] = None, memory: int = 4096, shared_memory: typing.Optional[int] = None):
519
550
  """
520
- Specifies secrets to be retrieved and injected as environment variables prior to
521
- the execution of a step.
551
+ Specifies the resources needed when executing this step.
552
+
553
+ Use `@resources` to specify the resource requirements
554
+ independently of the specific compute layer (`@batch`, `@kubernetes`).
555
+
556
+ You can choose the compute layer on the command line by executing e.g.
557
+ ```
558
+ python myflow.py run --with batch
559
+ ```
560
+ or
561
+ ```
562
+ python myflow.py run --with kubernetes
563
+ ```
564
+ which executes the flow on the desired system using the
565
+ requirements specified in `@resources`.
522
566
 
523
567
 
524
568
  Parameters
525
569
  ----------
526
- sources : List[Union[str, Dict[str, Any]]], default: []
527
- List of secret specs, defining how the secrets are to be retrieved
528
- role : str, optional, default: None
529
- Role to use for fetching secrets
570
+ cpu : int, default 1
571
+ Number of CPUs required for this step.
572
+ gpu : int, optional, default None
573
+ Number of GPUs required for this step.
574
+ disk : int, optional, default None
575
+ Disk size (in MB) required for this step. Only applies on Kubernetes.
576
+ memory : int, default 4096
577
+ Memory size (in MB) required for this step.
578
+ shared_memory : int, optional, default None
579
+ The value for the size (in MiB) of the /dev/shm volume for this step.
580
+ This parameter maps to the `--shm-size` option in Docker.
530
581
  """
531
582
  ...
532
583
 
533
- def nvidia(*, gpu: int, gpu_type: str, queue_timeout: int) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
584
+ def s3_proxy(*, integration_name: typing.Optional[str] = None, write_mode: typing.Optional[str] = None, debug: typing.Optional[bool] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
534
585
  """
535
- Specifies that this step should execute on DGX cloud.
586
+ S3 Proxy decorator for routing S3 requests through a local proxy service.
536
587
 
537
588
 
538
589
  Parameters
539
590
  ----------
540
- gpu : int
541
- Number of GPUs to use.
542
- gpu_type : str
543
- Type of Nvidia GPU to use.
544
- queue_timeout : int
545
- Time to keep the job in NVCF's queue.
591
+ integration_name : str, optional
592
+ Name of the S3 proxy integration. If not specified, will use the only
593
+ available S3 proxy integration in the namespace (fails if multiple exist).
594
+ write_mode : str, optional
595
+ The desired behavior during write operations to target (origin) S3 bucket.
596
+ allowed options are:
597
+ "origin-and-cache" -> write to both the target S3 bucket and local object
598
+ storage
599
+ "origin" -> only write to the target S3 bucket
600
+ "cache" -> only write to the object storage service used for caching
601
+ debug : bool, optional
602
+ Enable debug logging for proxy operations.
546
603
  """
547
604
  ...
548
605
 
549
606
  @typing.overload
550
- def checkpoint(*, load_policy: str = 'fresh', temp_dir_root: str = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
607
+ def timeout(*, seconds: int = 0, minutes: int = 0, hours: int = 0) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
551
608
  """
552
- Enables checkpointing for a step.
553
-
554
- > Examples
555
-
556
- - Saving Checkpoints
557
-
558
- ```python
559
- @checkpoint
560
- @step
561
- def train(self):
562
- model = create_model(self.parameters, checkpoint_path = None)
563
- for i in range(self.epochs):
564
- # some training logic
565
- loss = model.train(self.dataset)
566
- if i % 10 == 0:
567
- model.save(
568
- current.checkpoint.directory,
569
- )
570
- # saves the contents of the `current.checkpoint.directory` as a checkpoint
571
- # and returns a reference dictionary to the checkpoint saved in the datastore
572
- self.latest_checkpoint = current.checkpoint.save(
573
- name="epoch_checkpoint",
574
- metadata={
575
- "epoch": i,
576
- "loss": loss,
577
- }
578
- )
579
- ```
580
-
581
- - Using Loaded Checkpoints
582
-
583
- ```python
584
- @retry(times=3)
585
- @checkpoint
586
- @step
587
- def train(self):
588
- # Assume that the task has restarted and the previous attempt of the task
589
- # saved a checkpoint
590
- checkpoint_path = None
591
- if current.checkpoint.is_loaded: # Check if a checkpoint is loaded
592
- print("Loaded checkpoint from the previous attempt")
593
- checkpoint_path = current.checkpoint.directory
609
+ Specifies a timeout for your step.
594
610
 
595
- model = create_model(self.parameters, checkpoint_path = checkpoint_path)
596
- for i in range(self.epochs):
597
- ...
598
- ```
611
+ This decorator is useful if this step may hang indefinitely.
599
612
 
613
+ This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
614
+ A timeout is considered to be an exception thrown by the step. It will cause the step to be
615
+ retried if needed and the exception will be caught by the `@catch` decorator, if present.
600
616
 
601
- Parameters
602
- ----------
603
- load_policy : str, default: "fresh"
604
- The policy for loading the checkpoint. The following policies are supported:
605
- - "eager": Loads the the latest available checkpoint within the namespace.
606
- With this mode, the latest checkpoint written by any previous task (can be even a different run) of the step
607
- will be loaded at the start of the task.
608
- - "none": Do not load any checkpoint
609
- - "fresh": Loads the lastest checkpoint created within the running Task.
610
- This mode helps loading checkpoints across various retry attempts of the same task.
611
- With this mode, no checkpoint will be loaded at the start of a task but any checkpoints
612
- created within the task will be loaded when the task is retries execution on failure.
617
+ Note that all the values specified in parameters are added together so if you specify
618
+ 60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.
613
619
 
614
- temp_dir_root : str, default: None
615
- The root directory under which `current.checkpoint.directory` will be created.
620
+
621
+ Parameters
622
+ ----------
623
+ seconds : int, default 0
624
+ Number of seconds to wait prior to timing out.
625
+ minutes : int, default 0
626
+ Number of minutes to wait prior to timing out.
627
+ hours : int, default 0
628
+ Number of hours to wait prior to timing out.
616
629
  """
617
630
  ...
618
631
 
619
632
  @typing.overload
620
- def checkpoint(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
633
+ def timeout(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
621
634
  ...
622
635
 
623
636
  @typing.overload
624
- def checkpoint(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
637
+ def timeout(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
625
638
  ...
626
639
 
627
- def checkpoint(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, load_policy: str = 'fresh', temp_dir_root: str = None):
640
+ def timeout(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, seconds: int = 0, minutes: int = 0, hours: int = 0):
628
641
  """
629
- Enables checkpointing for a step.
630
-
631
- > Examples
632
-
633
- - Saving Checkpoints
634
-
635
- ```python
636
- @checkpoint
637
- @step
638
- def train(self):
639
- model = create_model(self.parameters, checkpoint_path = None)
640
- for i in range(self.epochs):
641
- # some training logic
642
- loss = model.train(self.dataset)
643
- if i % 10 == 0:
644
- model.save(
645
- current.checkpoint.directory,
646
- )
647
- # saves the contents of the `current.checkpoint.directory` as a checkpoint
648
- # and returns a reference dictionary to the checkpoint saved in the datastore
649
- self.latest_checkpoint = current.checkpoint.save(
650
- name="epoch_checkpoint",
651
- metadata={
652
- "epoch": i,
653
- "loss": loss,
654
- }
655
- )
656
- ```
642
+ Specifies a timeout for your step.
657
643
 
658
- - Using Loaded Checkpoints
644
+ This decorator is useful if this step may hang indefinitely.
659
645
 
660
- ```python
661
- @retry(times=3)
662
- @checkpoint
663
- @step
664
- def train(self):
665
- # Assume that the task has restarted and the previous attempt of the task
666
- # saved a checkpoint
667
- checkpoint_path = None
668
- if current.checkpoint.is_loaded: # Check if a checkpoint is loaded
669
- print("Loaded checkpoint from the previous attempt")
670
- checkpoint_path = current.checkpoint.directory
646
+ This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
647
+ A timeout is considered to be an exception thrown by the step. It will cause the step to be
648
+ retried if needed and the exception will be caught by the `@catch` decorator, if present.
671
649
 
672
- model = create_model(self.parameters, checkpoint_path = checkpoint_path)
673
- for i in range(self.epochs):
674
- ...
675
- ```
650
+ Note that all the values specified in parameters are added together so if you specify
651
+ 60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.
676
652
 
677
653
 
678
654
  Parameters
679
655
  ----------
680
- load_policy : str, default: "fresh"
681
- The policy for loading the checkpoint. The following policies are supported:
682
- - "eager": Loads the the latest available checkpoint within the namespace.
683
- With this mode, the latest checkpoint written by any previous task (can be even a different run) of the step
684
- will be loaded at the start of the task.
685
- - "none": Do not load any checkpoint
686
- - "fresh": Loads the lastest checkpoint created within the running Task.
687
- This mode helps loading checkpoints across various retry attempts of the same task.
688
- With this mode, no checkpoint will be loaded at the start of a task but any checkpoints
689
- created within the task will be loaded when the task is retries execution on failure.
690
-
691
- temp_dir_root : str, default: None
692
- The root directory under which `current.checkpoint.directory` will be created.
656
+ seconds : int, default 0
657
+ Number of seconds to wait prior to timing out.
658
+ minutes : int, default 0
659
+ Number of minutes to wait prior to timing out.
660
+ hours : int, default 0
661
+ Number of hours to wait prior to timing out.
693
662
  """
694
663
  ...
695
664
 
696
665
  @typing.overload
697
- def fast_bakery_internal(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
666
+ def environment(*, vars: typing.Dict[str, str] = {}) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
698
667
  """
699
- Internal decorator to support Fast bakery
668
+ Specifies environment variables to be set prior to the execution of a step.
669
+
670
+
671
+ Parameters
672
+ ----------
673
+ vars : Dict[str, str], default {}
674
+ Dictionary of environment variables to set.
700
675
  """
701
676
  ...
702
677
 
703
678
  @typing.overload
704
- def fast_bakery_internal(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
679
+ def environment(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
705
680
  ...
706
681
 
707
- def fast_bakery_internal(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
708
- """
709
- Internal decorator to support Fast bakery
710
- """
682
+ @typing.overload
683
+ def environment(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
711
684
  ...
712
685
 
713
- @typing.overload
714
- def pypi(*, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
686
+ def environment(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, vars: typing.Dict[str, str] = {}):
715
687
  """
716
- Specifies the PyPI packages for the step.
717
-
718
- Information in this decorator will augment any
719
- attributes set in the `@pyi_base` flow-level decorator. Hence,
720
- you can use `@pypi_base` to set packages required by all
721
- steps and use `@pypi` to specify step-specific overrides.
688
+ Specifies environment variables to be set prior to the execution of a step.
722
689
 
723
690
 
724
691
  Parameters
725
692
  ----------
726
- packages : Dict[str, str], default: {}
727
- Packages to use for this step. The key is the name of the package
728
- and the value is the version to use.
729
- python : str, optional, default: None
730
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
731
- that the version used will correspond to the version of the Python interpreter used to start the run.
693
+ vars : Dict[str, str], default {}
694
+ Dictionary of environment variables to set.
732
695
  """
733
696
  ...
734
697
 
735
698
  @typing.overload
736
- def pypi(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
737
- ...
738
-
739
- @typing.overload
740
- def pypi(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
741
- ...
742
-
743
- def pypi(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None):
699
+ def pypi(*, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
744
700
  """
745
701
  Specifies the PyPI packages for the step.
746
702
 
@@ -761,134 +717,32 @@ def pypi(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typ
761
717
  """
762
718
  ...
763
719
 
764
- def kubernetes(*, cpu: int = 1, memory: int = 4096, disk: int = 10240, image: typing.Optional[str] = None, image_pull_policy: str = 'KUBERNETES_IMAGE_PULL_POLICY', image_pull_secrets: typing.List[str] = [], service_account: str = 'METAFLOW_KUBERNETES_SERVICE_ACCOUNT', secrets: typing.Optional[typing.List[str]] = None, node_selector: typing.Union[typing.Dict[str, str], str, None] = None, namespace: str = 'METAFLOW_KUBERNETES_NAMESPACE', gpu: typing.Optional[int] = None, gpu_vendor: str = 'KUBERNETES_GPU_VENDOR', tolerations: typing.List[typing.Dict[str, str]] = [], labels: typing.Dict[str, str] = 'METAFLOW_KUBERNETES_LABELS', annotations: typing.Dict[str, str] = 'METAFLOW_KUBERNETES_ANNOTATIONS', use_tmpfs: bool = False, tmpfs_tempdir: bool = True, tmpfs_size: typing.Optional[int] = None, tmpfs_path: typing.Optional[str] = '/metaflow_temp', persistent_volume_claims: typing.Optional[typing.Dict[str, str]] = None, shared_memory: typing.Optional[int] = None, port: typing.Optional[int] = None, compute_pool: typing.Optional[str] = None, hostname_resolution_timeout: int = 600, qos: str = 'Burstable', security_context: typing.Optional[typing.Dict[str, typing.Any]] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
765
- """
766
- Specifies that this step should execute on Kubernetes.
767
-
768
-
769
- Parameters
770
- ----------
771
- cpu : int, default 1
772
- Number of CPUs required for this step. If `@resources` is
773
- also present, the maximum value from all decorators is used.
774
- memory : int, default 4096
775
- Memory size (in MB) required for this step. If
776
- `@resources` is also present, the maximum value from all decorators is
777
- used.
778
- disk : int, default 10240
779
- Disk size (in MB) required for this step. If
780
- `@resources` is also present, the maximum value from all decorators is
781
- used.
782
- image : str, optional, default None
783
- Docker image to use when launching on Kubernetes. If not specified, and
784
- METAFLOW_KUBERNETES_CONTAINER_IMAGE is specified, that image is used. If
785
- not, a default Docker image mapping to the current version of Python is used.
786
- image_pull_policy: str, default KUBERNETES_IMAGE_PULL_POLICY
787
- If given, the imagePullPolicy to be applied to the Docker image of the step.
788
- image_pull_secrets: List[str], default []
789
- The default is extracted from METAFLOW_KUBERNETES_IMAGE_PULL_SECRETS.
790
- Kubernetes image pull secrets to use when pulling container images
791
- in Kubernetes.
792
- service_account : str, default METAFLOW_KUBERNETES_SERVICE_ACCOUNT
793
- Kubernetes service account to use when launching pod in Kubernetes.
794
- secrets : List[str], optional, default None
795
- Kubernetes secrets to use when launching pod in Kubernetes. These
796
- secrets are in addition to the ones defined in `METAFLOW_KUBERNETES_SECRETS`
797
- in Metaflow configuration.
798
- node_selector: Union[Dict[str,str], str], optional, default None
799
- Kubernetes node selector(s) to apply to the pod running the task.
800
- Can be passed in as a comma separated string of values e.g.
801
- 'kubernetes.io/os=linux,kubernetes.io/arch=amd64' or as a dictionary
802
- {'kubernetes.io/os': 'linux', 'kubernetes.io/arch': 'amd64'}
803
- namespace : str, default METAFLOW_KUBERNETES_NAMESPACE
804
- Kubernetes namespace to use when launching pod in Kubernetes.
805
- gpu : int, optional, default None
806
- Number of GPUs required for this step. A value of zero implies that
807
- the scheduled node should not have GPUs.
808
- gpu_vendor : str, default KUBERNETES_GPU_VENDOR
809
- The vendor of the GPUs to be used for this step.
810
- tolerations : List[Dict[str,str]], default []
811
- The default is extracted from METAFLOW_KUBERNETES_TOLERATIONS.
812
- Kubernetes tolerations to use when launching pod in Kubernetes.
813
- labels: Dict[str, str], default: METAFLOW_KUBERNETES_LABELS
814
- Kubernetes labels to use when launching pod in Kubernetes.
815
- annotations: Dict[str, str], default: METAFLOW_KUBERNETES_ANNOTATIONS
816
- Kubernetes annotations to use when launching pod in Kubernetes.
817
- use_tmpfs : bool, default False
818
- This enables an explicit tmpfs mount for this step.
819
- tmpfs_tempdir : bool, default True
820
- sets METAFLOW_TEMPDIR to tmpfs_path if set for this step.
821
- tmpfs_size : int, optional, default: None
822
- The value for the size (in MiB) of the tmpfs mount for this step.
823
- This parameter maps to the `--tmpfs` option in Docker. Defaults to 50% of the
824
- memory allocated for this step.
825
- tmpfs_path : str, optional, default /metaflow_temp
826
- Path to tmpfs mount for this step.
827
- persistent_volume_claims : Dict[str, str], optional, default None
828
- A map (dictionary) of persistent volumes to be mounted to the pod for this step. The map is from persistent
829
- volumes to the path to which the volume is to be mounted, e.g., `{'pvc-name': '/path/to/mount/on'}`.
830
- shared_memory: int, optional
831
- Shared memory size (in MiB) required for this step
832
- port: int, optional
833
- Port number to specify in the Kubernetes job object
834
- compute_pool : str, optional, default None
835
- Compute pool to be used for for this step.
836
- If not specified, any accessible compute pool within the perimeter is used.
837
- hostname_resolution_timeout: int, default 10 * 60
838
- Timeout in seconds for the workers tasks in the gang scheduled cluster to resolve the hostname of control task.
839
- Only applicable when @parallel is used.
840
- qos: str, default: Burstable
841
- Quality of Service class to assign to the pod. Supported values are: Guaranteed, Burstable, BestEffort
842
-
843
- security_context: Dict[str, Any], optional, default None
844
- Container security context. Applies to the task container. Allows the following keys:
845
- - privileged: bool, optional, default None
846
- - allow_privilege_escalation: bool, optional, default None
847
- - run_as_user: int, optional, default None
848
- - run_as_group: int, optional, default None
849
- - run_as_non_root: bool, optional, default None
850
- """
851
- ...
852
-
853
- @typing.overload
854
- def nebius_s3_proxy(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
855
- """
856
- Nebius-specific S3 Proxy decorator for routing S3 requests through a local proxy service.
857
- It exists to make it easier for users to know that this decorator should only be used with
858
- a Neo Cloud like Nebius.
859
- """
860
- ...
861
-
862
- @typing.overload
863
- def nebius_s3_proxy(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
864
- ...
865
-
866
- def nebius_s3_proxy(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
867
- """
868
- Nebius-specific S3 Proxy decorator for routing S3 requests through a local proxy service.
869
- It exists to make it easier for users to know that this decorator should only be used with
870
- a Neo Cloud like Nebius.
871
- """
872
- ...
873
-
874
720
  @typing.overload
875
- def coreweave_s3_proxy(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
876
- """
877
- CoreWeave-specific S3 Proxy decorator for routing S3 requests through a local proxy service.
878
- It exists to make it easier for users to know that this decorator should only be used with
879
- a Neo Cloud like CoreWeave.
880
- """
721
+ def pypi(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
881
722
  ...
882
723
 
883
724
  @typing.overload
884
- def coreweave_s3_proxy(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
725
+ def pypi(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
885
726
  ...
886
727
 
887
- def coreweave_s3_proxy(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
728
+ def pypi(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None):
888
729
  """
889
- CoreWeave-specific S3 Proxy decorator for routing S3 requests through a local proxy service.
890
- It exists to make it easier for users to know that this decorator should only be used with
891
- a Neo Cloud like CoreWeave.
730
+ Specifies the PyPI packages for the step.
731
+
732
+ Information in this decorator will augment any
733
+ attributes set in the `@pypi_base` flow-level decorator. Hence,
734
+ you can use `@pypi_base` to set packages required by all
735
+ steps and use `@pypi` to specify step-specific overrides.
736
+
737
+
738
+ Parameters
739
+ ----------
740
+ packages : Dict[str, str], default: {}
741
+ Packages to use for this step. The key is the name of the package
742
+ and the value is the version to use.
743
+ python : str, optional, default: None
744
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
745
+ that the version used will correspond to the version of the Python interpreter used to start the run.
892
746
  """
893
747
  ...
894
748
 
@@ -973,131 +827,250 @@ def huggingface_hub(*, temp_dir_root: typing.Optional[str] = None, load: typing.
973
827
  ...
974
828
 
975
829
  @typing.overload
976
- def model(*, load: typing.Union[typing.List[str], str, typing.List[typing.Tuple[str, typing.Optional[str]]]] = None, temp_dir_root: str = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
830
+ def checkpoint(*, load_policy: str = 'fresh', temp_dir_root: str = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
977
831
  """
978
- Enables loading / saving of models within a step.
832
+ Enables checkpointing for a step.
979
833
 
980
834
  > Examples
981
- - Saving Models
835
+
836
+ - Saving Checkpoints
837
+
982
838
  ```python
983
- @model
839
+ @checkpoint
984
840
  @step
985
841
  def train(self):
986
- # current.model.save returns a dictionary reference to the model saved
987
- self.my_model = current.model.save(
988
- path_to_my_model,
989
- label="my_model",
990
- metadata={
991
- "epochs": 10,
992
- "batch-size": 32,
993
- "learning-rate": 0.001,
994
- }
995
- )
996
- self.next(self.test)
842
+ model = create_model(self.parameters, checkpoint_path = None)
843
+ for i in range(self.epochs):
844
+ # some training logic
845
+ loss = model.train(self.dataset)
846
+ if i % 10 == 0:
847
+ model.save(
848
+ current.checkpoint.directory,
849
+ )
850
+ # saves the contents of the `current.checkpoint.directory` as a checkpoint
851
+ # and returns a reference dictionary to the checkpoint saved in the datastore
852
+ self.latest_checkpoint = current.checkpoint.save(
853
+ name="epoch_checkpoint",
854
+ metadata={
855
+ "epoch": i,
856
+ "loss": loss,
857
+ }
858
+ )
859
+ ```
997
860
 
998
- @model(load="my_model")
861
+ - Using Loaded Checkpoints
862
+
863
+ ```python
864
+ @retry(times=3)
865
+ @checkpoint
999
866
  @step
1000
- def test(self):
1001
- # `current.model.loaded` returns a dictionary of the loaded models
1002
- # where the key is the name of the artifact and the value is the path to the model
1003
- print(os.listdir(current.model.loaded["my_model"]))
1004
- self.next(self.end)
867
+ def train(self):
868
+ # Assume that the task has restarted and the previous attempt of the task
869
+ # saved a checkpoint
870
+ checkpoint_path = None
871
+ if current.checkpoint.is_loaded: # Check if a checkpoint is loaded
872
+ print("Loaded checkpoint from the previous attempt")
873
+ checkpoint_path = current.checkpoint.directory
874
+
875
+ model = create_model(self.parameters, checkpoint_path = checkpoint_path)
876
+ for i in range(self.epochs):
877
+ ...
1005
878
  ```
1006
879
 
1007
- - Loading models
880
+
881
+ Parameters
882
+ ----------
883
+ load_policy : str, default: "fresh"
884
+ The policy for loading the checkpoint. The following policies are supported:
885
+ - "eager": Loads the latest available checkpoint within the namespace.
886
+ With this mode, the latest checkpoint written by any previous task (can be even a different run) of the step
887
+ will be loaded at the start of the task.
888
+ - "none": Do not load any checkpoint
889
+ - "fresh": Loads the latest checkpoint created within the running Task.
890
+ This mode helps loading checkpoints across various retry attempts of the same task.
891
+ With this mode, no checkpoint will be loaded at the start of a task but any checkpoints
892
+ created within the task will be loaded when the task retries execution on failure.
893
+
894
+ temp_dir_root : str, default: None
895
+ The root directory under which `current.checkpoint.directory` will be created.
896
+ """
897
+ ...
898
+
899
+ @typing.overload
900
+ def checkpoint(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
901
+ ...
902
+
903
+ @typing.overload
904
+ def checkpoint(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
905
+ ...
906
+
907
+ def checkpoint(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, load_policy: str = 'fresh', temp_dir_root: str = None):
908
+ """
909
+ Enables checkpointing for a step.
910
+
911
+ > Examples
912
+
913
+ - Saving Checkpoints
914
+
1008
915
  ```python
916
+ @checkpoint
1009
917
  @step
1010
918
  def train(self):
1011
- # current.model.load returns the path to the model loaded
1012
- checkpoint_path = current.model.load(
1013
- self.checkpoint_key,
1014
- )
1015
- model_path = current.model.load(
1016
- self.model,
1017
- )
1018
- self.next(self.test)
919
+ model = create_model(self.parameters, checkpoint_path = None)
920
+ for i in range(self.epochs):
921
+ # some training logic
922
+ loss = model.train(self.dataset)
923
+ if i % 10 == 0:
924
+ model.save(
925
+ current.checkpoint.directory,
926
+ )
927
+ # saves the contents of the `current.checkpoint.directory` as a checkpoint
928
+ # and returns a reference dictionary to the checkpoint saved in the datastore
929
+ self.latest_checkpoint = current.checkpoint.save(
930
+ name="epoch_checkpoint",
931
+ metadata={
932
+ "epoch": i,
933
+ "loss": loss,
934
+ }
935
+ )
936
+ ```
937
+
938
+ - Using Loaded Checkpoints
939
+
940
+ ```python
941
+ @retry(times=3)
942
+ @checkpoint
943
+ @step
944
+ def train(self):
945
+ # Assume that the task has restarted and the previous attempt of the task
946
+ # saved a checkpoint
947
+ checkpoint_path = None
948
+ if current.checkpoint.is_loaded: # Check if a checkpoint is loaded
949
+ print("Loaded checkpoint from the previous attempt")
950
+ checkpoint_path = current.checkpoint.directory
951
+
952
+ model = create_model(self.parameters, checkpoint_path = checkpoint_path)
953
+ for i in range(self.epochs):
954
+ ...
1019
955
  ```
1020
956
 
1021
957
 
1022
958
  Parameters
1023
959
  ----------
1024
- load : Union[List[str],str,List[Tuple[str,Union[str,None]]]], default: None
1025
- Artifact name/s referencing the models/checkpoints to load. Artifact names refer to the names of the instance variables set to `self`.
1026
- These artifact names give to `load` be reference objects or reference `key` string's from objects created by `current.checkpoint` / `current.model` / `current.huggingface_hub`.
1027
- If a list of tuples is provided, the first element is the artifact name and the second element is the path the artifact needs be unpacked on
1028
- the local filesystem. If the second element is None, the artifact will be unpacked in the current working directory.
1029
- If a string is provided, then the artifact corresponding to that name will be loaded in the current working directory.
960
+ load_policy : str, default: "fresh"
961
+ The policy for loading the checkpoint. The following policies are supported:
962
+ - "eager": Loads the latest available checkpoint within the namespace.
963
+ With this mode, the latest checkpoint written by any previous task (can be even a different run) of the step
964
+ will be loaded at the start of the task.
965
+ - "none": Do not load any checkpoint
966
+ - "fresh": Loads the latest checkpoint created within the running Task.
967
+ This mode helps loading checkpoints across various retry attempts of the same task.
968
+ With this mode, no checkpoint will be loaded at the start of a task but any checkpoints
969
+ created within the task will be loaded when the task retries execution on failure.
1030
970
 
1031
971
  temp_dir_root : str, default: None
1032
- The root directory under which `current.model.loaded` will store loaded models
972
+ The root directory under which `current.checkpoint.directory` will be created.
1033
973
  """
1034
974
  ...
1035
975
 
1036
976
  @typing.overload
1037
- def model(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
977
+ def test_append_card(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
978
+ """
979
+ A simple decorator that demonstrates using CardDecoratorInjector
980
+ to inject a card and render simple markdown content.
981
+ """
1038
982
  ...
1039
983
 
1040
984
  @typing.overload
1041
- def model(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
985
+ def test_append_card(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
986
+ ...
987
+
988
+ def test_append_card(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
989
+ """
990
+ A simple decorator that demonstrates using CardDecoratorInjector
991
+ to inject a card and render simple markdown content.
992
+ """
993
+ ...
994
+
995
+ @typing.overload
996
+ def app_deploy(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
997
+ """
998
+ Decorator prototype for all step decorators. This function gets specialized
999
+ and imported for all decorators types by _import_plugin_decorators().
1000
+ """
1001
+ ...
1002
+
1003
+ @typing.overload
1004
+ def app_deploy(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1005
+ ...
1006
+
1007
+ def app_deploy(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
1008
+ """
1009
+ Decorator prototype for all step decorators. This function gets specialized
1010
+ and imported for all decorators types by _import_plugin_decorators().
1011
+ """
1012
+ ...
1013
+
1014
+ @typing.overload
1015
+ def card(*, type: str = 'default', id: typing.Optional[str] = None, options: typing.Dict[str, typing.Any] = {}, timeout: int = 45) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1016
+ """
1017
+ Creates a human-readable report, a Metaflow Card, after this step completes.
1018
+
1019
+ Note that you may add multiple `@card` decorators in a step with different parameters.
1020
+
1021
+
1022
+ Parameters
1023
+ ----------
1024
+ type : str, default 'default'
1025
+ Card type.
1026
+ id : str, optional, default None
1027
+ If multiple cards are present, use this id to identify this card.
1028
+ options : Dict[str, Any], default {}
1029
+ Options passed to the card. The contents depend on the card type.
1030
+ timeout : int, default 45
1031
+ Interrupt reporting if it takes more than this many seconds.
1032
+ """
1033
+ ...
1034
+
1035
+ @typing.overload
1036
+ def card(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1037
+ ...
1038
+
1039
+ @typing.overload
1040
+ def card(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1041
+ ...
1042
+
1043
+ def card(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, type: str = 'default', id: typing.Optional[str] = None, options: typing.Dict[str, typing.Any] = {}, timeout: int = 45):
1044
+ """
1045
+ Creates a human-readable report, a Metaflow Card, after this step completes.
1046
+
1047
+ Note that you may add multiple `@card` decorators in a step with different parameters.
1048
+
1049
+
1050
+ Parameters
1051
+ ----------
1052
+ type : str, default 'default'
1053
+ Card type.
1054
+ id : str, optional, default None
1055
+ If multiple cards are present, use this id to identify this card.
1056
+ options : Dict[str, Any], default {}
1057
+ Options passed to the card. The contents depend on the card type.
1058
+ timeout : int, default 45
1059
+ Interrupt reporting if it takes more than this many seconds.
1060
+ """
1042
1061
  ...
1043
1062
 
1044
- def model(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, load: typing.Union[typing.List[str], str, typing.List[typing.Tuple[str, typing.Optional[str]]]] = None, temp_dir_root: str = None):
1063
+ def nvct(*, gpu: int, gpu_type: str) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1045
1064
  """
1046
- Enables loading / saving of models within a step.
1047
-
1048
- > Examples
1049
- - Saving Models
1050
- ```python
1051
- @model
1052
- @step
1053
- def train(self):
1054
- # current.model.save returns a dictionary reference to the model saved
1055
- self.my_model = current.model.save(
1056
- path_to_my_model,
1057
- label="my_model",
1058
- metadata={
1059
- "epochs": 10,
1060
- "batch-size": 32,
1061
- "learning-rate": 0.001,
1062
- }
1063
- )
1064
- self.next(self.test)
1065
-
1066
- @model(load="my_model")
1067
- @step
1068
- def test(self):
1069
- # `current.model.loaded` returns a dictionary of the loaded models
1070
- # where the key is the name of the artifact and the value is the path to the model
1071
- print(os.listdir(current.model.loaded["my_model"]))
1072
- self.next(self.end)
1073
- ```
1074
-
1075
- - Loading models
1076
- ```python
1077
- @step
1078
- def train(self):
1079
- # current.model.load returns the path to the model loaded
1080
- checkpoint_path = current.model.load(
1081
- self.checkpoint_key,
1082
- )
1083
- model_path = current.model.load(
1084
- self.model,
1085
- )
1086
- self.next(self.test)
1087
- ```
1065
+ Specifies that this step should execute on DGX cloud.
1088
1066
 
1089
1067
 
1090
1068
  Parameters
1091
1069
  ----------
1092
- load : Union[List[str],str,List[Tuple[str,Union[str,None]]]], default: None
1093
- Artifact name/s referencing the models/checkpoints to load. Artifact names refer to the names of the instance variables set to `self`.
1094
- These artifact names give to `load` be reference objects or reference `key` string's from objects created by `current.checkpoint` / `current.model` / `current.huggingface_hub`.
1095
- If a list of tuples is provided, the first element is the artifact name and the second element is the path the artifact needs be unpacked on
1096
- the local filesystem. If the second element is None, the artifact will be unpacked in the current working directory.
1097
- If a string is provided, then the artifact corresponding to that name will be loaded in the current working directory.
1098
-
1099
- temp_dir_root : str, default: None
1100
- The root directory under which `current.model.loaded` will store loaded models
1070
+ gpu : int
1071
+ Number of GPUs to use.
1072
+ gpu_type : str
1073
+ Type of Nvidia GPU to use.
1101
1074
  """
1102
1075
  ...
1103
1076
 
@@ -1121,230 +1094,427 @@ def parallel(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None],
1121
1094
  ...
1122
1095
 
1123
1096
  @typing.overload
1124
- def timeout(*, seconds: int = 0, minutes: int = 0, hours: int = 0) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1097
+ def nebius_s3_proxy(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1125
1098
  """
1126
- Specifies a timeout for your step.
1099
+ Nebius-specific S3 Proxy decorator for routing S3 requests through a local proxy service.
1100
+ It exists to make it easier for users to know that this decorator should only be used with
1101
+ a Neo Cloud like Nebius.
1102
+ """
1103
+ ...
1104
+
1105
+ @typing.overload
1106
+ def nebius_s3_proxy(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1107
+ ...
1108
+
1109
+ def nebius_s3_proxy(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
1110
+ """
1111
+ Nebius-specific S3 Proxy decorator for routing S3 requests through a local proxy service.
1112
+ It exists to make it easier for users to know that this decorator should only be used with
1113
+ a Neo Cloud like Nebius.
1114
+ """
1115
+ ...
1116
+
1117
+ def ollama(*, models: list, backend: str, force_pull: bool, cache_update_policy: str, force_cache_update: bool, debug: bool, circuit_breaker_config: dict, timeout_config: dict) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1118
+ """
1119
+ This decorator is used to run Ollama APIs as Metaflow task sidecars.
1127
1120
 
1128
- This decorator is useful if this step may hang indefinitely.
1121
+ User code call
1122
+ --------------
1123
+ @ollama(
1124
+ models=[...],
1125
+ ...
1126
+ )
1129
1127
 
1130
- This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
1131
- A timeout is considered to be an exception thrown by the step. It will cause the step to be
1132
- retried if needed and the exception will be caught by the `@catch` decorator, if present.
1128
+ Valid backend options
1129
+ ---------------------
1130
+ - 'local': Run as a separate process on the local task machine.
1131
+ - (TODO) 'managed': Outerbounds hosts and selects compute provider.
1132
+ - (TODO) 'remote': Spin up separate instance to serve Ollama models.
1133
1133
 
1134
- Note that all the values specified in parameters are added together so if you specify
1135
- 60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.
1134
+ Valid model options
1135
+ -------------------
1136
+ Any model here https://ollama.com/search, e.g. 'llama3.2', 'llama3.3'
1136
1137
 
1137
1138
 
1138
1139
  Parameters
1139
1140
  ----------
1140
- seconds : int, default 0
1141
- Number of seconds to wait prior to timing out.
1142
- minutes : int, default 0
1143
- Number of minutes to wait prior to timing out.
1144
- hours : int, default 0
1145
- Number of hours to wait prior to timing out.
1141
+ models: list[str]
1142
+ List of Ollama containers running models in sidecars.
1143
+ backend: str
1144
+ Determines where and how to run the Ollama process.
1145
+ force_pull: bool
1146
+ Whether to run `ollama pull` no matter what, or first check the remote cache in Metaflow datastore for this model key.
1147
+ cache_update_policy: str
1148
+ Cache update policy: "auto", "force", or "never".
1149
+ force_cache_update: bool
1150
+ Simple override for "force" cache update policy.
1151
+ debug: bool
1152
+ Whether to turn on verbose debugging logs.
1153
+ circuit_breaker_config: dict
1154
+ Configuration for circuit breaker protection. Keys: failure_threshold, recovery_timeout, reset_timeout.
1155
+ timeout_config: dict
1156
+ Configuration for various operation timeouts. Keys: pull, stop, health_check, install, server_startup.
1146
1157
  """
1147
1158
  ...
1148
1159
 
1149
1160
  @typing.overload
1150
- def timeout(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1161
+ def conda(*, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1162
+ """
1163
+ Specifies the Conda environment for the step.
1164
+
1165
+ Information in this decorator will augment any
1166
+ attributes set in the `@conda_base` flow-level decorator. Hence,
1167
+ you can use `@conda_base` to set packages required by all
1168
+ steps and use `@conda` to specify step-specific overrides.
1169
+
1170
+
1171
+ Parameters
1172
+ ----------
1173
+ packages : Dict[str, str], default {}
1174
+ Packages to use for this step. The key is the name of the package
1175
+ and the value is the version to use.
1176
+ libraries : Dict[str, str], default {}
1177
+ Supported for backward compatibility. When used with packages, packages will take precedence.
1178
+ python : str, optional, default None
1179
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
1180
+ that the version used will correspond to the version of the Python interpreter used to start the run.
1181
+ disabled : bool, default False
1182
+ If set to True, disables @conda.
1183
+ """
1151
1184
  ...
1152
1185
 
1153
1186
  @typing.overload
1154
- def timeout(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1187
+ def conda(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1155
1188
  ...
1156
1189
 
1157
- def timeout(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, seconds: int = 0, minutes: int = 0, hours: int = 0):
1190
+ @typing.overload
1191
+ def conda(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1192
+ ...
1193
+
1194
+ def conda(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False):
1158
1195
  """
1159
- Specifies a timeout for your step.
1196
+ Specifies the Conda environment for the step.
1160
1197
 
1161
- This decorator is useful if this step may hang indefinitely.
1198
+ Information in this decorator will augment any
1199
+ attributes set in the `@conda_base` flow-level decorator. Hence,
1200
+ you can use `@conda_base` to set packages required by all
1201
+ steps and use `@conda` to specify step-specific overrides.
1162
1202
 
1163
- This can be used in conjunction with the `@retry` decorator as well as the `@catch` decorator.
1164
- A timeout is considered to be an exception thrown by the step. It will cause the step to be
1165
- retried if needed and the exception will be caught by the `@catch` decorator, if present.
1166
1203
 
1167
- Note that all the values specified in parameters are added together so if you specify
1168
- 60 seconds and 1 hour, the decorator will have an effective timeout of 1 hour and 1 minute.
1204
+ Parameters
1205
+ ----------
1206
+ packages : Dict[str, str], default {}
1207
+ Packages to use for this step. The key is the name of the package
1208
+ and the value is the version to use.
1209
+ libraries : Dict[str, str], default {}
1210
+ Supported for backward compatibility. When used with packages, packages will take precedence.
1211
+ python : str, optional, default None
1212
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
1213
+ that the version used will correspond to the version of the Python interpreter used to start the run.
1214
+ disabled : bool, default False
1215
+ If set to True, disables @conda.
1216
+ """
1217
+ ...
1218
+
1219
+ @typing.overload
1220
+ def secrets(*, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = [], role: typing.Optional[str] = None) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1221
+ """
1222
+ Specifies secrets to be retrieved and injected as environment variables prior to
1223
+ the execution of a step.
1169
1224
 
1170
1225
 
1171
1226
  Parameters
1172
1227
  ----------
1173
- seconds : int, default 0
1174
- Number of seconds to wait prior to timing out.
1175
- minutes : int, default 0
1176
- Number of minutes to wait prior to timing out.
1177
- hours : int, default 0
1178
- Number of hours to wait prior to timing out.
1228
+ sources : List[Union[str, Dict[str, Any]]], default: []
1229
+ List of secret specs, defining how the secrets are to be retrieved
1230
+ role : str, optional, default: None
1231
+ Role to use for fetching secrets
1232
+ """
1233
+ ...
1234
+
1235
+ @typing.overload
1236
+ def secrets(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1237
+ ...
1238
+
1239
+ @typing.overload
1240
+ def secrets(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1241
+ ...
1242
+
1243
+ def secrets(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, sources: typing.List[typing.Union[str, typing.Dict[str, typing.Any]]] = [], role: typing.Optional[str] = None):
1244
+ """
1245
+ Specifies secrets to be retrieved and injected as environment variables prior to
1246
+ the execution of a step.
1247
+
1248
+
1249
+ Parameters
1250
+ ----------
1251
+ sources : List[Union[str, Dict[str, Any]]], default: []
1252
+ List of secret specs, defining how the secrets are to be retrieved
1253
+ role : str, optional, default: None
1254
+ Role to use for fetching secrets
1255
+ """
1256
+ ...
1257
+
1258
+ @typing.overload
1259
+ def fast_bakery_internal(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1260
+ """
1261
+ Internal decorator to support Fast bakery
1262
+ """
1263
+ ...
1264
+
1265
+ @typing.overload
1266
+ def fast_bakery_internal(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1267
+ ...
1268
+
1269
+ def fast_bakery_internal(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
1270
+ """
1271
+ Internal decorator to support Fast bakery
1272
+ """
1273
+ ...
1274
+
1275
+ @typing.overload
1276
+ def coreweave_s3_proxy(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1277
+ """
1278
+ CoreWeave-specific S3 Proxy decorator for routing S3 requests through a local proxy service.
1279
+ It exists to make it easier for users to know that this decorator should only be used with
1280
+ a Neo Cloud like CoreWeave.
1281
+ """
1282
+ ...
1283
+
1284
+ @typing.overload
1285
+ def coreweave_s3_proxy(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1286
+ ...
1287
+
1288
+ def coreweave_s3_proxy(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
1289
+ """
1290
+ CoreWeave-specific S3 Proxy decorator for routing S3 requests through a local proxy service.
1291
+ It exists to make it easier for users to know that this decorator should only be used with
1292
+ a Neo Cloud like CoreWeave.
1179
1293
  """
1180
1294
  ...
1181
1295
 
1182
1296
  @typing.overload
1183
- def card(*, type: str = 'default', id: typing.Optional[str] = None, options: typing.Dict[str, typing.Any] = {}, timeout: int = 45) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1297
+ def retry(*, times: int = 3, minutes_between_retries: int = 2) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1184
1298
  """
1185
- Creates a human-readable report, a Metaflow Card, after this step completes.
1299
+ Specifies the number of times the task corresponding
1300
+ to a step needs to be retried.
1186
1301
 
1187
- Note that you may add multiple `@card` decorators in a step with different parameters.
1302
+ This decorator is useful for handling transient errors, such as networking issues.
1303
+ If your task contains operations that can't be retried safely, e.g. database updates,
1304
+ it is advisable to annotate it with `@retry(times=0)`.
1305
+
1306
+ This can be used in conjunction with the `@catch` decorator. The `@catch`
1307
+ decorator will execute a no-op task after all retries have been exhausted,
1308
+ ensuring that the flow execution can continue.
1188
1309
 
1189
1310
 
1190
1311
  Parameters
1191
1312
  ----------
1192
- type : str, default 'default'
1193
- Card type.
1194
- id : str, optional, default None
1195
- If multiple cards are present, use this id to identify this card.
1196
- options : Dict[str, Any], default {}
1197
- Options passed to the card. The contents depend on the card type.
1198
- timeout : int, default 45
1199
- Interrupt reporting if it takes more than this many seconds.
1313
+ times : int, default 3
1314
+ Number of times to retry this task.
1315
+ minutes_between_retries : int, default 2
1316
+ Number of minutes between retries.
1200
1317
  """
1201
1318
  ...
1202
1319
 
1203
1320
  @typing.overload
1204
- def card(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1321
+ def retry(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1205
1322
  ...
1206
1323
 
1207
1324
  @typing.overload
1208
- def card(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1325
+ def retry(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1209
1326
  ...
1210
1327
 
1211
- def card(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, type: str = 'default', id: typing.Optional[str] = None, options: typing.Dict[str, typing.Any] = {}, timeout: int = 45):
1328
+ def retry(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, times: int = 3, minutes_between_retries: int = 2):
1212
1329
  """
1213
- Creates a human-readable report, a Metaflow Card, after this step completes.
1330
+ Specifies the number of times the task corresponding
1331
+ to a step needs to be retried.
1214
1332
 
1215
- Note that you may add multiple `@card` decorators in a step with different parameters.
1333
+ This decorator is useful for handling transient errors, such as networking issues.
1334
+ If your task contains operations that can't be retried safely, e.g. database updates,
1335
+ it is advisable to annotate it with `@retry(times=0)`.
1336
+
1337
+ This can be used in conjunction with the `@catch` decorator. The `@catch`
1338
+ decorator will execute a no-op task after all retries have been exhausted,
1339
+ ensuring that the flow execution can continue.
1216
1340
 
1217
1341
 
1218
1342
  Parameters
1219
1343
  ----------
1220
- type : str, default 'default'
1221
- Card type.
1222
- id : str, optional, default None
1223
- If multiple cards are present, use this id to identify this card.
1224
- options : Dict[str, Any], default {}
1225
- Options passed to the card. The contents depend on the card type.
1226
- timeout : int, default 45
1227
- Interrupt reporting if it takes more than this many seconds.
1344
+ times : int, default 3
1345
+ Number of times to retry this task.
1346
+ minutes_between_retries : int, default 2
1347
+ Number of minutes between retries.
1228
1348
  """
1229
1349
  ...
1230
1350
 
1231
1351
  @typing.overload
1232
- def app_deploy(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1352
+ def pypi_base(*, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1233
1353
  """
1234
- Decorator prototype for all step decorators. This function gets specialized
1235
- and imported for all decorators types by _import_plugin_decorators().
1354
+ Specifies the PyPI packages for all steps of the flow.
1355
+
1356
+ Use `@pypi_base` to set common packages required by all
1357
+ steps and use `@pypi` to specify step-specific overrides.
1358
+
1359
+ Parameters
1360
+ ----------
1361
+ packages : Dict[str, str], default: {}
1362
+ Packages to use for this flow. The key is the name of the package
1363
+ and the value is the version to use.
1364
+ python : str, optional, default: None
1365
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
1366
+ that the version used will correspond to the version of the Python interpreter used to start the run.
1236
1367
  """
1237
1368
  ...
1238
1369
 
1239
1370
  @typing.overload
1240
- def app_deploy(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1371
+ def pypi_base(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1241
1372
  ...
1242
1373
 
1243
- def app_deploy(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None):
1374
+ def pypi_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None):
1244
1375
  """
1245
- Decorator prototype for all step decorators. This function gets specialized
1246
- and imported for all decorators types by _import_plugin_decorators().
1376
+ Specifies the PyPI packages for all steps of the flow.
1377
+
1378
+ Use `@pypi_base` to set common packages required by all
1379
+ steps and use `@pypi` to specify step-specific overrides.
1380
+
1381
+ Parameters
1382
+ ----------
1383
+ packages : Dict[str, str], default: {}
1384
+ Packages to use for this flow. The key is the name of the package
1385
+ and the value is the version to use.
1386
+ python : str, optional, default: None
1387
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
1388
+ that the version used will correspond to the version of the Python interpreter used to start the run.
1247
1389
  """
1248
1390
  ...
1249
1391
 
1250
1392
  @typing.overload
1251
- def catch(*, var: typing.Optional[str] = None, print_exception: bool = True) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1393
+ def schedule(*, hourly: bool = False, daily: bool = True, weekly: bool = False, cron: typing.Optional[str] = None, timezone: typing.Optional[str] = None) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1252
1394
  """
1253
- Specifies that the step will success under all circumstances.
1254
-
1255
- The decorator will create an optional artifact, specified by `var`, which
1256
- contains the exception raised. You can use it to detect the presence
1257
- of errors, indicating that all happy-path artifacts produced by the step
1258
- are missing.
1395
+ Specifies the times when the flow should be run when running on a
1396
+ production scheduler.
1259
1397
 
1260
1398
 
1261
1399
  Parameters
1262
1400
  ----------
1263
- var : str, optional, default None
1264
- Name of the artifact in which to store the caught exception.
1265
- If not specified, the exception is not stored.
1266
- print_exception : bool, default True
1267
- Determines whether or not the exception is printed to
1268
- stdout when caught.
1401
+ hourly : bool, default False
1402
+ Run the workflow hourly.
1403
+ daily : bool, default True
1404
+ Run the workflow daily.
1405
+ weekly : bool, default False
1406
+ Run the workflow weekly.
1407
+ cron : str, optional, default None
1408
+ Run the workflow at [a custom Cron schedule](https://docs.aws.amazon.com/eventbridge/latest/userguide/scheduled-events.html#cron-expressions)
1409
+ specified by this expression.
1410
+ timezone : str, optional, default None
1411
+ Timezone on which the schedule runs (default: None). Currently supported only for Argo workflows,
1412
+ which accepts timezones in [IANA format](https://nodatime.org/TimeZones).
1269
1413
  """
1270
1414
  ...
1271
1415
 
1272
1416
  @typing.overload
1273
- def catch(f: typing.Callable[[FlowSpecDerived, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, StepFlag], None]:
1274
- ...
1275
-
1276
- @typing.overload
1277
- def catch(f: typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]) -> typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]:
1417
+ def schedule(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1278
1418
  ...
1279
1419
 
1280
- def catch(f: typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None], None] = None, *, var: typing.Optional[str] = None, print_exception: bool = True):
1420
+ def schedule(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, hourly: bool = False, daily: bool = True, weekly: bool = False, cron: typing.Optional[str] = None, timezone: typing.Optional[str] = None):
1281
1421
  """
1282
- Specifies that the step will success under all circumstances.
1283
-
1284
- The decorator will create an optional artifact, specified by `var`, which
1285
- contains the exception raised. You can use it to detect the presence
1286
- of errors, indicating that all happy-path artifacts produced by the step
1287
- are missing.
1422
+ Specifies the times when the flow should be run when running on a
1423
+ production scheduler.
1288
1424
 
1289
1425
 
1290
1426
  Parameters
1291
1427
  ----------
1292
- var : str, optional, default None
1293
- Name of the artifact in which to store the caught exception.
1294
- If not specified, the exception is not stored.
1295
- print_exception : bool, default True
1296
- Determines whether or not the exception is printed to
1297
- stdout when caught.
1428
+ hourly : bool, default False
1429
+ Run the workflow hourly.
1430
+ daily : bool, default True
1431
+ Run the workflow daily.
1432
+ weekly : bool, default False
1433
+ Run the workflow weekly.
1434
+ cron : str, optional, default None
1435
+ Run the workflow at [a custom Cron schedule](https://docs.aws.amazon.com/eventbridge/latest/userguide/scheduled-events.html#cron-expressions)
1436
+ specified by this expression.
1437
+ timezone : str, optional, default None
1438
+ Timezone on which the schedule runs (default: None). Currently supported only for Argo workflows,
1439
+ which accepts timezones in [IANA format](https://nodatime.org/TimeZones).
1298
1440
  """
1299
1441
  ...
1300
1442
 
1301
- def vllm(*, model: str, backend: str, openai_api_server: bool, debug: bool, card_refresh_interval: int, max_retries: int, retry_alert_frequency: int, engine_args: dict) -> typing.Callable[[typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]], typing.Union[typing.Callable[[FlowSpecDerived, StepFlag], None], typing.Callable[[FlowSpecDerived, typing.Any, StepFlag], None]]]:
1443
+ def project(*, name: str, branch: typing.Optional[str] = None, production: bool = False) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1302
1444
  """
1303
- This decorator is used to run vllm APIs as Metaflow task sidecars.
1445
+ Specifies what flows belong to the same project.
1304
1446
 
1305
- User code call
1306
- --------------
1307
- @vllm(
1308
- model="...",
1309
- ...
1310
- )
1447
+ A project-specific namespace is created for all flows that
1448
+ use the same `@project(name)`.
1311
1449
 
1312
- Valid backend options
1313
- ---------------------
1314
- - 'local': Run as a separate process on the local task machine.
1315
1450
 
1316
- Valid model options
1317
- -------------------
1318
- Any HuggingFace model identifier, e.g. 'meta-llama/Llama-3.2-1B'
1451
+ Parameters
1452
+ ----------
1453
+ name : str
1454
+ Project name. Make sure that the name is unique amongst all
1455
+ projects that use the same production scheduler. The name may
1456
+ contain only lowercase alphanumeric characters and underscores.
1319
1457
 
1320
- NOTE: vLLM's OpenAI-compatible server serves ONE model per server instance.
1321
- If you need multiple models, you must create multiple @vllm decorators.
1458
+ branch : Optional[str], default None
1459
+ The branch to use. If not specified, the branch is set to
1460
+ `user.<username>` unless `production` is set to `True`. This can
1461
+ also be set on the command line using `--branch` as a top-level option.
1462
+ It is an error to specify `branch` in the decorator and on the command line.
1463
+
1464
+ production : bool, default False
1465
+ Whether or not the branch is the production branch. This can also be set on the
1466
+ command line using `--production` as a top-level option. It is an error to specify
1467
+ `production` in the decorator and on the command line.
1468
+ The project branch name will be:
1469
+ - if `branch` is specified:
1470
+ - if `production` is True: `prod.<branch>`
1471
+ - if `production` is False: `test.<branch>`
1472
+ - if `branch` is not specified:
1473
+ - if `production` is True: `prod`
1474
+ - if `production` is False: `user.<username>`
1475
+ """
1476
+ ...
1477
+
1478
+ def airflow_s3_key_sensor(*, timeout: int, poke_interval: int, mode: str, exponential_backoff: bool, pool: str, soft_fail: bool, name: str, description: str, bucket_key: typing.Union[str, typing.List[str]], bucket_name: str, wildcard_match: bool, aws_conn_id: str, verify: bool) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1479
+ """
1480
+ The `@airflow_s3_key_sensor` decorator attaches an Airflow [S3KeySensor](https://airflow.apache.org/docs/apache-airflow-providers-amazon/stable/_api/airflow/providers/amazon/aws/sensors/s3/index.html#airflow.providers.amazon.aws.sensors.s3.S3KeySensor)
1481
+ before the start step of the flow. This decorator only works when a flow is scheduled on Airflow
1482
+ and is compiled using `airflow create`. More than one `@airflow_s3_key_sensor` can be
1483
+ added as flow decorators. Adding more than one decorator will ensure that the `start` step
1484
+ starts only after all sensors finish.
1322
1485
 
1323
1486
 
1324
1487
  Parameters
1325
1488
  ----------
1326
- model: str
1327
- HuggingFace model identifier to be served by vLLM.
1328
- backend: str
1329
- Determines where and how to run the vLLM process.
1330
- openai_api_server: bool
1331
- Whether to use OpenAI-compatible API server mode (subprocess) instead of native engine.
1332
- Default is False (uses native engine).
1333
- Set to True for backward compatibility with existing code.
1334
- debug: bool
1335
- Whether to turn on verbose debugging logs.
1336
- card_refresh_interval: int
1337
- Interval in seconds for refreshing the vLLM status card.
1338
- Only used when openai_api_server=True.
1339
- max_retries: int
1340
- Maximum number of retries checking for vLLM server startup.
1341
- Only used when openai_api_server=True.
1342
- retry_alert_frequency: int
1343
- Frequency of alert logs for vLLM server startup retries.
1344
- Only used when openai_api_server=True.
1345
- engine_args : dict
1346
- Additional keyword arguments to pass to the vLLM engine.
1347
- For example, `tensor_parallel_size=2`.
1489
+ timeout : int
1490
+ Time, in seconds, before the task times out and fails. (Default: 3600)
1491
+ poke_interval : int
1492
+ Time in seconds that the job should wait in between each try. (Default: 60)
1493
+ mode : str
1494
+ How the sensor operates. Options are: { poke | reschedule }. (Default: "poke")
1495
+ exponential_backoff : bool
1496
+ Allow progressively longer waits between pokes by using an exponential backoff algorithm. (Default: True)
1497
+ pool : str
1498
+ the slot pool this task should run in,
1499
+ slot pools are a way to limit concurrency for certain tasks. (Default:None)
1500
+ soft_fail : bool
1501
+ Set to true to mark the task as SKIPPED on failure. (Default: False)
1502
+ name : str
1503
+ Name of the sensor on Airflow
1504
+ description : str
1505
+ Description of sensor in the Airflow UI
1506
+ bucket_key : Union[str, List[str]]
1507
+ The key(s) being waited on. Supports full s3:// style url or relative path from root level.
1508
+ When it's specified as a full s3:// url, please leave `bucket_name` as None
1509
+ bucket_name : str
1510
+ Name of the S3 bucket. Only needed when bucket_key is not provided as a full s3:// url.
1511
+ When specified, all the keys passed to bucket_key refer to this bucket. (Default:None)
1512
+ wildcard_match : bool
1513
+ whether the bucket_key should be interpreted as a Unix wildcard pattern. (Default: False)
1514
+ aws_conn_id : str
1515
+ a reference to the s3 connection on Airflow. (Default: None)
1516
+ verify : bool
1517
+ Whether or not to verify SSL certificates for S3 connection. (Default: None)
1348
1518
  """
1349
1519
  ...
1350
1520
 
@@ -1441,155 +1611,97 @@ def trigger(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, event: t
1441
1611
  """
1442
1612
  ...
1443
1613
 
1444
- @typing.overload
1445
- def schedule(*, hourly: bool = False, daily: bool = True, weekly: bool = False, cron: typing.Optional[str] = None, timezone: typing.Optional[str] = None) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1446
- """
1447
- Specifies the times when the flow should be run when running on a
1448
- production scheduler.
1449
-
1450
-
1451
- Parameters
1452
- ----------
1453
- hourly : bool, default False
1454
- Run the workflow hourly.
1455
- daily : bool, default True
1456
- Run the workflow daily.
1457
- weekly : bool, default False
1458
- Run the workflow weekly.
1459
- cron : str, optional, default None
1460
- Run the workflow at [a custom Cron schedule](https://docs.aws.amazon.com/eventbridge/latest/userguide/scheduled-events.html#cron-expressions)
1461
- specified by this expression.
1462
- timezone : str, optional, default None
1463
- Timezone on which the schedule runs (default: None). Currently supported only for Argo workflows,
1464
- which accepts timezones in [IANA format](https://nodatime.org/TimeZones).
1465
- """
1466
- ...
1467
-
1468
- @typing.overload
1469
- def schedule(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1470
- ...
1471
-
1472
- def schedule(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, hourly: bool = False, daily: bool = True, weekly: bool = False, cron: typing.Optional[str] = None, timezone: typing.Optional[str] = None):
1614
+ def airflow_external_task_sensor(*, timeout: int, poke_interval: int, mode: str, exponential_backoff: bool, pool: str, soft_fail: bool, name: str, description: str, external_dag_id: str, external_task_ids: typing.List[str], allowed_states: typing.List[str], failed_states: typing.List[str], execution_delta: "datetime.timedelta", check_existence: bool) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1473
1615
  """
1474
- Specifies the times when the flow should be run when running on a
1475
- production scheduler.
1616
+ The `@airflow_external_task_sensor` decorator attaches a Airflow [ExternalTaskSensor](https://airflow.apache.org/docs/apache-airflow/stable/_api/airflow/sensors/external_task/index.html#airflow.sensors.external_task.ExternalTaskSensor) before the start step of the flow.
1617
+ This decorator only works when a flow is scheduled on Airflow and is compiled using `airflow create`. More than one `@airflow_external_task_sensor` can be added as a flow decorators. Adding more than one decorator will ensure that `start` step starts only after all sensors finish.
1476
1618
 
1477
1619
 
1478
1620
  Parameters
1479
1621
  ----------
1480
- hourly : bool, default False
1481
- Run the workflow hourly.
1482
- daily : bool, default True
1483
- Run the workflow daily.
1484
- weekly : bool, default False
1485
- Run the workflow weekly.
1486
- cron : str, optional, default None
1487
- Run the workflow at [a custom Cron schedule](https://docs.aws.amazon.com/eventbridge/latest/userguide/scheduled-events.html#cron-expressions)
1488
- specified by this expression.
1489
- timezone : str, optional, default None
1490
- Timezone on which the schedule runs (default: None). Currently supported only for Argo workflows,
1491
- which accepts timezones in [IANA format](https://nodatime.org/TimeZones).
1622
+ timeout : int
1623
+ Time, in seconds before the task times out and fails. (Default: 3600)
1624
+ poke_interval : int
1625
+ Time in seconds that the job should wait in between each try. (Default: 60)
1626
+ mode : str
1627
+ How the sensor operates. Options are: { poke | reschedule }. (Default: "poke")
1628
+ exponential_backoff : bool
1629
+ allow progressive longer waits between pokes by using exponential backoff algorithm. (Default: True)
1630
+ pool : str
1631
+ the slot pool this task should run in,
1632
+ slot pools are a way to limit concurrency for certain tasks. (Default:None)
1633
+ soft_fail : bool
1634
+ Set to true to mark the task as SKIPPED on failure. (Default: False)
1635
+ name : str
1636
+ Name of the sensor on Airflow
1637
+ description : str
1638
+ Description of sensor in the Airflow UI
1639
+ external_dag_id : str
1640
+ The dag_id that contains the task you want to wait for.
1641
+ external_task_ids : List[str]
1642
+ The list of task_ids that you want to wait for.
1643
+ If None (default value) the sensor waits for the DAG. (Default: None)
1644
+ allowed_states : List[str]
1645
+ Iterable of allowed states, (Default: ['success'])
1646
+ failed_states : List[str]
1647
+ Iterable of failed or dis-allowed states. (Default: None)
1648
+ execution_delta : datetime.timedelta
1649
+ time difference with the previous execution to look at,
1650
+ the default is the same logical date as the current task or DAG. (Default: None)
1651
+ check_existence: bool
1652
+ Set to True to check if the external task exists or check if
1653
+ the DAG to wait for exists. (Default: True)
1492
1654
  """
1493
1655
  ...
1494
1656
 
1495
1657
  @typing.overload
1496
- def trigger_on_finish(*, flow: typing.Union[typing.Dict[str, str], str, None] = None, flows: typing.List[typing.Union[str, typing.Dict[str, str]]] = [], options: typing.Dict[str, typing.Any] = {}) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1658
+ def conda_base(*, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1497
1659
  """
1498
- Specifies the flow(s) that this flow depends on.
1499
-
1500
- ```
1501
- @trigger_on_finish(flow='FooFlow')
1502
- ```
1503
- or
1504
- ```
1505
- @trigger_on_finish(flows=['FooFlow', 'BarFlow'])
1506
- ```
1507
- This decorator respects the @project decorator and triggers the flow
1508
- when upstream runs within the same namespace complete successfully
1509
-
1510
- Additionally, you can specify project aware upstream flow dependencies
1511
- by specifying the fully qualified project_flow_name.
1512
- ```
1513
- @trigger_on_finish(flow='my_project.branch.my_branch.FooFlow')
1514
- ```
1515
- or
1516
- ```
1517
- @trigger_on_finish(flows=['my_project.branch.my_branch.FooFlow', 'BarFlow'])
1518
- ```
1519
-
1520
- You can also specify just the project or project branch (other values will be
1521
- inferred from the current project or project branch):
1522
- ```
1523
- @trigger_on_finish(flow={"name": "FooFlow", "project": "my_project", "project_branch": "branch"})
1524
- ```
1660
+ Specifies the Conda environment for all steps of the flow.
1525
1661
 
1526
- Note that `branch` is typically one of:
1527
- - `prod`
1528
- - `user.bob`
1529
- - `test.my_experiment`
1530
- - `prod.staging`
1662
+ Use `@conda_base` to set common libraries required by all
1663
+ steps and use `@conda` to specify step-specific additions.
1531
1664
 
1532
1665
 
1533
1666
  Parameters
1534
1667
  ----------
1535
- flow : Union[str, Dict[str, str]], optional, default None
1536
- Upstream flow dependency for this flow.
1537
- flows : List[Union[str, Dict[str, str]]], default []
1538
- Upstream flow dependencies for this flow.
1539
- options : Dict[str, Any], default {}
1540
- Backend-specific configuration for tuning eventing behavior.
1668
+ packages : Dict[str, str], default {}
1669
+ Packages to use for this flow. The key is the name of the package
1670
+ and the value is the version to use.
1671
+ libraries : Dict[str, str], default {}
1672
+ Supported for backward compatibility. When used with packages, packages will take precedence.
1673
+ python : str, optional, default None
1674
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
1675
+ that the version used will correspond to the version of the Python interpreter used to start the run.
1676
+ disabled : bool, default False
1677
+ If set to True, disables Conda.
1541
1678
  """
1542
1679
  ...
1543
1680
 
1544
1681
  @typing.overload
1545
- def trigger_on_finish(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1682
+ def conda_base(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1546
1683
  ...
1547
1684
 
1548
- def trigger_on_finish(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, flow: typing.Union[typing.Dict[str, str], str, None] = None, flows: typing.List[typing.Union[str, typing.Dict[str, str]]] = [], options: typing.Dict[str, typing.Any] = {}):
1685
+ def conda_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False):
1549
1686
  """
1550
- Specifies the flow(s) that this flow depends on.
1551
-
1552
- ```
1553
- @trigger_on_finish(flow='FooFlow')
1554
- ```
1555
- or
1556
- ```
1557
- @trigger_on_finish(flows=['FooFlow', 'BarFlow'])
1558
- ```
1559
- This decorator respects the @project decorator and triggers the flow
1560
- when upstream runs within the same namespace complete successfully
1561
-
1562
- Additionally, you can specify project aware upstream flow dependencies
1563
- by specifying the fully qualified project_flow_name.
1564
- ```
1565
- @trigger_on_finish(flow='my_project.branch.my_branch.FooFlow')
1566
- ```
1567
- or
1568
- ```
1569
- @trigger_on_finish(flows=['my_project.branch.my_branch.FooFlow', 'BarFlow'])
1570
- ```
1571
-
1572
- You can also specify just the project or project branch (other values will be
1573
- inferred from the current project or project branch):
1574
- ```
1575
- @trigger_on_finish(flow={"name": "FooFlow", "project": "my_project", "project_branch": "branch"})
1576
- ```
1687
+ Specifies the Conda environment for all steps of the flow.
1577
1688
 
1578
- Note that `branch` is typically one of:
1579
- - `prod`
1580
- - `user.bob`
1581
- - `test.my_experiment`
1582
- - `prod.staging`
1689
+ Use `@conda_base` to set common libraries required by all
1690
+ steps and use `@conda` to specify step-specific additions.
1583
1691
 
1584
1692
 
1585
1693
  Parameters
1586
1694
  ----------
1587
- flow : Union[str, Dict[str, str]], optional, default None
1588
- Upstream flow dependency for this flow.
1589
- flows : List[Union[str, Dict[str, str]]], default []
1590
- Upstream flow dependencies for this flow.
1591
- options : Dict[str, Any], default {}
1592
- Backend-specific configuration for tuning eventing behavior.
1695
+ packages : Dict[str, str], default {}
1696
+ Packages to use for this flow. The key is the name of the package
1697
+ and the value is the version to use.
1698
+ libraries : Dict[str, str], default {}
1699
+ Supported for backward compatibility. When used with packages, packages will take precedence.
1700
+ python : str, optional, default None
1701
+ Version of Python to use, e.g. '3.7.4'. A default value of None implies
1702
+ that the version used will correspond to the version of the Python interpreter used to start the run.
1703
+ disabled : bool, default False
1704
+ If set to True, disables Conda.
1593
1705
  """
1594
1706
  ...
1595
1707
 
@@ -1672,251 +1784,139 @@ def with_artifact_store(f: typing.Optional[typing.Type[FlowSpecDerived]] = None)
1672
1784
  task=run["start"].task
1673
1785
  )[0]
1674
1786
  print(latest)
1675
- cp.load(
1676
- latest,
1677
- "test-checkpoints"
1678
- )
1679
-
1680
- task = Task("TorchTuneFlow/8484/train/53673")
1681
- with artifact_store_from(run=run, config={
1682
- "client_params": {
1683
- "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1684
- "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1685
- },
1686
- }):
1687
- load_model(
1688
- task.data.model_ref,
1689
- "test-models"
1690
- )
1691
- ```
1692
- Parameters:
1693
- ----------
1694
-
1695
- type: str
1696
- The type of the datastore. Can be one of 's3', 'gcs', 'azure' or any other supported metaflow Datastore.
1697
-
1698
- config: dict or Callable
1699
- Dictionary of configuration options for the datastore. The following keys are required:
1700
- - root: The root path in the datastore where the data will be saved. (needs to be in the format expected by the datastore)
1701
- - example: 's3://bucket-name/path/to/root'
1702
- - example: 'gs://bucket-name/path/to/root'
1703
- - example: 'https://myblockacc.blob.core.windows.net/metaflow/'
1704
- - role_arn (optional): AWS IAM role to access s3 bucket (only when `type` is 's3')
1705
- - session_vars (optional): AWS session variables to access s3 bucket (only when `type` is 's3')
1706
- - client_params (optional): AWS client parameters to access s3 bucket (only when `type` is 's3')
1707
- """
1708
- ...
1709
-
1710
- @typing.overload
1711
- def pypi_base(*, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1712
- """
1713
- Specifies the PyPI packages for all steps of the flow.
1714
-
1715
- Use `@pypi_base` to set common packages required by all
1716
- steps and use `@pypi` to specify step-specific overrides.
1717
-
1718
- Parameters
1719
- ----------
1720
- packages : Dict[str, str], default: {}
1721
- Packages to use for this flow. The key is the name of the package
1722
- and the value is the version to use.
1723
- python : str, optional, default: None
1724
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
1725
- that the version used will correspond to the version of the Python interpreter used to start the run.
1726
- """
1727
- ...
1728
-
1729
- @typing.overload
1730
- def pypi_base(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1731
- ...
1732
-
1733
- def pypi_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packages: typing.Dict[str, str] = {}, python: typing.Optional[str] = None):
1734
- """
1735
- Specifies the PyPI packages for all steps of the flow.
1736
-
1737
- Use `@pypi_base` to set common packages required by all
1738
- steps and use `@pypi` to specify step-specific overrides.
1787
+ cp.load(
1788
+ latest,
1789
+ "test-checkpoints"
1790
+ )
1739
1791
 
1740
- Parameters
1792
+ task = Task("TorchTuneFlow/8484/train/53673")
1793
+ with artifact_store_from(run=run, config={
1794
+ "client_params": {
1795
+ "aws_access_key_id": os.environ.get("MY_CUSTOM_ACCESS_KEY"),
1796
+ "aws_secret_access_key": os.environ.get("MY_CUSTOM_SECRET_KEY"),
1797
+ },
1798
+ }):
1799
+ load_model(
1800
+ task.data.model_ref,
1801
+ "test-models"
1802
+ )
1803
+ ```
1804
+ Parameters:
1741
1805
  ----------
1742
- packages : Dict[str, str], default: {}
1743
- Packages to use for this flow. The key is the name of the package
1744
- and the value is the version to use.
1745
- python : str, optional, default: None
1746
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
1747
- that the version used will correspond to the version of the Python interpreter used to start the run.
1748
- """
1749
- ...
1750
-
1751
- def airflow_s3_key_sensor(*, timeout: int, poke_interval: int, mode: str, exponential_backoff: bool, pool: str, soft_fail: bool, name: str, description: str, bucket_key: typing.Union[str, typing.List[str]], bucket_name: str, wildcard_match: bool, aws_conn_id: str, verify: bool) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1752
- """
1753
- The `@airflow_s3_key_sensor` decorator attaches a Airflow [S3KeySensor](https://airflow.apache.org/docs/apache-airflow-providers-amazon/stable/_api/airflow/providers/amazon/aws/sensors/s3/index.html#airflow.providers.amazon.aws.sensors.s3.S3KeySensor)
1754
- before the start step of the flow. This decorator only works when a flow is scheduled on Airflow
1755
- and is compiled using `airflow create`. More than one `@airflow_s3_key_sensor` can be
1756
- added as a flow decorators. Adding more than one decorator will ensure that `start` step
1757
- starts only after all sensors finish.
1758
1806
 
1807
+ type: str
1808
+ The type of the datastore. Can be one of 's3', 'gcs', 'azure' or any other supported metaflow Datastore.
1759
1809
 
1760
- Parameters
1761
- ----------
1762
- timeout : int
1763
- Time, in seconds before the task times out and fails. (Default: 3600)
1764
- poke_interval : int
1765
- Time in seconds that the job should wait in between each try. (Default: 60)
1766
- mode : str
1767
- How the sensor operates. Options are: { poke | reschedule }. (Default: "poke")
1768
- exponential_backoff : bool
1769
- allow progressive longer waits between pokes by using exponential backoff algorithm. (Default: True)
1770
- pool : str
1771
- the slot pool this task should run in,
1772
- slot pools are a way to limit concurrency for certain tasks. (Default:None)
1773
- soft_fail : bool
1774
- Set to true to mark the task as SKIPPED on failure. (Default: False)
1775
- name : str
1776
- Name of the sensor on Airflow
1777
- description : str
1778
- Description of sensor in the Airflow UI
1779
- bucket_key : Union[str, List[str]]
1780
- The key(s) being waited on. Supports full s3:// style url or relative path from root level.
1781
- When it's specified as a full s3:// url, please leave `bucket_name` as None
1782
- bucket_name : str
1783
- Name of the S3 bucket. Only needed when bucket_key is not provided as a full s3:// url.
1784
- When specified, all the keys passed to bucket_key refers to this bucket. (Default:None)
1785
- wildcard_match : bool
1786
- whether the bucket_key should be interpreted as a Unix wildcard pattern. (Default: False)
1787
- aws_conn_id : str
1788
- a reference to the s3 connection on Airflow. (Default: None)
1789
- verify : bool
1790
- Whether or not to verify SSL certificates for S3 connection. (Default: None)
1810
+ config: dict or Callable
1811
+ Dictionary of configuration options for the datastore. The following keys are required:
1812
+ - root: The root path in the datastore where the data will be saved. (needs to be in the format expected by the datastore)
1813
+ - example: 's3://bucket-name/path/to/root'
1814
+ - example: 'gs://bucket-name/path/to/root'
1815
+ - example: 'https://myblockacc.blob.core.windows.net/metaflow/'
1816
+ - role_arn (optional): AWS IAM role to access s3 bucket (only when `type` is 's3')
1817
+ - session_vars (optional): AWS session variables to access s3 bucket (only when `type` is 's3')
1818
+ - client_params (optional): AWS client parameters to access s3 bucket (only when `type` is 's3')
1791
1819
  """
1792
1820
  ...
1793
1821
 
1794
1822
  @typing.overload
1795
- def conda_base(*, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1823
+ def trigger_on_finish(*, flow: typing.Union[typing.Dict[str, str], str, None] = None, flows: typing.List[typing.Union[str, typing.Dict[str, str]]] = [], options: typing.Dict[str, typing.Any] = {}) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1796
1824
  """
1797
- Specifies the Conda environment for all steps of the flow.
1825
+ Specifies the flow(s) that this flow depends on.
1798
1826
 
1799
- Use `@conda_base` to set common libraries required by all
1800
- steps and use `@conda` to specify step-specific additions.
1827
+ ```
1828
+ @trigger_on_finish(flow='FooFlow')
1829
+ ```
1830
+ or
1831
+ ```
1832
+ @trigger_on_finish(flows=['FooFlow', 'BarFlow'])
1833
+ ```
1834
+ This decorator respects the @project decorator and triggers the flow
1835
+ when upstream runs within the same namespace complete successfully
1836
+
1837
+ Additionally, you can specify project aware upstream flow dependencies
1838
+ by specifying the fully qualified project_flow_name.
1839
+ ```
1840
+ @trigger_on_finish(flow='my_project.branch.my_branch.FooFlow')
1841
+ ```
1842
+ or
1843
+ ```
1844
+ @trigger_on_finish(flows=['my_project.branch.my_branch.FooFlow', 'BarFlow'])
1845
+ ```
1846
+
1847
+ You can also specify just the project or project branch (other values will be
1848
+ inferred from the current project or project branch):
1849
+ ```
1850
+ @trigger_on_finish(flow={"name": "FooFlow", "project": "my_project", "project_branch": "branch"})
1851
+ ```
1852
+
1853
+ Note that `branch` is typically one of:
1854
+ - `prod`
1855
+ - `user.bob`
1856
+ - `test.my_experiment`
1857
+ - `prod.staging`
1801
1858
 
1802
1859
 
1803
1860
  Parameters
1804
1861
  ----------
1805
- packages : Dict[str, str], default {}
1806
- Packages to use for this flow. The key is the name of the package
1807
- and the value is the version to use.
1808
- libraries : Dict[str, str], default {}
1809
- Supported for backward compatibility. When used with packages, packages will take precedence.
1810
- python : str, optional, default None
1811
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
1812
- that the version used will correspond to the version of the Python interpreter used to start the run.
1813
- disabled : bool, default False
1814
- If set to True, disables Conda.
1862
+ flow : Union[str, Dict[str, str]], optional, default None
1863
+ Upstream flow dependency for this flow.
1864
+ flows : List[Union[str, Dict[str, str]]], default []
1865
+ Upstream flow dependencies for this flow.
1866
+ options : Dict[str, Any], default {}
1867
+ Backend-specific configuration for tuning eventing behavior.
1815
1868
  """
1816
1869
  ...
1817
1870
 
1818
1871
  @typing.overload
1819
- def conda_base(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1872
+ def trigger_on_finish(f: typing.Type[FlowSpecDerived]) -> typing.Type[FlowSpecDerived]:
1820
1873
  ...
1821
1874
 
1822
- def conda_base(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, packages: typing.Dict[str, str] = {}, libraries: typing.Dict[str, str] = {}, python: typing.Optional[str] = None, disabled: bool = False):
1875
+ def trigger_on_finish(f: typing.Optional[typing.Type[FlowSpecDerived]] = None, *, flow: typing.Union[typing.Dict[str, str], str, None] = None, flows: typing.List[typing.Union[str, typing.Dict[str, str]]] = [], options: typing.Dict[str, typing.Any] = {}):
1823
1876
  """
1824
- Specifies the Conda environment for all steps of the flow.
1825
-
1826
- Use `@conda_base` to set common libraries required by all
1827
- steps and use `@conda` to specify step-specific additions.
1828
-
1877
+ Specifies the flow(s) that this flow depends on.
1829
1878
 
1830
- Parameters
1831
- ----------
1832
- packages : Dict[str, str], default {}
1833
- Packages to use for this flow. The key is the name of the package
1834
- and the value is the version to use.
1835
- libraries : Dict[str, str], default {}
1836
- Supported for backward compatibility. When used with packages, packages will take precedence.
1837
- python : str, optional, default None
1838
- Version of Python to use, e.g. '3.7.4'. A default value of None implies
1839
- that the version used will correspond to the version of the Python interpreter used to start the run.
1840
- disabled : bool, default False
1841
- If set to True, disables Conda.
1842
- """
1843
- ...
1844
-
1845
- def airflow_external_task_sensor(*, timeout: int, poke_interval: int, mode: str, exponential_backoff: bool, pool: str, soft_fail: bool, name: str, description: str, external_dag_id: str, external_task_ids: typing.List[str], allowed_states: typing.List[str], failed_states: typing.List[str], execution_delta: "datetime.timedelta", check_existence: bool) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1846
- """
1847
- The `@airflow_external_task_sensor` decorator attaches a Airflow [ExternalTaskSensor](https://airflow.apache.org/docs/apache-airflow/stable/_api/airflow/sensors/external_task/index.html#airflow.sensors.external_task.ExternalTaskSensor) before the start step of the flow.
1848
- This decorator only works when a flow is scheduled on Airflow and is compiled using `airflow create`. More than one `@airflow_external_task_sensor` can be added as a flow decorators. Adding more than one decorator will ensure that `start` step starts only after all sensors finish.
1879
+ ```
1880
+ @trigger_on_finish(flow='FooFlow')
1881
+ ```
1882
+ or
1883
+ ```
1884
+ @trigger_on_finish(flows=['FooFlow', 'BarFlow'])
1885
+ ```
1886
+ This decorator respects the @project decorator and triggers the flow
1887
+ when upstream runs within the same namespace complete successfully
1849
1888
 
1889
+ Additionally, you can specify project aware upstream flow dependencies
1890
+ by specifying the fully qualified project_flow_name.
1891
+ ```
1892
+ @trigger_on_finish(flow='my_project.branch.my_branch.FooFlow')
1893
+ ```
1894
+ or
1895
+ ```
1896
+ @trigger_on_finish(flows=['my_project.branch.my_branch.FooFlow', 'BarFlow'])
1897
+ ```
1850
1898
 
1851
- Parameters
1852
- ----------
1853
- timeout : int
1854
- Time, in seconds before the task times out and fails. (Default: 3600)
1855
- poke_interval : int
1856
- Time in seconds that the job should wait in between each try. (Default: 60)
1857
- mode : str
1858
- How the sensor operates. Options are: { poke | reschedule }. (Default: "poke")
1859
- exponential_backoff : bool
1860
- allow progressive longer waits between pokes by using exponential backoff algorithm. (Default: True)
1861
- pool : str
1862
- the slot pool this task should run in,
1863
- slot pools are a way to limit concurrency for certain tasks. (Default:None)
1864
- soft_fail : bool
1865
- Set to true to mark the task as SKIPPED on failure. (Default: False)
1866
- name : str
1867
- Name of the sensor on Airflow
1868
- description : str
1869
- Description of sensor in the Airflow UI
1870
- external_dag_id : str
1871
- The dag_id that contains the task you want to wait for.
1872
- external_task_ids : List[str]
1873
- The list of task_ids that you want to wait for.
1874
- If None (default value) the sensor waits for the DAG. (Default: None)
1875
- allowed_states : List[str]
1876
- Iterable of allowed states, (Default: ['success'])
1877
- failed_states : List[str]
1878
- Iterable of failed or dis-allowed states. (Default: None)
1879
- execution_delta : datetime.timedelta
1880
- time difference with the previous execution to look at,
1881
- the default is the same logical date as the current task or DAG. (Default: None)
1882
- check_existence: bool
1883
- Set to True to check if the external task exists or check if
1884
- the DAG to wait for exists. (Default: True)
1885
- """
1886
- ...
1887
-
1888
- def project(*, name: str, branch: typing.Optional[str] = None, production: bool = False) -> typing.Callable[[typing.Type[FlowSpecDerived]], typing.Type[FlowSpecDerived]]:
1889
- """
1890
- Specifies what flows belong to the same project.
1899
+ You can also specify just the project or project branch (other values will be
1900
+ inferred from the current project or project branch):
1901
+ ```
1902
+ @trigger_on_finish(flow={"name": "FooFlow", "project": "my_project", "project_branch": "branch"})
1903
+ ```
1891
1904
 
1892
- A project-specific namespace is created for all flows that
1893
- use the same `@project(name)`.
1905
+ Note that `branch` is typically one of:
1906
+ - `prod`
1907
+ - `user.bob`
1908
+ - `test.my_experiment`
1909
+ - `prod.staging`
1894
1910
 
1895
1911
 
1896
1912
  Parameters
1897
1913
  ----------
1898
- name : str
1899
- Project name. Make sure that the name is unique amongst all
1900
- projects that use the same production scheduler. The name may
1901
- contain only lowercase alphanumeric characters and underscores.
1902
-
1903
- branch : Optional[str], default None
1904
- The branch to use. If not specified, the branch is set to
1905
- `user.<username>` unless `production` is set to `True`. This can
1906
- also be set on the command line using `--branch` as a top-level option.
1907
- It is an error to specify `branch` in the decorator and on the command line.
1908
-
1909
- production : bool, default False
1910
- Whether or not the branch is the production branch. This can also be set on the
1911
- command line using `--production` as a top-level option. It is an error to specify
1912
- `production` in the decorator and on the command line.
1913
- The project branch name will be:
1914
- - if `branch` is specified:
1915
- - if `production` is True: `prod.<branch>`
1916
- - if `production` is False: `test.<branch>`
1917
- - if `branch` is not specified:
1918
- - if `production` is True: `prod`
1919
- - if `production` is False: `user.<username>`
1914
+ flow : Union[str, Dict[str, str]], optional, default None
1915
+ Upstream flow dependency for this flow.
1916
+ flows : List[Union[str, Dict[str, str]]], default []
1917
+ Upstream flow dependencies for this flow.
1918
+ options : Dict[str, Any], default {}
1919
+ Backend-specific configuration for tuning eventing behavior.
1920
1920
  """
1921
1921
  ...
1922
1922