ob-metaflow-extensions 1.2.3__tar.gz → 1.4.34__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (142)
  1. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/PKG-INFO +1 -1
  2. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/__init__.py +5 -1
  3. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/apps/core/_state_machine.py +16 -10
  4. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/apps/core/app_cli.py +51 -32
  5. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/apps/core/app_config.py +2 -1
  6. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/apps/core/capsule.py +119 -51
  7. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/apps/core/code_package/code_packager.py +7 -0
  8. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/apps/core/config/schema_export.py +3 -0
  9. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/apps/core/config/typed_configs.py +9 -1
  10. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/apps/core/config/unified_config.py +88 -5
  11. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/apps/core/config_schema.yaml +66 -4
  12. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/apps/core/dependencies.py +1 -1
  13. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/apps/core/deployer.py +4 -10
  14. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/apps/core/experimental/__init__.py +29 -18
  15. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/apps/core/perimeters.py +1 -1
  16. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/apps/core/secrets.py +1 -1
  17. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/apps/core/utils.py +2 -2
  18. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/aws/assume_role_decorator.py +21 -6
  19. ob_metaflow_extensions-1.4.34/metaflow_extensions/outerbounds/plugins/checkpoint_datastores/coreweave.py +71 -0
  20. ob_metaflow_extensions-1.2.3/metaflow_extensions/outerbounds/plugins/checkpoint_datastores/coreweave.py → ob_metaflow_extensions-1.4.34/metaflow_extensions/outerbounds/plugins/checkpoint_datastores/external_chckpt.py +15 -64
  21. ob_metaflow_extensions-1.4.34/metaflow_extensions/outerbounds/plugins/checkpoint_datastores/nebius.py +73 -0
  22. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/fast_bakery/fast_bakery.py +1 -0
  23. ob_metaflow_extensions-1.4.34/metaflow_extensions/outerbounds/plugins/optuna/__init__.py +48 -0
  24. ob_metaflow_extensions-1.4.34/metaflow_extensions/outerbounds/plugins/profilers/simple_card_decorator.py +96 -0
  25. ob_metaflow_extensions-1.4.34/metaflow_extensions/outerbounds/plugins/s3_proxy/__init__.py +7 -0
  26. ob_metaflow_extensions-1.4.34/metaflow_extensions/outerbounds/plugins/s3_proxy/binary_caller.py +132 -0
  27. ob_metaflow_extensions-1.4.34/metaflow_extensions/outerbounds/plugins/s3_proxy/constants.py +11 -0
  28. ob_metaflow_extensions-1.4.34/metaflow_extensions/outerbounds/plugins/s3_proxy/exceptions.py +13 -0
  29. ob_metaflow_extensions-1.4.34/metaflow_extensions/outerbounds/plugins/s3_proxy/proxy_bootstrap.py +59 -0
  30. ob_metaflow_extensions-1.4.34/metaflow_extensions/outerbounds/plugins/s3_proxy/s3_proxy_api.py +93 -0
  31. ob_metaflow_extensions-1.4.34/metaflow_extensions/outerbounds/plugins/s3_proxy/s3_proxy_decorator.py +250 -0
  32. ob_metaflow_extensions-1.4.34/metaflow_extensions/outerbounds/plugins/s3_proxy/s3_proxy_manager.py +225 -0
  33. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark_client.py +6 -3
  34. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark_decorator.py +13 -7
  35. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark_job.py +8 -2
  36. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/remote_config.py +8 -3
  37. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/toplevel/global_aliases_for_metaflow_package.py +61 -1
  38. ob_metaflow_extensions-1.4.34/metaflow_extensions/outerbounds/toplevel/plugins/optuna/__init__.py +1 -0
  39. ob_metaflow_extensions-1.4.34/metaflow_extensions/outerbounds/toplevel/s3_proxy.py +88 -0
  40. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/ob_metaflow_extensions.egg-info/PKG-INFO +1 -1
  41. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/ob_metaflow_extensions.egg-info/SOURCES.txt +13 -0
  42. ob_metaflow_extensions-1.4.34/ob_metaflow_extensions.egg-info/requires.txt +3 -0
  43. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/setup.py +2 -2
  44. ob_metaflow_extensions-1.2.3/metaflow_extensions/outerbounds/plugins/checkpoint_datastores/nebius.py +0 -139
  45. ob_metaflow_extensions-1.2.3/ob_metaflow_extensions.egg-info/requires.txt +0 -3
  46. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/MANIFEST.in +0 -0
  47. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/README.md +0 -0
  48. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/__init__.py +0 -0
  49. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/config/__init__.py +0 -0
  50. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/apps/__init__.py +0 -0
  51. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/apps/app_cli.py +0 -0
  52. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/apps/app_deploy_decorator.py +0 -0
  53. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/apps/app_utils.py +0 -0
  54. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/apps/consts.py +0 -0
  55. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/apps/core/__init__.py +0 -0
  56. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/apps/core/_vendor/__init__.py +0 -0
  57. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/apps/core/_vendor/spinner/__init__.py +0 -0
  58. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/apps/core/_vendor/spinner/spinners.py +0 -0
  59. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/apps/core/artifacts.py +0 -0
  60. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/apps/core/click_importer.py +0 -0
  61. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/apps/core/code_package/__init__.py +0 -0
  62. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/apps/core/code_package/examples.py +0 -0
  63. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/apps/core/config/__init__.py +0 -0
  64. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/apps/core/config/cli_generator.py +0 -0
  65. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/apps/core/config/config_utils.py +0 -0
  66. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/apps/core/config/typed_init_generator.py +0 -0
  67. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/apps/core/validations.py +0 -0
  68. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/apps/deploy_decorator.py +0 -0
  69. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/apps/supervisord_utils.py +0 -0
  70. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/auth_server.py +0 -0
  71. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/aws/__init__.py +0 -0
  72. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/aws/assume_role.py +0 -0
  73. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/card_utilities/__init__.py +0 -0
  74. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/card_utilities/async_cards.py +0 -0
  75. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/card_utilities/extra_components.py +0 -0
  76. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/card_utilities/injector.py +0 -0
  77. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/checkpoint_datastores/__init__.py +0 -0
  78. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/fast_bakery/__init__.py +0 -0
  79. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/fast_bakery/baker.py +0 -0
  80. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/fast_bakery/docker_environment.py +0 -0
  81. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/fast_bakery/fast_bakery_cli.py +0 -0
  82. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/fast_bakery/fast_bakery_decorator.py +0 -0
  83. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/kubernetes/__init__.py +0 -0
  84. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/kubernetes/kubernetes_client.py +0 -0
  85. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/kubernetes/pod_killer.py +0 -0
  86. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/nim/card.py +0 -0
  87. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/nim/nim_decorator.py +0 -0
  88. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/nim/nim_manager.py +0 -0
  89. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/nim/utils.py +0 -0
  90. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/nvcf/__init__.py +0 -0
  91. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/nvcf/constants.py +0 -0
  92. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/nvcf/exceptions.py +0 -0
  93. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/nvcf/heartbeat_store.py +0 -0
  94. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/nvcf/nvcf.py +0 -0
  95. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/nvcf/nvcf_cli.py +0 -0
  96. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/nvcf/nvcf_decorator.py +0 -0
  97. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/nvcf/utils.py +0 -0
  98. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/nvct/__init__.py +0 -0
  99. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/nvct/exceptions.py +0 -0
  100. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/nvct/nvct.py +0 -0
  101. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/nvct/nvct_cli.py +0 -0
  102. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/nvct/nvct_decorator.py +0 -0
  103. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/nvct/nvct_runner.py +0 -0
  104. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/nvct/utils.py +0 -0
  105. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/ollama/__init__.py +0 -0
  106. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/ollama/constants.py +0 -0
  107. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/ollama/exceptions.py +0 -0
  108. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/ollama/ollama.py +0 -0
  109. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/ollama/status_card.py +0 -0
  110. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/perimeters.py +0 -0
  111. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/profilers/deco_injector.py +0 -0
  112. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/profilers/gpu_profile_decorator.py +0 -0
  113. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/secrets/__init__.py +0 -0
  114. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/secrets/secrets.py +0 -0
  115. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/snowflake/__init__.py +0 -0
  116. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/snowflake/snowflake.py +0 -0
  117. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/snowpark/__init__.py +0 -0
  118. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark.py +0 -0
  119. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark_cli.py +0 -0
  120. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark_exceptions.py +0 -0
  121. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark_service_spec.py +0 -0
  122. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/tensorboard/__init__.py +0 -0
  123. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/torchtune/__init__.py +0 -0
  124. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/vllm/__init__.py +0 -0
  125. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/vllm/constants.py +0 -0
  126. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/vllm/exceptions.py +0 -0
  127. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/vllm/status_card.py +0 -0
  128. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/plugins/vllm/vllm_manager.py +0 -0
  129. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/profilers/__init__.py +0 -0
  130. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/profilers/gpu.py +0 -0
  131. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/toplevel/__init__.py +0 -0
  132. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/toplevel/ob_internal.py +0 -0
  133. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/toplevel/plugins/azure/__init__.py +0 -0
  134. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/toplevel/plugins/gcp/__init__.py +0 -0
  135. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/toplevel/plugins/kubernetes/__init__.py +0 -0
  136. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/toplevel/plugins/ollama/__init__.py +0 -0
  137. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/toplevel/plugins/snowflake/__init__.py +0 -0
  138. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/toplevel/plugins/torchtune/__init__.py +0 -0
  139. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/metaflow_extensions/outerbounds/toplevel/plugins/vllm/__init__.py +0 -0
  140. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/ob_metaflow_extensions.egg-info/dependency_links.txt +0 -0
  141. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/ob_metaflow_extensions.egg-info/top_level.txt +0 -0
  142. {ob_metaflow_extensions-1.2.3 → ob_metaflow_extensions-1.4.34}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ob_metaflow_extensions
3
- Version: 1.2.3
3
+ Version: 1.4.34
4
4
  Summary: Outerbounds Platform Extensions for Metaflow
5
5
  Author: Outerbounds, Inc.
6
6
  License: Commercial
@@ -335,10 +335,14 @@ STEP_DECORATORS_DESC = [
335
335
  ("snowpark", ".snowpark.snowpark_decorator.SnowparkDecorator"),
336
336
  ("tensorboard", ".tensorboard.TensorboardDecorator"),
337
337
  ("gpu_profile", ".profilers.gpu_profile_decorator.GPUProfileDecorator"),
338
+ ("test_append_card", ".profilers.simple_card_decorator.DynamicCardAppendDecorator"),
338
339
  ("nim", ".nim.nim_decorator.NimDecorator"),
339
340
  ("ollama", ".ollama.OllamaDecorator"),
340
341
  ("vllm", ".vllm.VLLMDecorator"),
341
342
  ("app_deploy", ".apps.app_deploy_decorator.AppDeployDecorator"),
343
+ ("s3_proxy", ".s3_proxy.s3_proxy_decorator.S3ProxyDecorator"),
344
+ ("nebius_s3_proxy", ".s3_proxy.s3_proxy_decorator.NebiusS3ProxyDecorator"),
345
+ ("coreweave_s3_proxy", ".s3_proxy.s3_proxy_decorator.CoreWeaveS3ProxyDecorator"),
342
346
  ]
343
347
 
344
348
  TOGGLE_STEP_DECORATOR = [
@@ -357,4 +361,4 @@ SECRETS_PROVIDERS_DESC = [
357
361
  ("outerbounds", ".secrets.secrets.OuterboundsSecretsProvider"),
358
362
  ]
359
363
  # Adding an override here so the library can be imported at the metaflow.plugins level
360
- __mf_promote_submodules__ = ["snowflake", "ollama", "torchtune"]
364
+ __mf_promote_submodules__ = ["snowflake", "ollama", "torchtune", "optuna"]
@@ -180,6 +180,7 @@ class WorkerInfoDict(TypedDict):
180
180
  pending: Dict[str, List[WorkerStatus]]
181
181
  running: Dict[str, List[WorkerStatus]]
182
182
  crashlooping: Dict[str, List[WorkerStatus]]
183
+ failed: Dict[str, List[WorkerStatus]]
183
184
 
184
185
 
185
186
  class CurrentWorkerInfo(TypedDict):
@@ -199,29 +200,29 @@ class DEPLOYMENT_READY_CONDITIONS:
199
200
  This allows users or platform designers to configure the criteria for deployment readiness.
200
201
 
201
202
  Why do we need deployment readiness conditions?
202
- - Deployments might be taking place from a CI/CD esq environment, In these setups, the downstream build triggers might be depending on a specific criteria for deployment completion. Having readiness conditions allows the CI/CD systems to get a signal of when the deployment is ready.
203
+ - Deployments might be taking place from a CI/CD-esque environment. In these setups, the downstream build triggers might depend on specific criteria for deployment completion. Having readiness conditions allows the CI/CD systems to get a signal of when the deployment is ready.
203
204
  - Users might be calling the deployment API under different conditions:
204
205
  - Some users might want a cluster of workers ready before serving traffic while others might want just one worker ready to start serving traffic.
205
206
 
206
207
  Some readiness conditions include:
207
- 1) [at_least_one_running] Atleast min(min_replicas, 1) workers of the current deployment instance's version have started running.
208
+ 1) [at_least_one_running] At least min(min_replicas, 1) workers of the current deployment instance's version have started running.
208
209
  - Usecase: Some endpoints may be deployed ephemerally and are considered ready when at least one instance is running; additional instances are for load management.
209
- 2) [all_running] Atleast min_replicas number of workers are running for the deployment to be considered ready.
210
+ 2) [all_running] At least min_replicas number of workers are running for the deployment to be considered ready.
210
211
  - Usecase: Operators may require that all replicas are available before traffic is routed. Needed when inference endpoints may be under some SLA or require a larger load.
211
- 3) [fully_finished] Atleast min_replicas number of workers are running for the deployment and there are no pending or crashlooping workers from previous versions lying around.
212
+ 3) [fully_finished] At least min_replicas number of workers are running for the deployment and there are no pending or crashlooping workers from previous versions lying around.
212
213
  - Usecase: Ensuring endpoint is fully available and no other versions are running or endpoint has been fully scaled down.
213
214
  4) [async] The deployment will be assumed ready as soon as the server responds with a 200.
214
215
  - Usecase: Operators may only care that the URL is minted for the deployment or the deployment eventually scales down to 0.
215
216
  """
216
217
 
217
- # `ATLEAST_ONE_RUNNING` implies that atleast one worker of the current deployment instance's version has started running.
218
+ # `ATLEAST_ONE_RUNNING` implies that at least one worker of the current deployment instance's version has started running.
218
219
  ATLEAST_ONE_RUNNING = "at_least_one_running"
219
220
 
220
221
  # `ALL_RUNNING` implies that all workers of the current deployment instance's version have started running (i.e. all workers aligning to the minimum number of replicas).
221
222
  # It doesn't imply that all the workers relating to other deployments have been torn down.
222
223
  ALL_RUNNING = "all_running"
223
224
 
224
- # `FULLY_FINISHED` implies Atleast min_replicas number of workers are running for the deployment and there are no pending or crashlooping workers from previous versions lying around.
225
+ # `FULLY_FINISHED` implies at least min_replicas number of workers are running for the deployment and there are no pending or crashlooping workers from previous versions lying around.
225
226
  FULLY_FINISHED = "fully_finished"
226
227
 
227
228
  # `ASYNC` implies that the deployment will be assumed ready after the URL is minted and the worker statuses are not checked.
@@ -442,14 +443,16 @@ def _capsule_worker_semantic_status(
442
443
  xx[worker_version].append(w)
443
444
  return xx
444
445
 
446
+ # phases can be Pending, Running, Succeeded, Failed, Unknown, CrashLoopBackOff
445
447
  pending_workers = _make_version_dict(workers, "Pending")
446
448
  running_workers = _make_version_dict(workers, "Running")
447
449
  crashlooping_workers = _make_version_dict(workers, "CrashLoopBackOff")
450
+ failed_workers = _make_version_dict(workers, "Failed")
448
451
 
449
452
  # current_status (formulated basis):
450
- # - atleast one pods are pending for `_end_state_capsule_version`
451
- # - atleast one pod is in Running state for `_end_state_capsule_version` (maybe terminal) [Might require heath-check thing here]
452
- # - alteast one pod is crashlooping for `_end_state_capsule_version` (maybe terminal)
453
+ # - at least one pod is pending for `_end_state_capsule_version`
454
+ # - at least one pod is in Running state for `_end_state_capsule_version` (maybe terminal) [Might require health-check thing here]
455
+ # - at least one pod is crashlooping for `_end_state_capsule_version` (maybe terminal)
453
456
  # - all pods are running for `_end_state_capsule_version` that match the minimum number of replicas
454
457
  # - all pods are running for `_end_state_capsule_version` that match the maximum number of replicas and no other pods of older versions are running
455
458
  # - no pods relating to `_end_state_capsule_version` are pending/running/crashlooping
@@ -464,7 +467,8 @@ def _capsule_worker_semantic_status(
464
467
  "at_least_one_running": (
465
468
  count_for_version(running_workers) >= min(min_replicas, 1)
466
469
  ),
467
- "at_least_one_crashlooping": count_for_version(crashlooping_workers) > 0,
470
+ "at_least_one_crashlooping": count_for_version(crashlooping_workers) > 0
471
+ or count_for_version(failed_workers) > 0,
468
472
  "none_present": (
469
473
  count_for_version(running_workers) == 0
470
474
  and count_for_version(pending_workers) == 0
@@ -484,6 +488,7 @@ def _capsule_worker_semantic_status(
484
488
  "pending": count_for_version(pending_workers),
485
489
  "running": count_for_version(running_workers),
486
490
  "crashlooping": count_for_version(crashlooping_workers),
491
+ "failed": count_for_version(failed_workers),
487
492
  },
488
493
  }
489
494
 
@@ -491,6 +496,7 @@ def _capsule_worker_semantic_status(
491
496
  "pending": pending_workers,
492
497
  "running": running_workers,
493
498
  "crashlooping": crashlooping_workers,
499
+ "failed": failed_workers,
494
500
  }
495
501
 
496
502
  return {
@@ -239,7 +239,7 @@ def _bake_image(app_config: AppConfig, cache_dir: str, logger):
239
239
  baking_status.resolved_image,
240
240
  )
241
241
  app_config.set_state("python_path", baking_status.python_path)
242
- logger("🐳 Using The Docker Image : %s" % app_config.get_state("image"))
242
+ logger("🐳 Using the docker image : %s" % app_config.get_state("image"))
243
243
 
244
244
 
245
245
  def print_table(data, headers):
@@ -339,7 +339,7 @@ def deployment_instance_options(func):
339
339
  "--readiness-wait-time",
340
340
  type=int,
341
341
  help="The time (in seconds) to monitor the deployment for readiness after the readiness condition is met.",
342
- default=4,
342
+ default=15,
343
343
  )
344
344
  @click.option(
345
345
  "--deployment-timeout",
@@ -374,11 +374,11 @@ def _package_necessary_things(app_config: AppConfig, logger):
374
374
  # or is it relative to where the caller command is sitting. Ideally it should work
375
375
  # like Kustomizations where its relative to where the yaml file sits for simplicity
376
376
  # of understanding relationships between config files. Ideally users can pass the src_path
377
- # from the command line and that will aliviate any need to package any other directories for
377
+ # from the command line and that will alleviate any need to package any other directories for
378
378
  #
379
379
 
380
- package_dir = app_config.get_state("packaging_directory")
381
- if package_dir is None:
380
+ package_dirs = app_config.get_state("packaging_directories")
381
+ if package_dirs is None:
382
382
  app_config.set_state("code_package_url", None)
383
383
  app_config.set_state("code_package_key", None)
384
384
  return
@@ -391,11 +391,24 @@ def _package_necessary_things(app_config: AppConfig, logger):
391
391
  datastore_type=DEFAULT_DATASTORE, code_package_prefix=CODE_PACKAGE_PREFIX
392
392
  )
393
393
  package_url, package_key = packager.store(
394
- paths_to_include=[package_dir], file_suffixes=suffixes
394
+ paths_to_include=package_dirs, file_suffixes=suffixes
395
395
  )
396
396
  app_config.set_state("code_package_url", package_url)
397
397
  app_config.set_state("code_package_key", package_key)
398
- logger("💾 Code Package Saved to : %s" % app_config.get_state("code_package_url"))
398
+ logger("💾 Code package saved to : %s" % app_config.get_state("code_package_url"))
399
+
400
+
401
+ def _sniff_pyproject_and_requirements(packaging_directories: List[str]):
402
+ pyproject_path = None
403
+ requirements_path = None
404
+ for directory in packaging_directories:
405
+ pyproject_toml = os.path.join(directory, "pyproject.toml")
406
+ requirements_txt = os.path.join(directory, "requirements.txt")
407
+ if os.path.exists(pyproject_toml):
408
+ pyproject_path = pyproject_toml
409
+ elif os.path.exists(requirements_txt):
410
+ requirements_path = requirements_txt
411
+ return pyproject_path, requirements_path
399
412
 
400
413
 
401
414
  @app.command(help="Deploy an app to the Outerbounds Platform.")
@@ -449,23 +462,19 @@ def deploy(
449
462
  system_msg=True,
450
463
  )
451
464
 
452
- packaging_directory = None
453
- package_src_path = app_config.get("package", {}).get("src_path", None)
454
- if package_src_path:
455
- if os.path.isfile(package_src_path):
456
- raise AppConfigError("src_path must be a directory, not a file")
457
- elif os.path.isdir(package_src_path):
458
- packaging_directory = os.path.abspath(package_src_path)
459
- else:
460
- raise AppConfigError(f"src_path '{package_src_path}' does not exist")
461
- else:
462
- # If src_path is None then we assume then we can assume for the moment
465
+ package_src_paths = app_config.get("package", {}).get("src_paths", [])
466
+ if package_src_paths is None:
467
+ package_src_paths = []
468
+
469
+ if len(package_src_paths) == 0:
470
+ # If src_paths is None or empty, we assume for the moment
463
471
  # that we can package the current working directory.
464
- packaging_directory = os.getcwd()
472
+ package_src_paths = [os.getcwd()]
465
473
 
466
- app_config.set_state("packaging_directory", packaging_directory)
474
+ app_config.set_state("packaging_directories", package_src_paths)
467
475
  logger(
468
- "📦 Packaging Directory : %s" % app_config.get_state("packaging_directory"),
476
+ "📦 Packaging directories : %s"
477
+ % ", ".join(app_config.get_state("packaging_directories")),
469
478
  )
470
479
 
471
480
  if app_config.get("no_deps", False):
@@ -484,22 +493,32 @@ def deploy(
484
493
  dependencies.get("conda", None) is None,
485
494
  ]
486
495
  ):
496
+ python_version = dependencies.get(
497
+ "python"
498
+ ) # python gets a default value so it's always set.
487
499
  # The user has not set any dependencies, so we can sniff the packaging directory
488
500
  # for a dependencies file.
489
- requirements_file = os.path.join(
490
- packaging_directory, "requirements.txt"
501
+ pyproject_toml, requirements_file = _sniff_pyproject_and_requirements(
502
+ package_src_paths
491
503
  )
492
- pyproject_toml = os.path.join(packaging_directory, "pyproject.toml")
493
- if os.path.exists(pyproject_toml):
504
+ if pyproject_toml:
494
505
  app_config.set_state(
495
- "dependencies", {"from_pyproject_toml": pyproject_toml}
506
+ "dependencies",
507
+ {
508
+ "from_pyproject_toml": pyproject_toml,
509
+ "python": python_version,
510
+ },
496
511
  )
497
512
  logger(
498
513
  "📦 Using dependencies from pyproject.toml: %s" % pyproject_toml
499
514
  )
500
- elif os.path.exists(requirements_file):
515
+ elif requirements_file:
501
516
  app_config.set_state(
502
- "dependencies", {"from_requirements_file": requirements_file}
517
+ "dependencies",
518
+ {
519
+ "from_requirements_file": requirements_file,
520
+ "python": python_version,
521
+ },
503
522
  )
504
523
  logger(
505
524
  "📦 Using dependencies from requirements.txt: %s"
@@ -611,7 +630,7 @@ def deploy(
611
630
  )
612
631
  raise AppConfigError(message)
613
632
  capsule_logger(
614
- f"🚀 {'' if not force_upgrade else 'Force'} Upgrading {capsule.capsule_type.lower()} `{capsule.name}`....",
633
+ f"🚀 {'Upgrading' if not force_upgrade else 'Force upgrading'} {capsule.capsule_type.lower()} `{capsule.name}`....",
615
634
  color=ColorTheme.INFO_COLOR,
616
635
  system_msg=True,
617
636
  )
@@ -632,7 +651,7 @@ def deploy(
632
651
  capsule_spinner.stop()
633
652
 
634
653
  logger(
635
- f"💊 {capsule.capsule_type} {app_config.config['name']} ({capsule.identifier}) deployed! {capsule.capsule_type} exposed on the URL: {capsule.url}",
654
+ f"💊 {capsule.capsule_type} {app_config.config['name']} ({capsule.identifier}) deployed! {capsule.capsule_type} available on the URL: {capsule.url}",
636
655
  color=ColorTheme.INFO_COLOR,
637
656
  system_msg=True,
638
657
  )
@@ -761,7 +780,7 @@ def list(ctx, project, branch, name, tags, format, auth_type):
761
780
  def delete(ctx, name, cap_id, project, branch, tags, auto_approve):
762
781
 
763
782
  """Delete an app/apps from the Outerbounds Platform."""
764
- # Atleast one of the args need to be provided
783
+ # At least one of the args needs to be provided
765
784
  if not any(
766
785
  [
767
786
  name is not None,
@@ -772,7 +791,7 @@ def delete(ctx, name, cap_id, project, branch, tags, auto_approve):
772
791
  ]
773
792
  ):
774
793
  raise AppConfigError(
775
- "Atleast one of the options need to be provided. You can use --name, --id, --project, --branch, --tag"
794
+ "At least one of the options need to be provided. You can use --name, --id, --project, --branch, --tag"
776
795
  )
777
796
 
778
797
  capsule_api = CapsuleApi(ctx.obj.api_url, ctx.obj.perimeter)
@@ -45,10 +45,11 @@ def _try_loading_yaml(file):
45
45
  class AuthType:
46
46
  BROWSER = "Browser"
47
47
  API = "API"
48
+ BROWSER_AND_API = "BrowserAndApi"
48
49
 
49
50
  @classmethod
50
51
  def enums(cls):
51
- return [cls.BROWSER, cls.API]
52
+ return [cls.BROWSER, cls.API, cls.BROWSER_AND_API]
52
53
 
53
54
  @classproperty
54
55
  def default(cls):
@@ -7,7 +7,7 @@ import sys
7
7
  import time
8
8
  from functools import partial
9
9
  import shlex
10
- from typing import Optional, List, Dict, Any, Tuple, Union
10
+ from typing import Optional, List, Dict, Any, Tuple, Union, Callable
11
11
  from .utils import TODOException, safe_requests_wrapper, MaximumRetriesExceeded
12
12
  from .app_config import AppConfig, CAPSULE_DEBUG, AuthType
13
13
  from . import experimental
@@ -44,24 +44,24 @@ class CapsuleStateMachine:
44
44
  - Happy Path:
45
45
  - First time Create :
46
46
  - wait for status.updateInProgress to be set to False
47
- - (interleved) Poll the worker endpoints to check their status
47
+ - (interleaved) Poll the worker endpoints to check their status
48
48
  - showcase how many workers are coming up if things are on the cli side.
49
49
  - If the user has set some flag like `--dont-wait-to-fully-finish` then we check the `status.currentlyServedVersion` to see if even one replica is ready to
50
50
  serve traffic.
51
51
  - once the status.updateInProgress is set to False, it means that the replicas are ready
52
52
  - Upgrade:
53
53
  - wait for status.updateInProgress to be set to False
54
- - (interleved) Poll the worker endpoints to check their status and signal the user the number replicas coming up
54
+ - (interleaved) Poll the worker endpoints to check their status and signal the user the number replicas coming up
55
55
  - If the user has set some flag like `--dont-wait-to-fully-finish` then we check the `status.currentlyServedVersion` to see if even one replica is ready to
56
56
  serve traffic.
57
57
  - Unhappy Path:
58
58
  - First time Create :
59
59
  - wait for status.updateInProgress to be set to False,
60
- - (interleved) Poll the workers to check their status.
60
+ - (interleaved) Poll the workers to check their status.
61
61
  - If the worker pertaining the current deployment instance version is crashlooping then crash the deployment process with the error messages and logs.
62
62
  - Upgrade:
63
63
  - wait for status.updateInProgress to be set to False,
64
- - (interleved) Poll the workers to check their status.
64
+ - (interleaved) Poll the workers to check their status.
65
65
  - If the worker pertaining the current deployment instance version is crashlooping then crash the deployment process with the error messages and logs.
66
66
 
67
67
  """
@@ -75,7 +75,6 @@ class CapsuleStateMachine:
75
75
  return self._status_trail
76
76
 
77
77
  def add_status(self, status: CapsuleStatus):
78
- assert type(status) == dict, "TODO: Make this check somewhere else"
79
78
  self._status_trail.append({"timestamp": time.time(), "status": status})
80
79
 
81
80
  @property
@@ -116,7 +115,9 @@ class CapsuleStateMachine:
116
115
  pass
117
116
 
118
117
  def save_debug_info(self, state_dir: str):
119
- debug_path = os.path.join(state_dir, f"debug_capsule_{self._capsule_id}.json")
118
+ debug_path = os.path.join(
119
+ state_dir, f"debug_capsule_sm_{self._capsule_id}.json"
120
+ )
120
121
  with open(debug_path, "w") as f:
121
122
  json.dump(self._status_trail, f, indent=4)
122
123
 
@@ -210,9 +211,9 @@ class CapsuleInput:
210
211
  def construct_exec_command(cls, commands: List[str]):
211
212
  commands = ["set -eEuo pipefail"] + commands
212
213
  command_string = "\n".join(commands)
213
- # First constuct a base64 encoded string of the quoted command
214
+ # First construct a base64 encoded string of the quoted command
214
215
  # One of the reasons we don't directly pass the command string to the backend with a `\n` join
215
- # is because the backend controller doesnt play nice when the command can be a multi-line string.
216
+ # is because the backend controller doesn't play nice when the command can be a multi-line string.
216
217
  # So we encode it to a base64 string and then decode it back to a command string at runtime to provide to
217
218
  # `bash -c`. The ideal thing to have done is to run "bash -c {shlex.quote(command_string)}" and call it a day
218
219
  # but the backend controller yields the following error:
@@ -255,6 +256,12 @@ class CapsuleInput:
255
256
  replicas.get("min"),
256
257
  replicas.get("max"),
257
258
  )
259
+ rpm = replicas.get("scaling_policy", {}).get("rpm", None)
260
+ autoscaling_config = {}
261
+ if rpm:
262
+ autoscaling_config = {
263
+ "requestRateBasedAutoscalingConfig": {"targetRequestsPerMinute": rpm}
264
+ }
258
265
  if fixed is not None:
259
266
  _min, _max = fixed, fixed
260
267
  gpu_resource = app_config.get_state("resources").get("gpu")
@@ -296,6 +303,7 @@ class CapsuleInput:
296
303
  "autoscalingConfig": {
297
304
  "minReplicas": _min,
298
305
  "maxReplicas": _max,
306
+ **autoscaling_config,
299
307
  },
300
308
  **_scheduling_config,
301
309
  "containerStartupConfig": {
@@ -420,7 +428,7 @@ class CapsuleApi:
420
428
  message="Capsule JSON decode failed",
421
429
  )
422
430
 
423
- def get(self, capsule_id: str):
431
+ def get(self, capsule_id: str) -> Dict[str, Any]:
424
432
  _url = os.path.join(self._base_url, capsule_id)
425
433
  response = self._wrapped_api_caller(
426
434
  requests.get,
@@ -439,6 +447,35 @@ class CapsuleApi:
439
447
  message="Capsule JSON decode failed",
440
448
  )
441
449
 
450
+ # TODO: refactor me since name *currently(9/8/25)* is unique across capsules.
451
def get_by_name(self, name: str, most_recent_only: bool = True):
    """Look up capsules by display name.

    Args:
        name: Display name to filter on; sent as the ``displayName`` query
            parameter.
        most_recent_only: When True, return only the capsule with the greatest
            ``metadata.createdAt`` value, or ``None`` when the ``capsules``
            list in the response is empty. When False, return the decoded
            response body unchanged.

    Raises:
        CapsuleApiException: If the response body cannot be decoded as JSON.
    """
    # Function-scope import so the module-level import block stays untouched.
    from urllib.parse import quote

    # Percent-encode the name: characters such as `&`, `#` or spaces would
    # otherwise truncate or alter the query string.
    _url = os.path.join(self._base_url, f"?displayName={quote(name, safe='')}")
    response = self._wrapped_api_caller(
        requests.get,
        _url,
        retryable_status_codes=[409],  # todo : verify me
        conn_error_retries=3,
    )
    try:
        result = response.json()
    except json.JSONDecodeError as e:
        # Chain the decode error so the root cause stays in the traceback.
        raise CapsuleApiException(
            _url,
            "get",
            response.status_code,
            response.text,
            message="Capsule JSON decode failed",
        ) from e

    if not most_recent_only:
        return result

    candidates = result["capsules"]
    if not candidates:
        return None
    # max() returns the first maximal element, which is the same element the
    # previous stable reverse sort placed at index 0.
    return max(candidates, key=lambda x: x["metadata"]["createdAt"])
478
+
442
479
  def list(self):
443
480
  response = self._wrapped_api_caller(
444
481
  requests.get,
@@ -641,7 +678,7 @@ class CapsuleDeployer:
641
678
  auth_type = self._app_config.get_state("auth", {}).get("type", AuthType.default)
642
679
  if auth_type == AuthType.BROWSER:
643
680
  return "App"
644
- elif auth_type == AuthType.API:
681
+ elif auth_type == AuthType.API or auth_type == AuthType.BROWSER_AND_API:
645
682
  return "Endpoint"
646
683
  else:
647
684
  raise TODOException(f"Unknown auth type: {auth_type}")
@@ -682,7 +719,7 @@ class CapsuleDeployer:
682
719
  """
683
720
  - `capsule_response.version` contains the version of the object present in the database
684
721
  - `current_deployment_instance_version` contains the version of the object that was deployed by this instance of the deployer.
685
- In the situtation that the versions of the objects become a mismatch then it means that current deployment process is not giving the user the
722
+ In the situation that the versions of the objects become a mismatch then it means that current deployment process is not giving the user the
686
723
  output that they desire.
687
724
  """
688
725
  if capsule_response.get("version", None) != current_deployment_instance_version:
@@ -691,17 +728,51 @@ class CapsuleDeployer:
691
728
  f"A capsule upgrade was triggered outside current deployment instance. Current deployment version was discarded. Current deployment version: {current_deployment_instance_version} and new version: {capsule_response.get('version', None)}",
692
729
  )
693
730
 
731
def _update_capsule_and_worker_sm(
    self,
    capsule_sm: "CapsuleStateMachine",
    workers_sm: "CapsuleWorkersStateMachine",
    logger: Callable[[str], None],
) -> Tuple[Dict[str, Any], List[Dict[str, Any]]]:
    """Refresh both state machines from the backend and report their status.

    Fetches the capsule, records its ``status`` on ``capsule_sm``, runs the
    backend version-mismatch check, then fetches the workers, records them on
    ``workers_sm`` and has both state machines report through ``logger``.

    Returns:
        The ``(capsule_response, workers_response)`` pair that was fetched.
    """
    latest_capsule = self.get()
    latest_status = latest_capsule.get("status", {})
    capsule_sm.add_status(latest_status)  # type: ignore

    # Someone may have upgraded the capsule outside of this deployment
    # instance, which would make the current deployment invalid.
    self._backend_version_mismatch_check(
        latest_capsule, self.current_deployment_instance_version  # type: ignore
    )

    latest_workers = self.get_workers()
    capsule_sm.report_current_status(logger)
    workers_sm.add_status(latest_workers)
    workers_sm.report_current_status(logger)
    return latest_capsule, latest_workers
750
+
751
def _publish_capsule_debug_info(
    self,
    capsule_sm: "CapsuleStateMachine",
    workers_sm: "CapsuleWorkersStateMachine",
    capsule_response: Dict[str, Any],
):
    """Write debug snapshots to the debug directory.

    Does nothing unless ``CAPSULE_DEBUG`` is truthy and ``self._debug_dir`` is
    set. Otherwise both state machines save their debug info there and the
    given capsule response is written, indented, to
    ``debug_capsule_<identifier>.json``.
    """
    if not (CAPSULE_DEBUG and self._debug_dir):
        return

    for machine in (capsule_sm, workers_sm):
        machine.save_debug_info(self._debug_dir)

    dump_file = os.path.join(
        self._debug_dir, f"debug_capsule_{self.identifier}.json"
    )
    with open(dump_file, "w") as handle:
        handle.write(json.dumps(capsule_response, indent=4))
765
+
694
766
  def _monitor_worker_readiness(
695
767
  self,
696
768
  workers_sm: "CapsuleWorkersStateMachine",
769
+ capsule_sm: "CapsuleStateMachine",
697
770
  ):
698
771
  """returns True if the worker is crashlooping, False otherwise"""
699
772
  logger = self._logger_fn or partial(print, file=sys.stderr)
700
773
  for i in range(self._readiness_wait_time):
701
774
  time.sleep(1)
702
- workers_response = self.get_workers()
703
- workers_sm.add_status(workers_response)
704
- workers_sm.report_current_status(logger)
775
+ self._update_capsule_and_worker_sm(capsule_sm, workers_sm, logger)
705
776
  if workers_sm.is_crashlooping:
706
777
  return True
707
778
  return False
@@ -713,7 +784,7 @@ class CapsuleDeployer:
713
784
  workers_status: List[WorkerStatus],
714
785
  ):
715
786
  for worker in workers_status:
716
- if worker["phase"] == "CrashLoopBackOff":
787
+ if worker["phase"] == "CrashLoopBackOff" or worker["phase"] == "Failed":
717
788
  return worker["workerId"]
718
789
  return None
719
790
 
@@ -747,21 +818,19 @@ class CapsuleDeployer:
747
818
  minimum_replicas=min_replicas,
748
819
  )
749
820
  self.status = state_machine
821
+
822
+ # This loop will check all the conditions that help verify the terminal state.
823
+ # How it works is by extracting the statuses of the capsule and workers and
824
+ # then adding them as a part of a state-machine that helps track transitions and
825
+ # helps derive terminal states.
826
+ # We will first keep checking for terminal conditions or outright failure conditions
827
+ # If we reach a terminal condition like the ones described in `DEPLOYMENT_READY_CONDITIONS`, then
828
+ # we will further check for readiness conditions.
750
829
  for i in range(self._create_timeout):
751
830
  time.sleep(1)
752
- capsule_response = self.get()
753
- workers_response = self.get_workers()
754
-
755
- # We first need to check if someone has not upgraded the capsule under the hood and
756
- # the current deployment instance is invalid.
757
- self._backend_version_mismatch_check(
758
- capsule_response, self.current_deployment_instance_version # type: ignore
831
+ capsule_response, _ = self._update_capsule_and_worker_sm(
832
+ state_machine, workers_state_machine, logger
759
833
  )
760
- state_machine.add_status(capsule_response.get("status", {})) # type: ignore
761
- workers_state_machine.add_status(workers_response)
762
- state_machine.report_current_status(logger)
763
-
764
- workers_state_machine.report_current_status(logger)
765
834
  # Deployment readiness checks will determine what is the terminal state
766
835
  # of the workerstate machine. If we detect a terminal state in the workers,
767
836
  # then even if the capsule upgrade is still in progress we will end up crashing
@@ -783,28 +852,29 @@ class CapsuleDeployer:
783
852
  )
784
853
  if capsule_ready or failure_condition_satisfied:
785
854
  logger(
786
- "💊 %s deployment status: %s | worker states: [success :%s | failure :%s ] "
855
+ "💊 %s deployment status: %s "
787
856
  % (
788
857
  self.capsule_type.title(),
789
- "in progress"
790
- if state_machine.update_in_progress
791
- else "completed",
792
- capsule_ready,
793
- failure_condition_satisfied,
858
+ (
859
+ "in progress"
860
+ if state_machine.update_in_progress
861
+ else "completed"
862
+ ),
794
863
  )
795
864
  )
796
865
  _further_readiness_check_failed = False
797
866
  if further_check_worker_readiness:
798
867
  # HACK : monitor the workers for N seconds to make sure they are healthy
799
- # this is a hack. Ideally we should implment a healtcheck as a first class citizen
868
+ # this is a hack. Ideally we should implement a healthcheck as a first class citizen
800
869
  # but it will take some time to do that so in the meanwhile a timeout set on the cli
801
870
  # side will be really helpful.
802
871
  logger(
803
- "💊 running last minute readiness check for %s..."
872
+ "💊 Running last minute readiness check for %s..."
804
873
  % self.identifier
805
874
  )
806
875
  _further_readiness_check_failed = self._monitor_worker_readiness(
807
- workers_state_machine
876
+ workers_state_machine,
877
+ state_machine,
808
878
  )
809
879
 
810
880
  if CAPSULE_DEBUG:
@@ -848,13 +918,18 @@ class CapsuleDeployer:
848
918
 
849
919
  break
850
920
 
851
- if CAPSULE_DEBUG and self._debug_dir:
852
- state_machine.save_debug_info(self._debug_dir)
853
- workers_state_machine.save_debug_info(self._debug_dir)
854
- if i % 3 == 0: # Every 3 seconds report the status
855
- logger(
856
- f"[debug] 💊 {self.capsule_type} {self.identifier} deployment status: {state_machine.current_status} | worker states: {workers_state_machine.current_status}"
857
- )
921
+ self._publish_capsule_debug_info(
922
+ state_machine, workers_state_machine, capsule_response
923
+ )
924
+
925
+ if CAPSULE_DEBUG and i % 3 == 0: # Every 3 seconds report the status
926
+ logger(
927
+ f"[debug] 💊 {self.capsule_type} {self.identifier} deployment status: {state_machine.current_status} | worker states: {workers_state_machine.current_status} | capsule_ready : {capsule_ready} | further_check_worker_readiness {further_check_worker_readiness}"
928
+ )
929
+
930
+ self._publish_capsule_debug_info(
931
+ state_machine, workers_state_machine, capsule_response
932
+ )
858
933
 
859
934
  # We will only check ready_to_serve_traffic under the following conditions:
860
935
  # If the readiness condition is not Async and min_replicas in this deployment
@@ -872,13 +947,6 @@ class CapsuleDeployer:
872
947
  f"Capsule {self.identifier} failed to be ready to serve traffic",
873
948
  )
874
949
 
875
- if CAPSULE_DEBUG and self._debug_dir:
876
- state_machine.save_debug_info(self._debug_dir)
877
- workers_state_machine.save_debug_info(self._debug_dir)
878
- logger(
879
- f"[debug] 💊 {self.capsule_type} {self.identifier} deployment status [on return]: {state_machine.current_status} | worker states: {workers_state_machine.current_status}"
880
- )
881
-
882
950
  return dict(
883
951
  id=self.identifier,
884
952
  auth_type=self.capsule_type,