ob-metaflow-extensions 1.1.151__py2.py3-none-any.whl → 1.6.2__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metaflow_extensions/outerbounds/__init__.py +1 -1
- metaflow_extensions/outerbounds/plugins/__init__.py +24 -3
- metaflow_extensions/outerbounds/plugins/apps/app_cli.py +0 -0
- metaflow_extensions/outerbounds/plugins/apps/core/__init__.py +16 -0
- metaflow_extensions/outerbounds/plugins/apps/core/_state_machine.py +506 -0
- metaflow_extensions/outerbounds/plugins/apps/core/_vendor/__init__.py +0 -0
- metaflow_extensions/outerbounds/plugins/apps/core/_vendor/spinner/__init__.py +4 -0
- metaflow_extensions/outerbounds/plugins/apps/core/_vendor/spinner/spinners.py +478 -0
- metaflow_extensions/outerbounds/plugins/apps/core/app_config.py +128 -0
- metaflow_extensions/outerbounds/plugins/apps/core/app_deploy_decorator.py +333 -0
- metaflow_extensions/outerbounds/plugins/apps/core/artifacts.py +0 -0
- metaflow_extensions/outerbounds/plugins/apps/core/capsule.py +1029 -0
- metaflow_extensions/outerbounds/plugins/apps/core/click_importer.py +24 -0
- metaflow_extensions/outerbounds/plugins/apps/core/code_package/__init__.py +3 -0
- metaflow_extensions/outerbounds/plugins/apps/core/code_package/code_packager.py +618 -0
- metaflow_extensions/outerbounds/plugins/apps/core/code_package/examples.py +125 -0
- metaflow_extensions/outerbounds/plugins/apps/core/config/__init__.py +15 -0
- metaflow_extensions/outerbounds/plugins/apps/core/config/cli_generator.py +165 -0
- metaflow_extensions/outerbounds/plugins/apps/core/config/config_utils.py +966 -0
- metaflow_extensions/outerbounds/plugins/apps/core/config/schema_export.py +299 -0
- metaflow_extensions/outerbounds/plugins/apps/core/config/typed_configs.py +233 -0
- metaflow_extensions/outerbounds/plugins/apps/core/config/typed_init_generator.py +537 -0
- metaflow_extensions/outerbounds/plugins/apps/core/config/unified_config.py +1125 -0
- metaflow_extensions/outerbounds/plugins/apps/core/config_schema.yaml +337 -0
- metaflow_extensions/outerbounds/plugins/apps/core/dependencies.py +115 -0
- metaflow_extensions/outerbounds/plugins/apps/core/deployer.py +1300 -0
- metaflow_extensions/outerbounds/plugins/apps/core/exceptions.py +341 -0
- metaflow_extensions/outerbounds/plugins/apps/core/experimental/__init__.py +89 -0
- metaflow_extensions/outerbounds/plugins/apps/core/perimeters.py +123 -0
- metaflow_extensions/outerbounds/plugins/apps/core/secrets.py +164 -0
- metaflow_extensions/outerbounds/plugins/apps/core/utils.py +233 -0
- metaflow_extensions/outerbounds/plugins/apps/core/validations.py +17 -0
- metaflow_extensions/outerbounds/plugins/aws/__init__.py +4 -0
- metaflow_extensions/outerbounds/plugins/aws/assume_role.py +3 -0
- metaflow_extensions/outerbounds/plugins/aws/assume_role_decorator.py +118 -0
- metaflow_extensions/outerbounds/plugins/checkpoint_datastores/coreweave.py +9 -77
- metaflow_extensions/outerbounds/plugins/checkpoint_datastores/external_chckpt.py +85 -0
- metaflow_extensions/outerbounds/plugins/checkpoint_datastores/nebius.py +7 -78
- metaflow_extensions/outerbounds/plugins/fast_bakery/baker.py +119 -0
- metaflow_extensions/outerbounds/plugins/fast_bakery/docker_environment.py +17 -3
- metaflow_extensions/outerbounds/plugins/fast_bakery/fast_bakery.py +1 -0
- metaflow_extensions/outerbounds/plugins/kubernetes/kubernetes_client.py +18 -44
- metaflow_extensions/outerbounds/plugins/kubernetes/pod_killer.py +374 -0
- metaflow_extensions/outerbounds/plugins/nim/card.py +1 -6
- metaflow_extensions/outerbounds/plugins/nim/{__init__.py → nim_decorator.py} +13 -49
- metaflow_extensions/outerbounds/plugins/nim/nim_manager.py +294 -233
- metaflow_extensions/outerbounds/plugins/nim/utils.py +36 -0
- metaflow_extensions/outerbounds/plugins/nvcf/constants.py +2 -2
- metaflow_extensions/outerbounds/plugins/nvct/nvct_decorator.py +32 -8
- metaflow_extensions/outerbounds/plugins/nvct/nvct_runner.py +1 -1
- metaflow_extensions/outerbounds/plugins/ollama/__init__.py +171 -16
- metaflow_extensions/outerbounds/plugins/ollama/constants.py +1 -0
- metaflow_extensions/outerbounds/plugins/ollama/exceptions.py +22 -0
- metaflow_extensions/outerbounds/plugins/ollama/ollama.py +1710 -114
- metaflow_extensions/outerbounds/plugins/ollama/status_card.py +292 -0
- metaflow_extensions/outerbounds/plugins/optuna/__init__.py +49 -0
- metaflow_extensions/outerbounds/plugins/profilers/simple_card_decorator.py +96 -0
- metaflow_extensions/outerbounds/plugins/s3_proxy/__init__.py +7 -0
- metaflow_extensions/outerbounds/plugins/s3_proxy/binary_caller.py +132 -0
- metaflow_extensions/outerbounds/plugins/s3_proxy/constants.py +11 -0
- metaflow_extensions/outerbounds/plugins/s3_proxy/exceptions.py +13 -0
- metaflow_extensions/outerbounds/plugins/s3_proxy/proxy_bootstrap.py +59 -0
- metaflow_extensions/outerbounds/plugins/s3_proxy/s3_proxy_api.py +93 -0
- metaflow_extensions/outerbounds/plugins/s3_proxy/s3_proxy_decorator.py +250 -0
- metaflow_extensions/outerbounds/plugins/s3_proxy/s3_proxy_manager.py +225 -0
- metaflow_extensions/outerbounds/plugins/snowflake/snowflake.py +37 -7
- metaflow_extensions/outerbounds/plugins/snowpark/snowpark.py +18 -8
- metaflow_extensions/outerbounds/plugins/snowpark/snowpark_cli.py +6 -0
- metaflow_extensions/outerbounds/plugins/snowpark/snowpark_client.py +45 -18
- metaflow_extensions/outerbounds/plugins/snowpark/snowpark_decorator.py +18 -9
- metaflow_extensions/outerbounds/plugins/snowpark/snowpark_job.py +10 -4
- metaflow_extensions/outerbounds/plugins/torchtune/__init__.py +163 -0
- metaflow_extensions/outerbounds/plugins/vllm/__init__.py +255 -0
- metaflow_extensions/outerbounds/plugins/vllm/constants.py +1 -0
- metaflow_extensions/outerbounds/plugins/vllm/exceptions.py +1 -0
- metaflow_extensions/outerbounds/plugins/vllm/status_card.py +352 -0
- metaflow_extensions/outerbounds/plugins/vllm/vllm_manager.py +621 -0
- metaflow_extensions/outerbounds/remote_config.py +46 -9
- metaflow_extensions/outerbounds/toplevel/apps/__init__.py +9 -0
- metaflow_extensions/outerbounds/toplevel/apps/exceptions.py +11 -0
- metaflow_extensions/outerbounds/toplevel/global_aliases_for_metaflow_package.py +86 -2
- metaflow_extensions/outerbounds/toplevel/ob_internal.py +4 -0
- metaflow_extensions/outerbounds/toplevel/plugins/optuna/__init__.py +1 -0
- metaflow_extensions/outerbounds/toplevel/plugins/torchtune/__init__.py +1 -0
- metaflow_extensions/outerbounds/toplevel/plugins/vllm/__init__.py +1 -0
- metaflow_extensions/outerbounds/toplevel/s3_proxy.py +88 -0
- {ob_metaflow_extensions-1.1.151.dist-info → ob_metaflow_extensions-1.6.2.dist-info}/METADATA +2 -2
- ob_metaflow_extensions-1.6.2.dist-info/RECORD +136 -0
- metaflow_extensions/outerbounds/plugins/nim/utilities.py +0 -5
- ob_metaflow_extensions-1.1.151.dist-info/RECORD +0 -74
- {ob_metaflow_extensions-1.1.151.dist-info → ob_metaflow_extensions-1.6.2.dist-info}/WHEEL +0 -0
- {ob_metaflow_extensions-1.1.151.dist-info → ob_metaflow_extensions-1.6.2.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,1300 @@
|
|
|
1
|
+
from .config import TypedCoreConfig, TypedDict
|
|
2
|
+
from .perimeters import PerimeterExtractor
|
|
3
|
+
from .capsule import CapsuleApi
|
|
4
|
+
import time
|
|
5
|
+
import os
|
|
6
|
+
import tempfile
|
|
7
|
+
from ._state_machine import DEPLOYMENT_READY_CONDITIONS, LogLine
|
|
8
|
+
from .app_config import AppConfig, AppConfigError
|
|
9
|
+
from .code_package import CodePackager
|
|
10
|
+
from .config import PackagedCode, BakedImage
|
|
11
|
+
from .app_config import CODE_PACKAGE_PREFIX, AuthType
|
|
12
|
+
from .capsule import (
|
|
13
|
+
CapsuleDeployer,
|
|
14
|
+
list_and_filter_capsules,
|
|
15
|
+
_format_url_string,
|
|
16
|
+
)
|
|
17
|
+
from .exceptions import (
|
|
18
|
+
CapsuleDeploymentException,
|
|
19
|
+
CapsuleApiException,
|
|
20
|
+
CapsuleCrashLoopException,
|
|
21
|
+
CapsuleReadinessException,
|
|
22
|
+
CapsuleConcurrentUpgradeException,
|
|
23
|
+
CapsuleDeletedDuringDeploymentException,
|
|
24
|
+
AppConcurrentUpgradeException,
|
|
25
|
+
AppCrashLoopException,
|
|
26
|
+
AppCreationFailedException,
|
|
27
|
+
AppDeletedDuringDeploymentException,
|
|
28
|
+
AppDeploymentException,
|
|
29
|
+
AppNotFoundException,
|
|
30
|
+
AppReadinessException,
|
|
31
|
+
AppUpgradeInProgressException,
|
|
32
|
+
CodePackagingException,
|
|
33
|
+
)
|
|
34
|
+
from .dependencies import ImageBakingException
|
|
35
|
+
from functools import partial
|
|
36
|
+
import sys
|
|
37
|
+
from typing import Dict, List, Optional, Callable, Any
|
|
38
|
+
from datetime import datetime
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _resolve_fast_bakery_url():
    """
    Look up the Fast Bakery endpoint and the default container image.

    Both values are read from the configuration returned by
    `PerimeterExtractor.config_during_programmatic_access()`.

    Returns
    -------
    tuple
        `(fast_bakery_url, default_container_image)`. The image is whatever
        the config holds under `METAFLOW_KUBERNETES_CONTAINER_IMAGE` and may
        be None when that key is absent.

    Raises
    ------
    ImageBakingException
        If `METAFLOW_FAST_BAKERY_URL` is missing from the configuration.
    """
    platform_config = PerimeterExtractor.config_during_programmatic_access()
    bakery_url = platform_config.get("METAFLOW_FAST_BAKERY_URL")
    fallback_image = platform_config.get("METAFLOW_KUBERNETES_CONTAINER_IMAGE")

    # Happy path first: a configured URL is all that is required.
    if bakery_url is not None:
        return bakery_url, fallback_image

    raise ImageBakingException(
        "METAFLOW_FAST_BAKERY_URL is not set. Please set the METAFLOW_FAST_BAKERY_URL environment variable or add it to your metaflow config. Please contact outerbounds support for assistance."
    )
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _load_packages_from_file(path, parser, not_found_label, python_version, logger):
    """
    Read a dependency file and return `(packages, python_version)`.

    `parser` is called with the file's text and must return a dict; its
    "packages" entry (default: empty dict) becomes the package mapping.
    `python_version` is the caller's current value and is returned unchanged
    unless the parsed dict carries a "python_version" entry.
    """
    if not os.path.exists(path):
        raise ImageBakingException(f"{not_found_label} not found: {path}")
    with open(path, "r") as f:
        parsed = parser(f.read())
    packages = parsed.get("packages", {})
    # NOTE(review): the upstream Metaflow parsers appear to report the
    # interpreter constraint under "python" rather than "python_version" --
    # confirm the key; if it differs, the version declared in the file is
    # silently ignored and the caller's value is always used.
    resolved_python = parsed.get("python_version", python_version)
    logger(f"📦 Parsed {len(packages)} packages from {path}")
    return packages, resolved_python


def bake_image(
    pypi: Optional[Dict[str, str]] = None,
    conda: Optional[Dict[str, str]] = None,
    requirements_file: Optional[str] = None,
    pyproject_toml: Optional[str] = None,
    base_image: Optional[str] = None,
    python: Optional[str] = None,
    logger: Optional[Callable[[str], Any]] = None,
    cache_name: Optional[str] = None,
) -> BakedImage:
    """
    Bake a Docker image with the specified dependencies.

    This is a composable building block that can be used standalone or
    combined with AppDeployer to deploy apps with custom images.

    Parameters
    ----------
    pypi : Dict[str, str], optional
        Dictionary of PyPI packages to install. Keys are package names,
        values are version specifiers. Example: {"flask": ">=2.0", "requests": ""}
        Mutually exclusive with requirements_file and pyproject_toml.
    conda : Dict[str, str], optional
        Dictionary of Conda packages to install.
    requirements_file : str, optional
        Path to a requirements.txt file.
        Mutually exclusive with pypi and pyproject_toml.
    pyproject_toml : str, optional
        Path to a pyproject.toml file.
        Mutually exclusive with pypi and requirements_file.
    base_image : str, optional
        Base Docker image to build from. Defaults to the platform default image.
    python : str, optional
        Python version to use (e.g., "3.11.0"). If None (default), uses the Python
        already present in the base_image and installs dependencies into it. If a
        version is specified, a new Python environment at that version is created
        inside the base image, and all dependencies are installed into it.
    logger : Callable, optional
        Logger function for progress messages.
    cache_name : str, optional
        Name used for the local bake cache directory
        (`<tmpdir>/ob-bake-<cache_name>`) and passed to the bakery as the
        build reference. Defaults to "default".

    Returns
    -------
    BakedImage
        Named tuple containing:
        - image: The baked Docker image URL
        - python_path: Path to Python executable in the image

        When neither PyPI nor Conda packages are requested, nothing is baked
        and the base image is returned with python_path set to "python".

    Raises
    ------
    ImageBakingException
        If baking fails or if invalid parameters are provided.

    Examples
    --------
    Bake with PyPI packages:

    ```python
    result = bake_image(pypi={"flask": ">=2.0", "requests": ""})
    print(result.image)
    ```

    Bake from requirements.txt:

    ```python
    result = bake_image(requirements_file="./requirements.txt")
    ```

    Bake from pyproject.toml:

    ```python
    result = bake_image(pyproject_toml="./pyproject.toml")
    ```

    Combine with AppDeployer:

    ```python
    from metaflow.apps import bake_image, AppDeployer

    baked = bake_image(pypi={"flask": ">=2.0"})
    deployer = AppDeployer(name="my-app", port=8080, image=baked.image)
    deployed = deployer.deploy()
    ```
    """
    from metaflow.ob_internal import internal_bake_image as _internal_bake  # type: ignore
    from metaflow.plugins.pypi.parsers import (
        requirements_txt_parser,
        pyproject_toml_parser,
    )

    from metaflow.metaflow_config import (
        DEFAULT_DATASTORE,
        get_pinned_conda_libs,
    )

    # Validate the arguments before touching platform configuration so that a
    # malformed call fails fast with the relevant message instead of a
    # configuration error.
    dep_sources = sum(
        source is not None for source in (pypi, requirements_file, pyproject_toml)
    )
    if dep_sources > 1:
        raise ImageBakingException(
            "Only one of pypi, requirements_file, or pyproject_toml can be specified."
        )

    fast_bakery_url, default_base_image = _resolve_fast_bakery_url()

    # Set defaults
    _base_image = base_image or default_base_image
    _python_version = python  # Keep it None to use image python
    _logger = logger or (lambda x: None)
    _cache_name = cache_name or "default"

    # Set up cache directory (internal - not exposed to users)
    cache_dir = os.path.join(tempfile.gettempdir(), f"ob-bake-{_cache_name}")
    os.makedirs(cache_dir, exist_ok=True)
    cache_file_path = os.path.join(cache_dir, "image_cache")

    # Collect packages
    pypi_packages: Dict[str, str] = {}
    conda_packages: Dict[str, str] = {}

    # Parse from file if provided; otherwise fall back to the explicit dict.
    if requirements_file:
        pypi_packages, _python_version = _load_packages_from_file(
            requirements_file,
            requirements_txt_parser,
            "Requirements file",
            _python_version,
            _logger,
        )
    elif pyproject_toml:
        pypi_packages, _python_version = _load_packages_from_file(
            pyproject_toml,
            pyproject_toml_parser,
            "pyproject.toml",
            _python_version,
            _logger,
        )
    elif pypi:
        # Copy so the pinned libs added below never mutate the caller's dict.
        pypi_packages = pypi.copy()

    if conda:
        conda_packages = conda.copy()

    # Check if there are any packages to bake
    if not pypi_packages and not conda_packages:
        _logger("⚠️ No packages to bake. Returning base image.")
        return BakedImage(image=_base_image, python_path="python")

    # Add pinned conda libs required by the platform; on a name clash the
    # pinned version replaces the user-supplied specifier.
    pinned_libs = get_pinned_conda_libs(_python_version, DEFAULT_DATASTORE)
    pypi_packages.update(pinned_libs)

    _logger(f"🍞 Baking image with {len(pypi_packages)} PyPI packages...")

    # Call the internal bake function
    fb_response = _internal_bake(
        cache_file_path=cache_file_path,
        pypi_packages=pypi_packages,
        conda_packages=conda_packages,
        ref=_cache_name,
        python=_python_version,
        base_image=_base_image,
        logger=_logger,
        fast_bakery_url=fast_bakery_url,
    )

    if fb_response.failure:
        raise ImageBakingException(f"Failed to bake image: {fb_response.response}")

    _logger(f"🐳 Baked image: {fb_response.container_image}")

    return BakedImage(
        image=fb_response.container_image,
        python_path=fb_response.python_path,
    )
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
def package_code(
    src_paths: List[str],
    suffixes: Optional[List[str]] = None,
    logger: Optional[Callable[[str], Any]] = None,
) -> PackagedCode:
    """
    Bundle source directories into a code package and store it.

    A standalone building block: the returned value can be handed to
    AppDeployer as `code_package` to deploy an app with this code.

    Parameters
    ----------
    src_paths : List[str]
        Directories to include in the package. Every entry must exist and
        must be a directory.
    suffixes : List[str], optional
        File extensions to include (e.g., [".py", ".json", ".yaml"]).
        When None, the value is forwarded as-is and CodePackager applies its
        own defaults.
    logger : Callable, optional
        Callback receiving one progress message string at a time.

    Returns
    -------
    PackagedCode
        Named tuple with:
        - url: where the package was stored
        - key: the key returned by the packager for this package

    Raises
    ------
    CodePackagingException
        If a path is missing or not a directory, or if storing the package
        fails for any reason.

    Examples
    --------
    Single directory:

    ```python
    pkg = package_code(src_paths=["./src"])
    print(pkg.url)
    ```

    Several directories, restricted to specific file types:

    ```python
    pkg = package_code(
        src_paths=["./app", "./configs"],
        suffixes=[".py", ".yaml", ".json"]
    )
    ```
    """
    from metaflow.metaflow_config import DEFAULT_DATASTORE

    def _discard(_message):
        return None

    emit = logger if logger else _discard

    # Reject bad inputs up front; a directory is by definition an existing
    # path, so only non-directories need the finer-grained diagnosis.
    for candidate in src_paths:
        if os.path.isdir(candidate):
            continue
        if not os.path.exists(candidate):
            raise CodePackagingException(f"Source path does not exist: {candidate}")
        raise CodePackagingException(f"Source path is not a directory: {candidate}")

    emit(f"📦 Packaging {len(src_paths)} directory(ies)...")

    code_packager = CodePackager(
        datastore_type=DEFAULT_DATASTORE,
        code_package_prefix=CODE_PACKAGE_PREFIX,
    )

    # Any failure while storing (including an unexpected return shape) is
    # reported uniformly as a packaging error, with the cause chained.
    try:
        stored_url, stored_key = code_packager.store(
            paths_to_include=src_paths,
            file_suffixes=suffixes,
        )
    except Exception as e:
        raise CodePackagingException(f"Failed to package code: {e}") from e

    emit(f"📦 Code package stored: {stored_url}")

    return PackagedCode(url=stored_url, key=stored_key)
|
|
327
|
+
|
|
328
|
+
|
|
329
|
+
class AppDeployer(TypedCoreConfig):
|
|
330
|
+
|
|
331
|
+
__examples__ = """
|
|
332
|
+
Examples
|
|
333
|
+
--------
|
|
334
|
+
Basic deployment with bake_image and package_code:
|
|
335
|
+
|
|
336
|
+
```python
|
|
337
|
+
from metaflow.apps import bake_image, package_code, AppDeployer
|
|
338
|
+
|
|
339
|
+
# Step 1: Bake dependencies into an image
|
|
340
|
+
baked = bake_image(pypi={"flask": ">=2.0", "requests": ""})
|
|
341
|
+
|
|
342
|
+
# Step 2: Package your application code
|
|
343
|
+
pkg = package_code(src_paths=["./src"])
|
|
344
|
+
|
|
345
|
+
# Step 3: Create deployer and deploy
|
|
346
|
+
deployer = AppDeployer(
|
|
347
|
+
name="my-flask-app",
|
|
348
|
+
port=8000,
|
|
349
|
+
image=baked.image,
|
|
350
|
+
code_package=pkg,
|
|
351
|
+
commands=["python server.py"],
|
|
352
|
+
replicas={"min": 1, "max": 3},
|
|
353
|
+
resources={"cpu": "1", "memory": "2048Mi"},
|
|
354
|
+
)
|
|
355
|
+
deployed = deployer.deploy()
|
|
356
|
+
print(deployed.public_url)
|
|
357
|
+
```
|
|
358
|
+
|
|
359
|
+
Deployment with API authentication:
|
|
360
|
+
|
|
361
|
+
```python
|
|
362
|
+
deployer = AppDeployer(
|
|
363
|
+
name="my-api",
|
|
364
|
+
port=8000,
|
|
365
|
+
image=baked.image,
|
|
366
|
+
code_package=pkg,
|
|
367
|
+
commands=["python api.py"],
|
|
368
|
+
auth={"type": "API"},
|
|
369
|
+
)
|
|
370
|
+
deployed = deployer.deploy()
|
|
371
|
+
```
|
|
372
|
+
|
|
373
|
+
Deployment with environment variables and secrets:
|
|
374
|
+
|
|
375
|
+
```python
|
|
376
|
+
deployer = AppDeployer(
|
|
377
|
+
name="my-app",
|
|
378
|
+
port=8000,
|
|
379
|
+
image=baked.image,
|
|
380
|
+
code_package=pkg,
|
|
381
|
+
commands=["python app.py"],
|
|
382
|
+
environment={"DEBUG": "false", "LOG_LEVEL": "info"},
|
|
383
|
+
secrets=["my-api-keys"],
|
|
384
|
+
)
|
|
385
|
+
deployed = deployer.deploy()
|
|
386
|
+
```
|
|
387
|
+
|
|
388
|
+
Interacting with a deployed app:
|
|
389
|
+
|
|
390
|
+
```python
|
|
391
|
+
# Get app info
|
|
392
|
+
info = deployed.info()
|
|
393
|
+
|
|
394
|
+
# Get logs from all workers
|
|
395
|
+
logs = deployed.logs()
|
|
396
|
+
|
|
397
|
+
# Scale to zero workers
|
|
398
|
+
deployed.scale_to_zero()
|
|
399
|
+
|
|
400
|
+
# Delete the app
|
|
401
|
+
deployed.delete()
|
|
402
|
+
```
|
|
403
|
+
"""
|
|
404
|
+
|
|
405
|
+
__doc__ = (
|
|
406
|
+
"""Programmatic API For deploying Outerbounds Apps.\n"""
|
|
407
|
+
+ TypedCoreConfig.__doc__
|
|
408
|
+
+ __examples__
|
|
409
|
+
)
|
|
410
|
+
|
|
411
|
+
__init__ = TypedCoreConfig.__init__
|
|
412
|
+
|
|
413
|
+
_app_config: AppConfig
|
|
414
|
+
|
|
415
|
+
# What is `_state` ?
|
|
416
|
+
# `_state` is a dictionary that will hold all information that might need
|
|
417
|
+
# to be passed down without the user explicitly setting them.
|
|
418
|
+
# Setting `_state` will ensure that values are explicitly passed down from
|
|
419
|
+
# top level when the class is used under different context.
|
|
420
|
+
# So for example if we need to set some things like project/branches etc
|
|
421
|
+
# during metaflow context, we can do so easily. We also like to set state like
|
|
422
|
+
# perimeters at class level since users currently cannot switch perimeters within
|
|
423
|
+
# the same interpreter.
|
|
424
|
+
_state = {}
|
|
425
|
+
|
|
426
|
+
__state_items = [
|
|
427
|
+
# perimeter and api_url come from config setups
|
|
428
|
+
# need to happen before AppDeployer and need to
|
|
429
|
+
# come from _set_state
|
|
430
|
+
"perimeter",
|
|
431
|
+
"api_url",
|
|
432
|
+
# code package URL / code package key
|
|
433
|
+
# can come from CodePackager so its fine
|
|
434
|
+
# if its in _set_state
|
|
435
|
+
"code_package_url",
|
|
436
|
+
"code_package_key",
|
|
437
|
+
# Image can be explicitly set by the user
|
|
438
|
+
# or require some external fast-bakery API
|
|
439
|
+
"image",
|
|
440
|
+
# project/branch have to come from _set_state
|
|
441
|
+
# if users do this through current. Otherwise
|
|
442
|
+
# can come from the
|
|
443
|
+
"project",
|
|
444
|
+
"branch",
|
|
445
|
+
]
|
|
446
|
+
|
|
447
|
+
def _init(self):
    """
    Record the perimeter and API URL in the shared deployer state.

    Both values come from `PerimeterExtractor.during_programmatic_access()`
    and are stored through `_set_state` under the keys "perimeter" and
    "api_url".
    """
    perimeter, api_url = PerimeterExtractor.during_programmatic_access()
    for state_key, state_value in (("perimeter", perimeter), ("api_url", api_url)):
        self._set_state(state_key, state_value)
|
|
451
|
+
|
|
452
|
+
@property
def _deploy_config(self) -> AppConfig:
    """
    Return the AppConfig for this deployer, building it on first access.

    The instance is created from `self._config` once and then cached on
    `self._app_config`; later accesses return the cached object.
    """
    try:
        return self._app_config
    except AttributeError:
        # Not built yet; fall through and construct it outside the handler
        # so errors raised by AppConfig are not chained to this lookup.
        pass
    self._app_config = AppConfig(self._config)
    return self._app_config
|
|
457
|
+
|
|
458
|
+
# Things that need to be set before deploy
|
|
459
|
+
@classmethod
def _set_state(cls, key, value):
    """
    Store `value` under `key` in the class-level `_state` dictionary.

    `_state` is a class attribute, so an entry written here is visible to
    every instance that shares that dictionary, not just the caller.
    """
    shared_state = cls._state
    shared_state[key] = value
|
|
462
|
+
|
|
463
|
+
def deploy(
|
|
464
|
+
self,
|
|
465
|
+
readiness_condition: str = DEPLOYMENT_READY_CONDITIONS.ATLEAST_ONE_RUNNING,
|
|
466
|
+
max_wait_time=600,
|
|
467
|
+
readiness_wait_time=10,
|
|
468
|
+
logger_fn=partial(print, file=sys.stderr),
|
|
469
|
+
**kwargs,
|
|
470
|
+
) -> "DeployedApp":
|
|
471
|
+
"""
|
|
472
|
+
Deploy the app to the Outerbounds Platform.
|
|
473
|
+
|
|
474
|
+
This method packages and deploys the configured app, waiting for it to reach
|
|
475
|
+
the specified readiness condition before returning.
|
|
476
|
+
|
|
477
|
+
Parameters
|
|
478
|
+
----------
|
|
479
|
+
readiness_condition : str, optional
|
|
480
|
+
The condition that must be met for the deployment to be considered ready.
|
|
481
|
+
Default is ATLEAST_ONE_RUNNING.
|
|
482
|
+
|
|
483
|
+
Deployment ready conditions define what is considered a successful completion
|
|
484
|
+
of the current deployment instance. This allows users or platform designers
|
|
485
|
+
to configure the criteria for deployment readiness.
|
|
486
|
+
|
|
487
|
+
Why do we need deployment readiness conditions?
|
|
488
|
+
- Deployments might be taking place from a CI/CD-esque environment.
|
|
489
|
+
In these setups, the downstream build triggers might be depending on
|
|
490
|
+
a specific criteria for deployment completion. Having readiness conditions
|
|
491
|
+
allows the CI/CD systems to get a signal of when the deployment is ready.
|
|
492
|
+
- Users might be calling the deployment API under different conditions:
|
|
493
|
+
- Some users might want a cluster of workers ready before serving
|
|
494
|
+
traffic while others might want just one worker ready to start
|
|
495
|
+
serving traffic.
|
|
496
|
+
|
|
497
|
+
Available readiness conditions:
|
|
498
|
+
|
|
499
|
+
ATLEAST_ONE_RUNNING ("at_least_one_running")
|
|
500
|
+
At least min(min_replicas, 1) workers of the current deployment
|
|
501
|
+
instance's version have started running.
|
|
502
|
+
Usecase: Some endpoints may be deployed ephemerally and are considered
|
|
503
|
+
ready when at least one instance is running; additional instances are
|
|
504
|
+
for load management.
|
|
505
|
+
|
|
506
|
+
ALL_RUNNING ("all_running")
|
|
507
|
+
At least min_replicas number of workers are running for the deployment
|
|
508
|
+
to be considered ready.
|
|
509
|
+
Usecase: Operators may require that all replicas are available before
|
|
510
|
+
traffic is routed. Needed when inference endpoints may be under some
|
|
511
|
+
SLA or require a larger load.
|
|
512
|
+
|
|
513
|
+
FULLY_FINISHED ("fully_finished")
|
|
514
|
+
At least min_replicas number of workers are running for the deployment
|
|
515
|
+
and there are no pending or crashlooping workers from previous versions
|
|
516
|
+
lying around.
|
|
517
|
+
Usecase: Ensuring endpoint is fully available and no other versions are
|
|
518
|
+
running or endpoint has been fully scaled down.
|
|
519
|
+
|
|
520
|
+
ASYNC ("async")
|
|
521
|
+
The deployment will be assumed ready as soon as the server acknowledges
|
|
522
|
+
it has registered the app in the backend.
|
|
523
|
+
Usecase: Operators may only care that the URL is minted for the deployment
|
|
524
|
+
or the operator wants the deployment to eventually scale down to 0.
|
|
525
|
+
|
|
526
|
+
max_wait_time : int, optional
|
|
527
|
+
Maximum time in seconds to wait for the deployment to reach readiness.
|
|
528
|
+
Default is 600 (10 minutes).
|
|
529
|
+
|
|
530
|
+
readiness_wait_time : int, optional
|
|
531
|
+
Time in seconds to wait between readiness checks. Default is 10.
|
|
532
|
+
|
|
533
|
+
logger_fn : Callable, optional
|
|
534
|
+
Function to use for logging progress messages. Default prints to stderr.
|
|
535
|
+
|
|
536
|
+
Returns
|
|
537
|
+
-------
|
|
538
|
+
DeployedApp
|
|
539
|
+
An object representing the deployed app with methods to interact with it
|
|
540
|
+
(logs, info, scale_to_zero, delete, etc.) and properties like public_url.
|
|
541
|
+
|
|
542
|
+
Raises
|
|
543
|
+
------
|
|
544
|
+
CodePackagingException
|
|
545
|
+
If code_package is not provided or is not a valid PackagedCode instance.
|
|
546
|
+
|
|
547
|
+
AppConfigError
|
|
548
|
+
If the app configuration is invalid.
|
|
549
|
+
|
|
550
|
+
AppCreationFailedException
|
|
551
|
+
If the app deployment submission fails due to an API error.
|
|
552
|
+
Contains status_code and error_text attributes for debugging.
|
|
553
|
+
|
|
554
|
+
AppCrashLoopException
|
|
555
|
+
If a worker enters CrashLoopBackOff or Failed state during deployment.
|
|
556
|
+
Contains worker_id and logs attributes for debugging.
|
|
557
|
+
|
|
558
|
+
AppReadinessException
|
|
559
|
+
If the app fails to meet readiness conditions within max_wait_time.
|
|
560
|
+
|
|
561
|
+
AppUpgradeInProgressException
|
|
562
|
+
If an upgrade is already in progress when deployment starts.
|
|
563
|
+
Use force_upgrade=True to override. Contains upgrader attribute.
|
|
564
|
+
|
|
565
|
+
AppConcurrentUpgradeException
|
|
566
|
+
If another deployment was triggered while this deployment was in progress,
|
|
567
|
+
invalidating the current deployment. Contains expected_version and actual_version.
|
|
568
|
+
|
|
569
|
+
OuterboundsBackendUnhealthyException
|
|
570
|
+
If the Outerbounds backend is unreachable (network issues, DNS failures) or
|
|
571
|
+
returns server errors (HTTP 5xx). This indicates a platform-side issue, not a
|
|
572
|
+
problem with your configuration. Retry the deployment or contact Outerbounds support.
|
|
573
|
+
|
|
574
|
+
AppDeletedDuringDeploymentException
|
|
575
|
+
If the app was deleted by another process or user while this deployment was
|
|
576
|
+
in progress. This can occur when concurrent operations conflict.
|
|
577
|
+
|
|
578
|
+
Examples
|
|
579
|
+
--------
|
|
580
|
+
Basic deployment:
|
|
581
|
+
|
|
582
|
+
```python
|
|
583
|
+
from metaflow.apps import bake_image, package_code, AppDeployer
|
|
584
|
+
baked = bake_image(pypi={"flask": ">=2.0"})
|
|
585
|
+
pkg = package_code(src_paths=["./src"])
|
|
586
|
+
deployer = AppDeployer(
|
|
587
|
+
name="my-app",
|
|
588
|
+
port=8000,
|
|
589
|
+
image=baked.image,
|
|
590
|
+
code_package=pkg,
|
|
591
|
+
commands=["python server.py"],
|
|
592
|
+
)
|
|
593
|
+
deployed = deployer.deploy()
|
|
594
|
+
print(deployed.public_url)
|
|
595
|
+
```
|
|
596
|
+
|
|
597
|
+
Wait for all replicas to be ready:
|
|
598
|
+
|
|
599
|
+
```python
|
|
600
|
+
deployed = deployer.deploy(
|
|
601
|
+
readiness_condition="all_running"
|
|
602
|
+
)
|
|
603
|
+
```
|
|
604
|
+
|
|
605
|
+
Async deployment (don't wait for workers):
|
|
606
|
+
|
|
607
|
+
```python
|
|
608
|
+
deployed = deployer.deploy(
|
|
609
|
+
readiness_condition="async"
|
|
610
|
+
)
|
|
611
|
+
```
|
|
612
|
+
|
|
613
|
+
Handling deployment errors:
|
|
614
|
+
|
|
615
|
+
```python
|
|
616
|
+
from metaflow.apps import AppDeployer
|
|
617
|
+
from metaflow.apps.exceptions import (
|
|
618
|
+
AppReadinessException,
|
|
619
|
+
)
|
|
620
|
+
|
|
621
|
+
try:
|
|
622
|
+
deployed = deployer.deploy()
|
|
623
|
+
except AppReadinessException as e:
|
|
624
|
+
print(f"App {e.app_id} failed to become ready in time but we can move forward")
|
|
625
|
+
deployed_app:DeployedApp = e.deployed_app
|
|
626
|
+
# use DeployedApp to do what ever you need
|
|
627
|
+
```
|
|
628
|
+
"""
|
|
629
|
+
if len(self._state.get("default_tags", [])) > 0:
|
|
630
|
+
self._deploy_config._core_config.tags = (
|
|
631
|
+
self._deploy_config._core_config.tags or []
|
|
632
|
+
) + self._state["default_tags"]
|
|
633
|
+
|
|
634
|
+
# Handle code_package if provided - extract url and key to state
|
|
635
|
+
code_package = getattr(self._deploy_config._core_config, "code_package", None)
|
|
636
|
+
if code_package is not None:
|
|
637
|
+
# Validate that code_package is a PackagedCode namedtuple
|
|
638
|
+
if not isinstance(code_package, PackagedCode):
|
|
639
|
+
raise CodePackagingException(
|
|
640
|
+
f"code_package must be a PackagedCode instance returned by package_code(). "
|
|
641
|
+
f"Got {type(code_package).__name__} instead.\n\n"
|
|
642
|
+
"Use package_code() to create a valid code package:\n\n"
|
|
643
|
+
" from metaflow.apps import package_code, AppDeployer\n\n"
|
|
644
|
+
" pkg = package_code(src_paths=['./src'])\n"
|
|
645
|
+
" deployer = AppDeployer(..., code_package=pkg)\n"
|
|
646
|
+
)
|
|
647
|
+
self._set_state("code_package_url", code_package.url)
|
|
648
|
+
self._set_state("code_package_key", code_package.key)
|
|
649
|
+
# Clear the code_package field to avoid serialization issues
|
|
650
|
+
self._deploy_config._core_config.code_package = None
|
|
651
|
+
|
|
652
|
+
# Verify code_package is present (either from code_package param or from state)
|
|
653
|
+
if (
|
|
654
|
+
self._state.get("code_package_url") is None
|
|
655
|
+
and self._deploy_config.get_state("code_package_url") is None
|
|
656
|
+
):
|
|
657
|
+
raise CodePackagingException(
|
|
658
|
+
"code_package is required for deployment. "
|
|
659
|
+
"Use package_code() to create a code package:\n\n"
|
|
660
|
+
" from metaflow.apps import package_code, AppDeployer\n\n"
|
|
661
|
+
" pkg = package_code(src_paths=['./src'])\n"
|
|
662
|
+
" deployer = AppDeployer(..., code_package=pkg)\n"
|
|
663
|
+
)
|
|
664
|
+
|
|
665
|
+
self._deploy_config.commit()
|
|
666
|
+
# Set any state that might have been passed down from the top level
|
|
667
|
+
for k in self.__state_items:
|
|
668
|
+
if self._deploy_config.get_state(k) is None and (
|
|
669
|
+
k in self._state and self._state[k] is not None
|
|
670
|
+
):
|
|
671
|
+
self._deploy_config.set_state(k, self._state[k])
|
|
672
|
+
|
|
673
|
+
capsule = CapsuleDeployer(
|
|
674
|
+
self._deploy_config,
|
|
675
|
+
self._state["api_url"],
|
|
676
|
+
create_timeout=max_wait_time,
|
|
677
|
+
debug_dir=None,
|
|
678
|
+
success_terminal_state_condition=readiness_condition,
|
|
679
|
+
readiness_wait_time=readiness_wait_time,
|
|
680
|
+
logger_fn=logger_fn,
|
|
681
|
+
)
|
|
682
|
+
|
|
683
|
+
currently_present_capsules = list_and_filter_capsules(
|
|
684
|
+
capsule.capsule_api,
|
|
685
|
+
None,
|
|
686
|
+
None,
|
|
687
|
+
capsule.name,
|
|
688
|
+
None,
|
|
689
|
+
None,
|
|
690
|
+
None,
|
|
691
|
+
)
|
|
692
|
+
|
|
693
|
+
force_upgrade = self._deploy_config.get_state("force_upgrade", False)
|
|
694
|
+
|
|
695
|
+
if len(currently_present_capsules) > 0:
|
|
696
|
+
# Only update the capsule if there is no upgrade in progress
|
|
697
|
+
# Only update a "already updating" capsule if the `--force-upgrade` flag is provided.
|
|
698
|
+
_curr_cap = currently_present_capsules[0]
|
|
699
|
+
this_capsule_is_being_updated = _curr_cap.get("status", {}).get(
|
|
700
|
+
"updateInProgress", False
|
|
701
|
+
)
|
|
702
|
+
|
|
703
|
+
if this_capsule_is_being_updated and not force_upgrade:
|
|
704
|
+
_upgrader = _curr_cap.get("metadata", {}).get("lastModifiedBy", None)
|
|
705
|
+
raise AppUpgradeInProgressException(
|
|
706
|
+
app_id=_curr_cap.get("id"),
|
|
707
|
+
upgrader=_upgrader,
|
|
708
|
+
)
|
|
709
|
+
|
|
710
|
+
logger_fn(
|
|
711
|
+
f"🚀 {'' if not force_upgrade else 'Force'} Upgrading {capsule.capsule_type.lower()} `{capsule.name}`....",
|
|
712
|
+
)
|
|
713
|
+
else:
|
|
714
|
+
logger_fn(
|
|
715
|
+
f"🚀 Deploying {capsule.capsule_type.lower()} `{capsule.name}`....",
|
|
716
|
+
)
|
|
717
|
+
|
|
718
|
+
try:
|
|
719
|
+
capsule.create()
|
|
720
|
+
except CapsuleApiException as e:
|
|
721
|
+
raise AppCreationFailedException(
|
|
722
|
+
app_name=capsule.name,
|
|
723
|
+
status_code=e.status_code,
|
|
724
|
+
error_text=e.text,
|
|
725
|
+
) from e
|
|
726
|
+
try:
|
|
727
|
+
final_status = capsule.wait_for_terminal_state()
|
|
728
|
+
except CapsuleCrashLoopException as e:
|
|
729
|
+
raise AppCrashLoopException(
|
|
730
|
+
app_id=e.capsule_id,
|
|
731
|
+
worker_id=e.worker_id,
|
|
732
|
+
logs=e.logs,
|
|
733
|
+
) from e
|
|
734
|
+
except CapsuleReadinessException as e:
|
|
735
|
+
raise AppReadinessException(
|
|
736
|
+
app_id=e.capsule_id,
|
|
737
|
+
) from e
|
|
738
|
+
except CapsuleConcurrentUpgradeException as e:
|
|
739
|
+
raise AppConcurrentUpgradeException(
|
|
740
|
+
app_id=e.capsule_id,
|
|
741
|
+
expected_version=e.expected_version,
|
|
742
|
+
actual_version=e.actual_version,
|
|
743
|
+
modified_by=e.modified_by,
|
|
744
|
+
modified_at=e.modified_at,
|
|
745
|
+
) from e
|
|
746
|
+
except CapsuleDeletedDuringDeploymentException as e:
|
|
747
|
+
raise AppDeletedDuringDeploymentException(
|
|
748
|
+
app_id=e.capsule_id,
|
|
749
|
+
) from e
|
|
750
|
+
|
|
751
|
+
return DeployedApp(
|
|
752
|
+
final_status["id"],
|
|
753
|
+
final_status["auth_type"],
|
|
754
|
+
_format_url_string(final_status["public_url"], True),
|
|
755
|
+
final_status["name"],
|
|
756
|
+
final_status["deployed_version"],
|
|
757
|
+
final_status["deployed_at"],
|
|
758
|
+
)
|
|
759
|
+
|
|
760
|
+
@classmethod
def list_deployments(
    cls,
    name: str = None,
    project: str = None,
    branch: str = None,
    tags: List[Dict[str, str]] = None,
) -> List["DeployedApp"]:
    """
    Return the deployed apps that match the given filters.

    Every filter is optional; omitting all of them lists every app.

    Parameters
    ----------
    name : str, optional
        Only return apps with this name.
    project : str, optional
        Only return apps that belong to this project.
    branch : str, optional
        Only return apps that belong to this branch.
    tags : List[Dict[str, str]], optional
        Only return apps carrying all of these tags. Each entry is a
        dict of tag name to tag value, e.g. ``[{"env": "prod"}]`` or
        ``[{"team": "ml"}, {"version": "v2"}]``.

    Returns
    -------
    List[DeployedApp]
        One `DeployedApp` per matching app.

    Examples
    --------
    List everything:

    ```python
    apps = AppDeployer.list_deployments()
    ```

    Filter by name, or by project and branch:

    ```python
    apps = AppDeployer.list_deployments(name="my-app")
    apps = AppDeployer.list_deployments(project="ml-pipeline", branch="main")
    ```

    Filter by tags (an app must carry every listed tag):

    ```python
    apps = AppDeployer.list_deployments(tags=[{"env": "prod"}, {"team": "ml"}])
    ```

    Combine filters:

    ```python
    apps = AppDeployer.list_deployments(
        project="recommendations",
        tags=[{"env": "staging"}]
    )
    ```
    """
    # Flatten every {name: value} pair into the {"key": ..., "value": ...}
    # form that is handed to list_and_filter_capsules.
    tag_filters = None
    if tags:
        tag_filters = []
        for tag in tags:
            for tag_key, tag_value in tag.items():
                tag_filters.append({"key": tag_key, "value": tag_value})

    matching_capsules = list_and_filter_capsules(
        DeployedApp._get_capsule_api(),
        project=project,
        branch=branch,
        name=name,
        tags=tag_filters,
        auth_type=None,
        capsule_id=None,
    )
    return [DeployedApp._from_capsule(found) for found in matching_capsules]
|
|
851
|
+
|
|
852
|
+
|
|
853
|
+
class TTLCachedObject:
    """
    Descriptor that caches one value per instance with a time-to-live (TTL).

    The value and the time it was stored are kept on the owning instance
    under a private attribute. Reading the descriptor returns the stored
    value while it is younger than ``ttl_seconds`` and ``None`` once it has
    expired or when nothing has been stored yet.

    Wall-clock time (``time.time``) is used for the timestamp because it is
    stored in the instance ``__dict__`` and therefore stays comparable if
    the instance is pickled and restored elsewhere.

    Parameters
    ----------
    ttl_seconds : float
        Maximum age, in seconds, for which a stored value is returned.
    """

    def __init__(self, ttl_seconds: float):
        self._ttl = ttl_seconds
        # Fallback slot name. ``__set_name__`` only runs for descriptors
        # defined inside a class body; without this default, a descriptor
        # attached to a class afterwards would call getattr() with a
        # ``None`` attribute name and raise TypeError.
        self._attr_name = f"_ttl_cache_{id(self)}"

    def __set_name__(self, owner, name):
        # Derive the per-instance storage slot from the attribute name.
        self._attr_name = f"_ttl_cache_{name}"

    def __get__(self, instance, owner=None):
        if instance is None:
            # Accessed on the class itself: expose the descriptor object.
            return self
        entry = getattr(instance, self._attr_name, None)
        if entry is None:
            return None
        value, stored_at = entry
        if (time.time() - stored_at) > self._ttl:
            return None
        return value

    def __set__(self, instance, val):
        setattr(instance, self._attr_name, (val, time.time()))

    def __delete__(self, instance):
        # Deleting an unset value is a no-op.
        if hasattr(instance, self._attr_name):
            delattr(instance, self._attr_name)
|
|
883
|
+
|
|
884
|
+
|
|
885
|
+
class DeployedApp:
|
|
886
|
+
"""
|
|
887
|
+
A deployed app on the Outerbounds Platform.
|
|
888
|
+
|
|
889
|
+
Obtain instances via `AppDeployer.deploy()` or `AppDeployer.list_deployments()`.
|
|
890
|
+
|
|
891
|
+
Examples
|
|
892
|
+
--------
|
|
893
|
+
After deployment:
|
|
894
|
+
|
|
895
|
+
```python
|
|
896
|
+
deployed = deployer.deploy()
|
|
897
|
+
print(deployed.public_url)
|
|
898
|
+
```
|
|
899
|
+
|
|
900
|
+
After listing:
|
|
901
|
+
|
|
902
|
+
```python
|
|
903
|
+
apps = AppDeployer.list_deployments(tags=[{"env": "staging"}])
|
|
904
|
+
for app in apps:
|
|
905
|
+
print(f"{app.name}: {app.public_url}")
|
|
906
|
+
```
|
|
907
|
+
|
|
908
|
+
Inspect and manage:
|
|
909
|
+
|
|
910
|
+
```python
|
|
911
|
+
# Get logs
|
|
912
|
+
for worker_id, lines in deployed.logs().items():
|
|
913
|
+
print(f"Worker {worker_id}: {len(lines)} log lines")
|
|
914
|
+
|
|
915
|
+
# Scale down
|
|
916
|
+
deployed.scale_to_zero()
|
|
917
|
+
|
|
918
|
+
# Clean up
|
|
919
|
+
deployed.delete()
|
|
920
|
+
```
|
|
921
|
+
|
|
922
|
+
Make authenticated requests (API auth):
|
|
923
|
+
|
|
924
|
+
```python
|
|
925
|
+
import requests
|
|
926
|
+
response = requests.get(deployed.public_url, headers=deployed.auth())
|
|
927
|
+
```
|
|
928
|
+
"""
|
|
929
|
+
|
|
930
|
+
# Keep a 3ish second TTL so that we can be gentler
|
|
931
|
+
# on the backend API.
|
|
932
|
+
_capsule_info_cached = TTLCachedObject(3)
|
|
933
|
+
|
|
934
|
+
def __init__(
    self,
    _id: str,
    capsule_type: str,
    public_url: str,
    name: str,
    deployed_version: str,
    deployed_at: str,
):
    """
    Store the identifying details of a deployed app.

    Parameters
    ----------
    _id : str
        ID of the capsule backing this app.
    capsule_type : str
        Value exposed through the `auth_type` property.
    public_url : str
        Public URL of the app; may be None and resolved later.
    name : str
        Name of the app.
    deployed_version : str
        Version identifier of the deployment.
    deployed_at : str
        Deployment timestamp as a string; parsed by `deployed_at`.
    """
    (
        self._id,
        self._capsule_type,
        self._public_url,
        self._name,
        self._deployed_version,
        self._deployed_at,
    ) = (_id, capsule_type, public_url, name, deployed_version, deployed_at)
|
|
949
|
+
|
|
950
|
+
@classmethod
def _get_capsule_api(cls) -> CapsuleApi:
    """
    Build a `CapsuleApi` client for programmatic access.

    Capsules may be managed from a remote, programmatic setting where the
    user has no hands-on control, so the client is created with
    ``retry_500s=True`` to retry on 5xx errors.
    """
    perimeter, api_server = PerimeterExtractor.during_programmatic_access()
    client = CapsuleApi(api_server, perimeter, retry_500s=True)
    return client
|
|
957
|
+
|
|
958
|
+
@property
def _capsule_info(self):
    """
    Capsule details for this app, served from a short-lived cache.

    `_capsule_info_cached` yields None once its TTL has expired, in which
    case the details are fetched again through `info()` and re-cached.
    """
    # Read the TTL-backed attribute exactly once: a second read could land
    # after the TTL expired and hand None back to the caller.
    cached = self._capsule_info_cached
    if cached is not None:
        return cached
    fresh = self.info()
    self._capsule_info_cached = fresh
    # Return the local, not a re-read of the cache, for the same reason.
    return fresh
|
|
965
|
+
|
|
966
|
+
@classmethod
def _from_capsule(cls, capsule: dict) -> "DeployedApp":
    """
    Build a `DeployedApp` from a capsule dictionary.

    Parameters
    ----------
    capsule : dict
        Capsule payload. Reads ``id``, ``version``,
        ``spec.displayName``, ``spec.authConfig.authType``,
        ``status.accessInfo.outOfClusterURL`` and
        ``metadata.lastModifiedAt`` / ``metadata.createdAt``.

    Returns
    -------
    DeployedApp
        Instance whose capsule-info cache is pre-seeded with ``capsule``.

    Raises
    ------
    ValueError
        If the capsule has no auth type or no display name.
    """
    capsule_id = capsule.get("id")
    # Any section may be missing or explicitly null; treat both as empty.
    spec = capsule.get("spec") or {}
    status = capsule.get("status") or {}
    metadata = capsule.get("metadata") or {}

    capsule_type = (spec.get("authConfig") or {}).get("authType")
    name = spec.get("displayName")
    public_url = (status.get("accessInfo") or {}).get("outOfClusterURL")
    deployed_version = capsule.get("version")
    # Fall back to the creation time when there is no modification time.
    deployed_at = metadata.get("lastModifiedAt") or metadata.get("createdAt")

    if capsule_type is None or name is None:
        raise ValueError(f"Invalid capsule id: {capsule_id}")

    app = cls(
        capsule_id,
        capsule_type,
        None if public_url is None else _format_url_string(public_url, True),
        name,
        deployed_version,
        deployed_at,
    )
    # Seed the cache so the first property access needs no extra API call.
    app._capsule_info_cached = capsule
    return app
|
|
1001
|
+
|
|
1002
|
+
@classmethod
def _from_capsule_id(cls, capsule_id: str) -> "DeployedApp":
    """
    Fetch a capsule by ID and wrap it in a `DeployedApp`.

    Parameters
    ----------
    capsule_id : str
        ID of the capsule to look up.

    Returns
    -------
    DeployedApp
        The app backed by the capsule.

    Raises
    ------
    AppNotFoundException
        If the API answers with HTTP 404 for this ID.
    CapsuleApiException
        Re-raised unchanged for any other API failure.
    """
    try:
        capsule_api = cls._get_capsule_api()
        capsule = capsule_api.get(capsule_id)
    except CapsuleApiException as e:
        if e.status_code == 404:
            # Interpolate the ID; the message previously carried a raw '%s'.
            raise AppNotFoundException(
                "App with id '%s' could not be found" % capsule_id
            ) from e
        raise

    return cls._from_capsule(capsule)
|
|
1013
|
+
|
|
1014
|
+
def logs(self, previous: bool = False) -> Dict[str, List[LogLine]]:
    """
    Collect logs from every worker replica of this app.

    Parameters
    ----------
    previous : bool, optional
        When True, fetch logs from the previous execution of each worker,
        which helps when debugging crashlooping workers. Default is False.

    Returns
    -------
    Dict[str, List[LogLine]]
        Log lines keyed by worker ID.

    Examples
    --------
    ```python
    # Current logs
    for worker_id, lines in deployed.logs().items():
        print(f"Worker {worker_id}:")
        for line in lines:
            print(f"  {line}")

    # Logs from crashed workers
    previous_logs = deployed.logs(previous=True)
    ```
    """
    api = self._get_capsule_api()
    # TODO: Handle exceptions better over here.
    return {
        worker["workerId"]: api.logs(
            self._id, worker["workerId"], previous=previous
        )
        for worker in api.get_workers(self._id)
    }
|
|
1056
|
+
|
|
1057
|
+
def info(self) -> dict:
    """
    Fetch the full details of the deployed app from the API.

    Returns
    -------
    dict
        The capsule payload returned by the API for this app's ID.

    Examples
    --------
    ```python
    info = deployed.info()
    print(f"Status: {info.get('status')}")
    print(f"Spec: {info.get('spec')}")
    ```
    """
    return self._get_capsule_api().get(self._id)
|
|
1078
|
+
|
|
1079
|
+
def replicas(self) -> List[dict]:
    """
    Return the workers the API reports for this app.

    Returns
    -------
    List[dict]
        One dictionary per worker, as returned by the API's worker
        listing for this app's ID.

    Examples
    --------
    ```python
    for worker in deployed.replicas():
        print(f"Worker {worker['workerId']}: {worker.get('status')}")
    ```
    """
    api = self._get_capsule_api()
    workers = api.get_workers(self._id)
    return workers
|
|
1099
|
+
|
|
1100
|
+
def scale_to_zero(self):
    """
    Scale the app down to zero replicas.

    Patches the app's autoscaling configuration so that both the minimum
    and the maximum replica count are 0, and returns whatever the API
    patch call returns.

    Examples
    --------
    ```python
    # Scale down to save resources
    deployed.scale_to_zero()
    ```
    """
    zero_replicas = {"minReplicas": 0, "maxReplicas": 0}
    api = self._get_capsule_api()
    return api.patch(self._id, {"autoscalingConfig": zero_replicas})
|
|
1125
|
+
|
|
1126
|
+
def delete(self):
    """
    Delete the deployed app.

    Issues a delete call for this app's ID and returns whatever the API
    delete call returns.

    Examples
    --------
    ```python
    # Clean up the app
    deployed.delete()
    ```
    """
    api = self._get_capsule_api()
    outcome = api.delete(self._id)
    return outcome
|
|
1142
|
+
|
|
1143
|
+
def auth(self) -> dict:
    """
    Return the HTTP headers to send with requests to this app.

    Returns
    -------
    dict
        Metaflow's configured ``SERVICE_HEADERS``.

    Raises
    ------
    ValueError
        If the app's auth type is `AuthType.BROWSER`.

    Examples
    --------
    ```python
    import requests
    response = requests.get(deployed.public_url, headers=deployed.auth())
    ```
    """
    browser_only = self.auth_type == AuthType.BROWSER
    if not browser_only:
        # Imported lazily, only once headers are actually needed.
        from metaflow.metaflow_config import SERVICE_HEADERS

        return SERVICE_HEADERS
    raise ValueError(
        "Only API auth style is supported for accessing auth headers"
    )
|
|
1174
|
+
|
|
1175
|
+
@property
def id(self) -> str:
    """
    Identifier of the deployed app (the ID of its backing capsule).

    Returns
    -------
    str
        The app ID this object was created with.
    """
    app_id = self._id
    return app_id
|
|
1186
|
+
|
|
1187
|
+
@property
def auth_type(self) -> str:
    """
    Authentication type configured for this app: one of `Browser`, `API`
    or `BrowserAndApi`.

    Returns
    -------
    str
        The authentication type this object was created with.
    """
    configured = self._capsule_type
    return configured
|
|
1198
|
+
|
|
1199
|
+
@property
def public_url(self) -> str:
    """
    Public URL of the deployed app.

    If no URL is known yet, it is looked up under
    ``status.accessInfo.outOfClusterURL`` in the capsule info, formatted,
    and remembered on the instance.

    Returns
    -------
    str
        The public URL, or None when the capsule info does not carry one.
    """
    if self._public_url is not None:
        # Already resolved; no lookup needed.
        return self._public_url

    capsule_status = self._capsule_info.get("status", {})
    if capsule_status is None:
        capsule_status = {}
    access = capsule_status.get("accessInfo", {}) or {}
    self._public_url = access.get("outOfClusterURL", None)
    if self._public_url is not None:
        self._public_url = _format_url_string(self._public_url, True)
    return self._public_url
|
|
1219
|
+
|
|
1220
|
+
@property
def internal_url(self) -> str:
    """
    In-cluster URL of the deployed app.

    This URL bypasses external network routing and can be used from
    within Metaflow tasks running on Kubernetes. Authentication headers
    are not required when accessing the app through it from inside the
    cluster.

    Returns
    -------
    str
        The formatted ``status.accessInfo.inClusterURL`` from the capsule
        info, or None when the capsule info does not carry one.
    """
    capsule_status = self._capsule_info.get("status", {})
    if capsule_status is None:
        capsule_status = {}
    access = capsule_status.get("accessInfo", {}) or {}
    in_cluster = access.get("inClusterURL", None)
    if in_cluster is None:
        return None
    return _format_url_string(in_cluster, False)
|
|
1244
|
+
|
|
1245
|
+
@property
def name(self) -> str:
    """
    Logical, human-readable name of the app.

    Returns
    -------
    str
        The name this object was created with.
    """
    app_name = self._name
    return app_name
|
|
1256
|
+
|
|
1257
|
+
@property
def deployed_version(self) -> str:
    """
    Version identifier of the app's current deployment.

    Returns
    -------
    str
        The deployment version this object was created with.
    """
    version = self._deployed_version
    return version
|
|
1268
|
+
|
|
1269
|
+
@property
def deployed_at(self) -> datetime:
    """
    Timestamp when the app was last deployed.

    The stored ISO-8601 string is normalised before parsing so that it is
    accepted on Python versions older than 3.11 as well: a trailing ``Z``
    becomes ``+00:00`` and fractional seconds are padded or truncated to
    exactly six digits.

    Returns
    -------
    datetime
        The parsed deployment time.

    Raises
    ------
    TypeError
        If no timestamp string is stored.
    ValueError
        If the stored string is not an ISO-8601 timestamp.
    """
    import re

    stamp = self._deployed_at
    if not isinstance(stamp, str):
        # Let fromisoformat raise its usual TypeError for non-strings.
        return datetime.fromisoformat(stamp)

    stamp = stamp.strip()
    if stamp[-1:] in ("Z", "z"):
        # Before 3.11, fromisoformat() does not understand the UTC "Z".
        stamp = stamp[:-1] + "+00:00"
    # Before 3.11, only 3 or 6 fractional digits are accepted.
    stamp = re.sub(
        r"(?<=\d{2}:\d{2}:\d{2})\.(\d+)",
        lambda match: "." + match.group(1)[:6].ljust(6, "0"),
        stamp,
        count=1,
    )
    return datetime.fromisoformat(stamp)
|
|
1280
|
+
|
|
1281
|
+
@property
def tags(self) -> List[str]:
    """
    Tags associated with this app.

    Returns
    -------
    List[str]
        The value stored under ``spec.tags`` in the capsule info, or an
        empty list when the spec or its tags are missing or null.
    """
    # Both "spec" and "tags" may be absent or explicitly null; always hand
    # back a list so callers can iterate without checking for None.
    spec = self._capsule_info.get("spec") or {}
    return spec.get("tags") or []
|
|
1293
|
+
|
|
1294
|
+
def __repr__(self) -> str:
    """Return a debug string with the app's id, name, URL and version."""
    # Uses the stored URL directly, so building the repr never triggers
    # a lookup through the `public_url` property.
    fields = (
        f"id='{self._id}'",
        f"name='{self._name}'",
        f"public_url='{self._public_url}'",
        f"deployed_version='{self._deployed_version}'",
    )
    return "DeployedApp(" + ", ".join(fields) + ")"
|