ob-metaflow-extensions 1.1.130__py2.py3-none-any.whl → 1.5.1__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ob-metaflow-extensions might be problematic. Click here for more details.
- metaflow_extensions/outerbounds/__init__.py +1 -1
- metaflow_extensions/outerbounds/plugins/__init__.py +34 -4
- metaflow_extensions/outerbounds/plugins/apps/__init__.py +0 -0
- metaflow_extensions/outerbounds/plugins/apps/app_cli.py +0 -0
- metaflow_extensions/outerbounds/plugins/apps/app_utils.py +187 -0
- metaflow_extensions/outerbounds/plugins/apps/consts.py +3 -0
- metaflow_extensions/outerbounds/plugins/apps/core/__init__.py +15 -0
- metaflow_extensions/outerbounds/plugins/apps/core/_state_machine.py +506 -0
- metaflow_extensions/outerbounds/plugins/apps/core/_vendor/__init__.py +0 -0
- metaflow_extensions/outerbounds/plugins/apps/core/_vendor/spinner/__init__.py +4 -0
- metaflow_extensions/outerbounds/plugins/apps/core/_vendor/spinner/spinners.py +478 -0
- metaflow_extensions/outerbounds/plugins/apps/core/app_config.py +128 -0
- metaflow_extensions/outerbounds/plugins/apps/core/app_deploy_decorator.py +330 -0
- metaflow_extensions/outerbounds/plugins/apps/core/artifacts.py +0 -0
- metaflow_extensions/outerbounds/plugins/apps/core/capsule.py +958 -0
- metaflow_extensions/outerbounds/plugins/apps/core/click_importer.py +24 -0
- metaflow_extensions/outerbounds/plugins/apps/core/code_package/__init__.py +3 -0
- metaflow_extensions/outerbounds/plugins/apps/core/code_package/code_packager.py +618 -0
- metaflow_extensions/outerbounds/plugins/apps/core/code_package/examples.py +125 -0
- metaflow_extensions/outerbounds/plugins/apps/core/config/__init__.py +15 -0
- metaflow_extensions/outerbounds/plugins/apps/core/config/cli_generator.py +165 -0
- metaflow_extensions/outerbounds/plugins/apps/core/config/config_utils.py +966 -0
- metaflow_extensions/outerbounds/plugins/apps/core/config/schema_export.py +299 -0
- metaflow_extensions/outerbounds/plugins/apps/core/config/typed_configs.py +233 -0
- metaflow_extensions/outerbounds/plugins/apps/core/config/typed_init_generator.py +537 -0
- metaflow_extensions/outerbounds/plugins/apps/core/config/unified_config.py +1125 -0
- metaflow_extensions/outerbounds/plugins/apps/core/config_schema.yaml +337 -0
- metaflow_extensions/outerbounds/plugins/apps/core/dependencies.py +115 -0
- metaflow_extensions/outerbounds/plugins/apps/core/deployer.py +959 -0
- metaflow_extensions/outerbounds/plugins/apps/core/experimental/__init__.py +89 -0
- metaflow_extensions/outerbounds/plugins/apps/core/perimeters.py +87 -0
- metaflow_extensions/outerbounds/plugins/apps/core/secrets.py +164 -0
- metaflow_extensions/outerbounds/plugins/apps/core/utils.py +233 -0
- metaflow_extensions/outerbounds/plugins/apps/core/validations.py +17 -0
- metaflow_extensions/outerbounds/plugins/apps/deploy_decorator.py +201 -0
- metaflow_extensions/outerbounds/plugins/apps/supervisord_utils.py +243 -0
- metaflow_extensions/outerbounds/plugins/aws/__init__.py +4 -0
- metaflow_extensions/outerbounds/plugins/aws/assume_role.py +3 -0
- metaflow_extensions/outerbounds/plugins/aws/assume_role_decorator.py +118 -0
- metaflow_extensions/outerbounds/plugins/card_utilities/injector.py +1 -1
- metaflow_extensions/outerbounds/plugins/checkpoint_datastores/__init__.py +2 -0
- metaflow_extensions/outerbounds/plugins/checkpoint_datastores/coreweave.py +71 -0
- metaflow_extensions/outerbounds/plugins/checkpoint_datastores/external_chckpt.py +85 -0
- metaflow_extensions/outerbounds/plugins/checkpoint_datastores/nebius.py +73 -0
- metaflow_extensions/outerbounds/plugins/fast_bakery/baker.py +110 -0
- metaflow_extensions/outerbounds/plugins/fast_bakery/docker_environment.py +43 -9
- metaflow_extensions/outerbounds/plugins/fast_bakery/fast_bakery.py +12 -0
- metaflow_extensions/outerbounds/plugins/kubernetes/kubernetes_client.py +18 -44
- metaflow_extensions/outerbounds/plugins/kubernetes/pod_killer.py +374 -0
- metaflow_extensions/outerbounds/plugins/nim/card.py +2 -16
- metaflow_extensions/outerbounds/plugins/nim/{__init__.py → nim_decorator.py} +13 -49
- metaflow_extensions/outerbounds/plugins/nim/nim_manager.py +294 -233
- metaflow_extensions/outerbounds/plugins/nim/utils.py +36 -0
- metaflow_extensions/outerbounds/plugins/nvcf/constants.py +2 -2
- metaflow_extensions/outerbounds/plugins/nvcf/nvcf.py +100 -19
- metaflow_extensions/outerbounds/plugins/nvcf/nvcf_decorator.py +6 -1
- metaflow_extensions/outerbounds/plugins/nvct/__init__.py +0 -0
- metaflow_extensions/outerbounds/plugins/nvct/exceptions.py +71 -0
- metaflow_extensions/outerbounds/plugins/nvct/nvct.py +131 -0
- metaflow_extensions/outerbounds/plugins/nvct/nvct_cli.py +289 -0
- metaflow_extensions/outerbounds/plugins/nvct/nvct_decorator.py +286 -0
- metaflow_extensions/outerbounds/plugins/nvct/nvct_runner.py +218 -0
- metaflow_extensions/outerbounds/plugins/nvct/utils.py +29 -0
- metaflow_extensions/outerbounds/plugins/ollama/__init__.py +225 -0
- metaflow_extensions/outerbounds/plugins/ollama/constants.py +1 -0
- metaflow_extensions/outerbounds/plugins/ollama/exceptions.py +22 -0
- metaflow_extensions/outerbounds/plugins/ollama/ollama.py +1924 -0
- metaflow_extensions/outerbounds/plugins/ollama/status_card.py +292 -0
- metaflow_extensions/outerbounds/plugins/optuna/__init__.py +48 -0
- metaflow_extensions/outerbounds/plugins/profilers/simple_card_decorator.py +96 -0
- metaflow_extensions/outerbounds/plugins/s3_proxy/__init__.py +7 -0
- metaflow_extensions/outerbounds/plugins/s3_proxy/binary_caller.py +132 -0
- metaflow_extensions/outerbounds/plugins/s3_proxy/constants.py +11 -0
- metaflow_extensions/outerbounds/plugins/s3_proxy/exceptions.py +13 -0
- metaflow_extensions/outerbounds/plugins/s3_proxy/proxy_bootstrap.py +59 -0
- metaflow_extensions/outerbounds/plugins/s3_proxy/s3_proxy_api.py +93 -0
- metaflow_extensions/outerbounds/plugins/s3_proxy/s3_proxy_decorator.py +250 -0
- metaflow_extensions/outerbounds/plugins/s3_proxy/s3_proxy_manager.py +225 -0
- metaflow_extensions/outerbounds/plugins/secrets/secrets.py +38 -2
- metaflow_extensions/outerbounds/plugins/snowflake/snowflake.py +81 -11
- metaflow_extensions/outerbounds/plugins/snowpark/snowpark.py +18 -8
- metaflow_extensions/outerbounds/plugins/snowpark/snowpark_cli.py +6 -0
- metaflow_extensions/outerbounds/plugins/snowpark/snowpark_client.py +45 -18
- metaflow_extensions/outerbounds/plugins/snowpark/snowpark_decorator.py +18 -9
- metaflow_extensions/outerbounds/plugins/snowpark/snowpark_job.py +10 -4
- metaflow_extensions/outerbounds/plugins/torchtune/__init__.py +163 -0
- metaflow_extensions/outerbounds/plugins/vllm/__init__.py +255 -0
- metaflow_extensions/outerbounds/plugins/vllm/constants.py +1 -0
- metaflow_extensions/outerbounds/plugins/vllm/exceptions.py +1 -0
- metaflow_extensions/outerbounds/plugins/vllm/status_card.py +352 -0
- metaflow_extensions/outerbounds/plugins/vllm/vllm_manager.py +621 -0
- metaflow_extensions/outerbounds/remote_config.py +46 -9
- metaflow_extensions/outerbounds/toplevel/global_aliases_for_metaflow_package.py +94 -2
- metaflow_extensions/outerbounds/toplevel/ob_internal.py +4 -0
- metaflow_extensions/outerbounds/toplevel/plugins/ollama/__init__.py +1 -0
- metaflow_extensions/outerbounds/toplevel/plugins/optuna/__init__.py +1 -0
- metaflow_extensions/outerbounds/toplevel/plugins/torchtune/__init__.py +1 -0
- metaflow_extensions/outerbounds/toplevel/plugins/vllm/__init__.py +1 -0
- metaflow_extensions/outerbounds/toplevel/s3_proxy.py +88 -0
- {ob_metaflow_extensions-1.1.130.dist-info → ob_metaflow_extensions-1.5.1.dist-info}/METADATA +2 -2
- ob_metaflow_extensions-1.5.1.dist-info/RECORD +133 -0
- metaflow_extensions/outerbounds/plugins/nim/utilities.py +0 -5
- ob_metaflow_extensions-1.1.130.dist-info/RECORD +0 -56
- {ob_metaflow_extensions-1.1.130.dist-info → ob_metaflow_extensions-1.5.1.dist-info}/WHEEL +0 -0
- {ob_metaflow_extensions-1.1.130.dist-info → ob_metaflow_extensions-1.5.1.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import metaflow.metaflow_config_funcs
|
|
2
2
|
|
|
3
|
-
from metaflow_extensions.outerbounds.remote_config import init_config
|
|
3
|
+
from metaflow_extensions.outerbounds.remote_config import init_config, reload_config
|
|
4
4
|
|
|
5
5
|
# we want to override OSS Metaflow's initialization behavior with our own to support remote configs
|
|
6
6
|
# we're reassigning the METAFLOW_CONFIG variable because all downstream settings rely on it and
|
|
@@ -41,6 +41,9 @@ def get_boto3_session(role_arn=None, session_vars=None):
|
|
|
41
41
|
import boto3
|
|
42
42
|
import botocore
|
|
43
43
|
from metaflow_extensions.outerbounds.plugins.auth_server import get_token
|
|
44
|
+
from metaflow_extensions.outerbounds.plugins.aws.assume_role import (
|
|
45
|
+
OBP_ASSUME_ROLE_ARN_ENV_VAR,
|
|
46
|
+
)
|
|
44
47
|
|
|
45
48
|
from hashlib import sha256
|
|
46
49
|
from metaflow.util import get_username
|
|
@@ -69,6 +72,12 @@ def get_boto3_session(role_arn=None, session_vars=None):
|
|
|
69
72
|
if token_info.get("cspr_role_arn"):
|
|
70
73
|
cspr_role = token_info["cspr_role_arn"]
|
|
71
74
|
|
|
75
|
+
# Check if the assume_role decorator has set a CSPR ARN via environment variable
|
|
76
|
+
# This takes precedence over CSPR role that comes from the token_info response
|
|
77
|
+
decorator_role_arn = os.environ.get(OBP_ASSUME_ROLE_ARN_ENV_VAR)
|
|
78
|
+
if decorator_role_arn:
|
|
79
|
+
cspr_role = decorator_role_arn
|
|
80
|
+
|
|
72
81
|
if cspr_role:
|
|
73
82
|
# If CSPR role is set, we set it as the default role to assume
|
|
74
83
|
# for the AWS SDK. We do this by writing an AWS config file
|
|
@@ -162,13 +171,18 @@ class ObpAuthProvider(object):
|
|
|
162
171
|
client_params = {}
|
|
163
172
|
|
|
164
173
|
from botocore.exceptions import ClientError
|
|
174
|
+
from botocore.config import Config
|
|
165
175
|
|
|
166
176
|
with hide_access_keys():
|
|
167
177
|
session = get_boto3_session(role_arn, session_vars)
|
|
178
|
+
_client_params = client_params.copy()
|
|
179
|
+
if _client_params.get("config") and type(_client_params["config"]) == dict:
|
|
180
|
+
_client_params["config"] = Config(**_client_params["config"])
|
|
181
|
+
|
|
168
182
|
if with_error:
|
|
169
|
-
return session.client(module, **
|
|
183
|
+
return session.client(module, **_client_params), ClientError
|
|
170
184
|
else:
|
|
171
|
-
return session.client(module, **
|
|
185
|
+
return session.client(module, **_client_params)
|
|
172
186
|
|
|
173
187
|
|
|
174
188
|
AWS_CLIENT_PROVIDERS_DESC = [("obp", ".ObpAuthProvider")]
|
|
@@ -307,11 +321,13 @@ class ObpGcpAuthProvider(object):
|
|
|
307
321
|
GCP_CLIENT_PROVIDERS_DESC = [("obp", ".ObpGcpAuthProvider")]
|
|
308
322
|
CLIS_DESC = [
|
|
309
323
|
("nvidia", ".nvcf.nvcf_cli.cli"),
|
|
324
|
+
("nvct", ".nvct.nvct_cli.cli"),
|
|
310
325
|
("fast-bakery", ".fast_bakery.fast_bakery_cli.cli"),
|
|
311
326
|
("snowpark", ".snowpark.snowpark_cli.cli"),
|
|
312
327
|
]
|
|
313
328
|
STEP_DECORATORS_DESC = [
|
|
314
329
|
("nvidia", ".nvcf.nvcf_decorator.NvcfDecorator"),
|
|
330
|
+
("nvct", ".nvct.nvct_decorator.NvctDecorator"),
|
|
315
331
|
(
|
|
316
332
|
"fast_bakery_internal",
|
|
317
333
|
".fast_bakery.fast_bakery_decorator.InternalFastBakeryDecorator",
|
|
@@ -319,7 +335,21 @@ STEP_DECORATORS_DESC = [
|
|
|
319
335
|
("snowpark", ".snowpark.snowpark_decorator.SnowparkDecorator"),
|
|
320
336
|
("tensorboard", ".tensorboard.TensorboardDecorator"),
|
|
321
337
|
("gpu_profile", ".profilers.gpu_profile_decorator.GPUProfileDecorator"),
|
|
322
|
-
("
|
|
338
|
+
("test_append_card", ".profilers.simple_card_decorator.DynamicCardAppendDecorator"),
|
|
339
|
+
("nim", ".nim.nim_decorator.NimDecorator"),
|
|
340
|
+
("ollama", ".ollama.OllamaDecorator"),
|
|
341
|
+
("vllm", ".vllm.VLLMDecorator"),
|
|
342
|
+
("s3_proxy", ".s3_proxy.s3_proxy_decorator.S3ProxyDecorator"),
|
|
343
|
+
("nebius_s3_proxy", ".s3_proxy.s3_proxy_decorator.NebiusS3ProxyDecorator"),
|
|
344
|
+
("coreweave_s3_proxy", ".s3_proxy.s3_proxy_decorator.CoreWeaveS3ProxyDecorator"),
|
|
345
|
+
(
|
|
346
|
+
"app_deploy_internal",
|
|
347
|
+
".apps.core.app_deploy_decorator.AppDeployInternalDecorator",
|
|
348
|
+
),
|
|
349
|
+
]
|
|
350
|
+
|
|
351
|
+
FLOW_DECORATORS_DESC = [
|
|
352
|
+
("app_deploy", ".apps.core.app_deploy_decorator.AppDeployFlowDecorator"),
|
|
323
353
|
]
|
|
324
354
|
|
|
325
355
|
TOGGLE_STEP_DECORATOR = [
|
|
@@ -338,4 +368,4 @@ SECRETS_PROVIDERS_DESC = [
|
|
|
338
368
|
("outerbounds", ".secrets.secrets.OuterboundsSecretsProvider"),
|
|
339
369
|
]
|
|
340
370
|
# Adding an override here so the library can be imported at the metaflow.plugins level
|
|
341
|
-
__mf_promote_submodules__ = ["snowflake"]
|
|
371
|
+
__mf_promote_submodules__ = ["snowflake", "ollama", "torchtune", "optuna"]
|
|
File without changes
|
|
File without changes
|
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
from metaflow.exception import MetaflowException
|
|
2
|
+
import os
|
|
3
|
+
from metaflow.metaflow_config_funcs import init_config
|
|
4
|
+
import requests
|
|
5
|
+
import time
|
|
6
|
+
import random
|
|
7
|
+
|
|
8
|
+
# IMPORTANT: Currently contents of this file are mostly duplicated from the outerbounds package.
|
|
9
|
+
# This is purely due to the time rush of having to deliver this feature. Going forward, we
|
|
10
|
+
# will reorganize things so that the amount of duplication is kept to a minimum.
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
APP_READY_POLL_TIMEOUT_SECONDS = 300
|
|
14
|
+
# Even after our backend validates that the app routes are ready, it takes a few seconds for
|
|
15
|
+
# the app to be accessible via the browser. Till we hunt down this delay, add an extra buffer.
|
|
16
|
+
APP_READY_EXTRA_BUFFER_SECONDS = 30
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _print_app_urls(api_url, workstation_id, name, port):
    """Print the success message plus the browser and API URLs of the app."""
    # Only the first "api" is swapped for "ui"; replacing every occurrence
    # would corrupt hostnames that contain the substring elsewhere.
    ui_url = api_url.replace("api", "ui", 1)
    print(f"App {name} started on port {port}!")
    print(f"Browser URL: https://{ui_url}/apps/{workstation_id}/{name}/")
    print(f"API URL: https://{api_url}/apps/{workstation_id}/{name}/")


def start_app(port=-1, name=""):
    """
    Starts an app on the workstation.

    Lists workstations, looks for the "named_ports" of the current workstation
    (identified by the WORKSTATION_ID environment variable) and, unless the
    requested port is already enabled under `name`, makes an update call to
    the named ports of the workstation and waits for the app to become ready.

    Args:
        port: The named port on the workstation that the app is served from.
        name: Name of the app; 1 to 19 lowercase alphanumeric characters.

    Raises:
        MetaflowException: If the name is invalid, the command is not run on a
            workstation, the workstations cannot be listed, the request is
            rejected by `ensure_app_start_request_is_valid`, the port cannot
            be found on the workstation, or the app does not become ready
            within APP_READY_POLL_TIMEOUT_SECONDS.
    """
    # The accepted range is unchanged (1..19 characters); only the message was
    # corrected so that it matches what the check actually enforces.
    if len(name) == 0 or len(name) >= 20:
        raise MetaflowException(
            "App name should be between 1 and 19 characters long."
        )
    elif not name.isalnum() or not name.islower():
        raise MetaflowException(
            "App name can only contain lowercase alphanumeric characters."
        )

    if "WORKSTATION_ID" not in os.environ:
        raise MetaflowException(
            "All outerbounds app commands can only be run from a workstation."
        )

    # Every workstation has this environment variable set.
    workstation_id = os.environ["WORKSTATION_ID"]

    try:
        try:
            conf = init_config()
            metaflow_token = conf["METAFLOW_SERVICE_AUTH_KEY"]
            api_url = conf["OBP_API_SERVER"]

            workstations_response = requests.get(
                f"https://{api_url}/v1/workstations",
                headers={"x-api-key": metaflow_token},
            )
            workstations_response.raise_for_status()
        except Exception as e:
            # `except Exception` (not a bare except) so that KeyboardInterrupt
            # and SystemExit are not converted into a MetaflowException.
            raise MetaflowException("Failed to list workstations!") from e

        workstations_json = workstations_response.json()["workstations"]
        for workstation in workstations_json:
            if workstation["instance_id"] != workstation_id:
                continue
            if "named_ports" not in workstation["spec"]:
                continue

            named_ports = workstation["spec"]["named_ports"]
            try:
                ensure_app_start_request_is_valid(named_ports, port, name)
            except ValueError as e:
                raise MetaflowException(str(e)) from e

            for named_port in named_ports:
                if int(named_port["port"]) != port:
                    continue

                if named_port["enabled"] and named_port["name"] == name:
                    # Already serving this app on this port; nothing to update.
                    _print_app_urls(api_url, workstation_id, name, port)
                    return

                response = requests.put(
                    f"https://{api_url}/v1/workstations/update/{workstation_id}/namedports",
                    headers={"x-api-key": metaflow_token},
                    json={
                        "port": port,
                        "name": name,
                        "enabled": True,
                    },
                )
                response.raise_for_status()

                poll_success = wait_for_app_port_to_be_accessible(
                    api_url,
                    metaflow_token,
                    workstation_id,
                    name,
                    APP_READY_POLL_TIMEOUT_SECONDS,
                )
                if not poll_success:
                    raise MetaflowException(
                        f"The app could not be deployed in {APP_READY_POLL_TIMEOUT_SECONDS / 60} minutes. Please try again later."
                    )
                _print_app_urls(api_url, workstation_id, name, port)
                return

        # Reaching this point means no named port was matched, so nothing was
        # started; report it instead of returning as if the call succeeded.
        raise MetaflowException(
            f"Port {port} is not available as a named port on workstation {workstation_id}."
        )
    except MetaflowException:
        # Keep the specific message (validation failure, timeout, ...) rather
        # than masking it with the generic one below.
        raise
    except Exception as e:
        raise MetaflowException(
            f"Failed to start app {name} on port {port}!"
        ) from e
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def ensure_app_start_request_is_valid(existing_named_ports, port: int, name: str):
    """
    Ensures that the port number is available on the workstation and that an app of
    the same name is not already opened on a different port.

    Port values are normalized with int() before being compared, so named ports
    reported as numeric strings match an integer `port` (the caller in this
    module compares ports the same way).

    Args:
        existing_named_ports: A list of named ports on the workstation. Each
            entry is read for its "port", "name" and "enabled" keys.
        port: The port number to check.
        name: The name of the app to check.

    Raises:
        MetaflowException: If the port is not one of the workstation's named
            ports, or an enabled app with the same name uses a different port.
        ValueError: If a port value cannot be interpreted as an integer.
    """
    requested_port = int(port)
    # Only membership is needed, so a set of normalized port numbers suffices.
    known_ports = {int(named_port["port"]) for named_port in existing_named_ports}

    if requested_port not in known_ports:
        raise MetaflowException(f"Port {port} not found on workstation")

    for existing_named_port in existing_named_ports:
        if (
            name == existing_named_port["name"]
            and int(existing_named_port["port"]) != requested_port
            and existing_named_port["enabled"]
        ):
            raise MetaflowException(
                f"App with name '{name}' is already deployed on port {existing_named_port['port']}"
            )
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def wait_for_app_port_to_be_accessible(
    api_url,
    metaflow_token,
    workstation_id,
    app_name,
    poll_timeout_seconds,
    request_timeout_seconds=30,
) -> bool:
    """
    Waits for the app to be ready by polling the workstation status.

    Args:
        api_url: Host of the API server; queried at https://{api_url}/v1/workstations.
        metaflow_token: Token sent in the "x-api-key" header.
        workstation_id: Instance id of the workstation hosting the app.
        app_name: Name of the app to wait for.
        poll_timeout_seconds: Polling stops once this many seconds have elapsed.
        request_timeout_seconds: Timeout applied to each individual HTTP
            request, so a stalled connection cannot block past the deadline
            indefinitely.

    Returns:
        True once the app is reported ready (after sleeping the extra
        APP_READY_EXTRA_BUFFER_SECONDS), False if the poll timeout elapses first.

    Raises:
        requests.exceptions.HTTPError: If the API answers with an error status;
            only connection errors and timeouts are retried.
    """
    initial_retry_delay = 1.0
    max_retry_delay = 10
    poll_interval = 10
    wait_message = f"App {app_name} is currently being deployed..."

    start_time = time.time()
    retry_delay = initial_retry_delay
    # The deadline is re-checked before every request so the overall wait does
    # not overshoot the timeout by several poll intervals.
    while time.time() - start_time < poll_timeout_seconds:
        try:
            workstations_response = requests.get(
                f"https://{api_url}/v1/workstations",
                headers={"x-api-key": metaflow_token},
                timeout=request_timeout_seconds,
            )
        except (
            requests.exceptions.ConnectionError,
            requests.exceptions.Timeout,
        ):
            time.sleep(retry_delay)
            # Exponential backoff with jitter, capped at max_retry_delay.
            retry_delay = min(
                retry_delay * 2 + random.uniform(0, 1), max_retry_delay
            )
            continue

        workstations_response.raise_for_status()
        # A request went through, so start the next backoff from scratch.
        retry_delay = initial_retry_delay

        print(wait_message)
        if is_app_ready(workstations_response.json(), workstation_id, app_name):
            time.sleep(APP_READY_EXTRA_BUFFER_SECONDS)
            return True
        time.sleep(poll_interval)
    return False
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def is_app_ready(response_json: dict, workstation_id: str, app_name: str) -> bool:
    """
    Checks if the app is ready in the given workstation's response.

    Args:
        response_json: Parsed body of the workstations listing; read for the
            "workstations" list, whose entries are read for "instance_id" and
            "status" -> "hosted_apps" (entries with "name" and "ready").
        workstation_id: Instance id of the workstation to inspect.
        app_name: Name of the hosted app to look for.

    Returns:
        The truthiness of the app's "ready" field, or False when the
        workstation or the app is not present in the response.
    """
    # `or []` / `or {}` also covers keys that are present but null in the
    # JSON payload, which `.get(key, default)` would pass through as None.
    for workstation in response_json.get("workstations") or []:
        if workstation.get("instance_id") != workstation_id:
            continue
        status = workstation.get("status") or {}
        for hosted_app in status.get("hosted_apps") or []:
            if hosted_app.get("name") == app_name:
                return bool(hosted_app.get("ready"))
    return False
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
from . import config
|
|
2
|
+
from . import dependencies
|
|
3
|
+
from . import capsule
|
|
4
|
+
from . import utils
|
|
5
|
+
from . import app_config
|
|
6
|
+
from . import code_package
|
|
7
|
+
from .deployer import AppDeployer, bake_image, package_code, DeployedApp
|
|
8
|
+
from .config import BakedImage, PackagedCode
|
|
9
|
+
from .config.typed_configs import (
|
|
10
|
+
ReplicaConfigDict,
|
|
11
|
+
ResourceConfigDict,
|
|
12
|
+
AuthConfigDict,
|
|
13
|
+
DependencyConfigDict,
|
|
14
|
+
PackageConfigDict,
|
|
15
|
+
)
|