ob-metaflow-extensions 1.1.168rc5__py2.py3-none-any.whl → 1.1.170__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ob-metaflow-extensions might be problematic. Click here for more details.
- metaflow_extensions/outerbounds/plugins/__init__.py +25 -33
- metaflow_extensions/outerbounds/plugins/fast_bakery/docker_environment.py +11 -1
- metaflow_extensions/outerbounds/plugins/kubernetes/pod_killer.py +106 -30
- metaflow_extensions/outerbounds/toplevel/global_aliases_for_metaflow_package.py +0 -1
- metaflow_extensions/outerbounds/toplevel/ob_internal.py +1 -1
- {ob_metaflow_extensions-1.1.168rc5.dist-info → ob_metaflow_extensions-1.1.170.dist-info}/METADATA +2 -2
- {ob_metaflow_extensions-1.1.168rc5.dist-info → ob_metaflow_extensions-1.1.170.dist-info}/RECORD +9 -11
- metaflow_extensions/outerbounds/plugins/apps/app_cli.py +0 -26
- metaflow_extensions/outerbounds/plugins/fast_bakery/baker.py +0 -110
- {ob_metaflow_extensions-1.1.168rc5.dist-info → ob_metaflow_extensions-1.1.170.dist-info}/WHEEL +0 -0
- {ob_metaflow_extensions-1.1.168rc5.dist-info → ob_metaflow_extensions-1.1.170.dist-info}/top_level.txt +0 -0
|
@@ -52,10 +52,6 @@ def get_boto3_session(role_arn=None, session_vars=None):
|
|
|
52
52
|
|
|
53
53
|
token_info = get_token("/generate/aws")
|
|
54
54
|
|
|
55
|
-
# Check if the assume_role decorator has set a role ARN via environment variable
|
|
56
|
-
# This takes precedence over CSPR role
|
|
57
|
-
decorator_role_arn = os.environ.get(OBP_ASSUME_ROLE_ARN_ENV_VAR)
|
|
58
|
-
|
|
59
55
|
# Write token to a file. The file name is derived from the user name
|
|
60
56
|
# so it works with multiple users on the same machine.
|
|
61
57
|
#
|
|
@@ -76,13 +72,18 @@ def get_boto3_session(role_arn=None, session_vars=None):
|
|
|
76
72
|
if token_info.get("cspr_role_arn"):
|
|
77
73
|
cspr_role = token_info["cspr_role_arn"]
|
|
78
74
|
|
|
79
|
-
#
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
75
|
+
# Check if the assume_role decorator has set a CSPR ARN via environment variable
|
|
76
|
+
# This takes precedence over CSPR role that comes from the token_info response
|
|
77
|
+
decorator_role_arn = os.environ.get(OBP_ASSUME_ROLE_ARN_ENV_VAR)
|
|
78
|
+
if decorator_role_arn:
|
|
79
|
+
cspr_role = decorator_role_arn
|
|
80
|
+
|
|
81
|
+
if cspr_role:
|
|
82
|
+
# If CSPR role is set, we set it as the default role to assume
|
|
83
|
+
# for the AWS SDK. We do this by writing an AWS config file
|
|
83
84
|
# with two profiles. One to get credentials for the task role
|
|
84
85
|
# in exchange for the OIDC token, and second to assume the
|
|
85
|
-
#
|
|
86
|
+
# CSPR role using the task role credentials.
|
|
86
87
|
import configparser
|
|
87
88
|
from io import StringIO
|
|
88
89
|
|
|
@@ -94,9 +95,9 @@ def get_boto3_session(role_arn=None, session_vars=None):
|
|
|
94
95
|
"web_identity_token_file": token_file,
|
|
95
96
|
}
|
|
96
97
|
|
|
97
|
-
#
|
|
98
|
-
aws_config["profile
|
|
99
|
-
"role_arn":
|
|
98
|
+
# CSPR role profile (default)
|
|
99
|
+
aws_config["profile cspr"] = {
|
|
100
|
+
"role_arn": cspr_role,
|
|
100
101
|
"source_profile": "task",
|
|
101
102
|
}
|
|
102
103
|
|
|
@@ -112,7 +113,7 @@ def get_boto3_session(role_arn=None, session_vars=None):
|
|
|
112
113
|
tmp_aws_config_file = f.name
|
|
113
114
|
os.rename(tmp_aws_config_file, aws_config_file)
|
|
114
115
|
os.environ["AWS_CONFIG_FILE"] = aws_config_file
|
|
115
|
-
os.environ["AWS_PROFILE"] = "
|
|
116
|
+
os.environ["AWS_PROFILE"] = "cspr"
|
|
116
117
|
else:
|
|
117
118
|
os.environ["AWS_WEB_IDENTITY_TOKEN_FILE"] = token_file
|
|
118
119
|
os.environ["AWS_ROLE_ARN"] = token_info["role_arn"]
|
|
@@ -124,29 +125,21 @@ def get_boto3_session(role_arn=None, session_vars=None):
|
|
|
124
125
|
if token_info.get("region"):
|
|
125
126
|
os.environ["AWS_DEFAULT_REGION"] = token_info["region"]
|
|
126
127
|
|
|
127
|
-
if
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
#
|
|
132
|
-
session = boto3.session.Session(profile_name="
|
|
128
|
+
if cspr_role:
|
|
129
|
+
# The generated AWS config will be used here since we set the
|
|
130
|
+
# AWS_CONFIG_FILE environment variable above.
|
|
131
|
+
if role_arn == USE_CSPR_ROLE_ARN_IF_SET:
|
|
132
|
+
# Otherwise start from the default profile, assuming CSPR role
|
|
133
|
+
session = boto3.session.Session(profile_name="cspr")
|
|
133
134
|
else:
|
|
134
135
|
session = boto3.session.Session(profile_name="task")
|
|
135
136
|
else:
|
|
136
|
-
# No decorator role or CSPR role, use default session
|
|
137
137
|
# Not using AWS config, just AWS_WEB_IDENTITY_TOKEN_FILE + AWS_ROLE_ARN
|
|
138
138
|
session = boto3.session.Session()
|
|
139
139
|
|
|
140
|
-
if
|
|
141
|
-
role_arn
|
|
142
|
-
|
|
143
|
-
and role_arn != decorator_role_arn
|
|
144
|
-
):
|
|
145
|
-
# If the user provided a role_arn that's different from the decorator role,
|
|
146
|
-
# we assume that role using the current session credentials.
|
|
147
|
-
# This works for both cases:
|
|
148
|
-
# 1. No decorator role: Task role -> Secrets role
|
|
149
|
-
# 2. With decorator role: Decorator role -> Secrets role
|
|
140
|
+
if role_arn and role_arn != USE_CSPR_ROLE_ARN_IF_SET:
|
|
141
|
+
# If the user provided a role_arn, we assume that role
|
|
142
|
+
# using the task role credentials. CSPR role is not used.
|
|
150
143
|
fetcher = botocore.credentials.AssumeRoleCredentialFetcher(
|
|
151
144
|
client_creator=session._session.create_client,
|
|
152
145
|
source_credentials=session._session.get_credentials(),
|
|
@@ -162,8 +155,8 @@ def get_boto3_session(role_arn=None, session_vars=None):
|
|
|
162
155
|
else:
|
|
163
156
|
# If the user didn't provide a role_arn, or if the role_arn
|
|
164
157
|
# is set to USE_CSPR_ROLE_ARN_IF_SET, we return the default
|
|
165
|
-
# session which would use the
|
|
166
|
-
#
|
|
158
|
+
# session which would use the CSPR role if it is set on the
|
|
159
|
+
# server, and the task role otherwise.
|
|
167
160
|
return session
|
|
168
161
|
|
|
169
162
|
|
|
@@ -331,7 +324,6 @@ CLIS_DESC = [
|
|
|
331
324
|
("nvct", ".nvct.nvct_cli.cli"),
|
|
332
325
|
("fast-bakery", ".fast_bakery.fast_bakery_cli.cli"),
|
|
333
326
|
("snowpark", ".snowpark.snowpark_cli.cli"),
|
|
334
|
-
("app", ".apps.app_cli.cli"),
|
|
335
327
|
]
|
|
336
328
|
STEP_DECORATORS_DESC = [
|
|
337
329
|
("nvidia", ".nvcf.nvcf_decorator.NvcfDecorator"),
|
|
@@ -90,6 +90,7 @@ class DockerEnvironmentException(MetaflowException):
|
|
|
90
90
|
class DockerEnvironment(MetaflowEnvironment):
|
|
91
91
|
TYPE = "fast-bakery"
|
|
92
92
|
_filecache = None
|
|
93
|
+
_force_rebuild = False
|
|
93
94
|
|
|
94
95
|
def __init__(self, flow):
|
|
95
96
|
self.skipped_steps = set()
|
|
@@ -178,12 +179,20 @@ class DockerEnvironment(MetaflowEnvironment):
|
|
|
178
179
|
|
|
179
180
|
if self.skipped_steps:
|
|
180
181
|
self.delegate = CondaEnvironment(self.flow)
|
|
182
|
+
self.delegate._force_rebuild = self._force_rebuild
|
|
181
183
|
self.delegate.set_local_root(self.local_root)
|
|
182
184
|
self.delegate.validate_environment(echo, self.datastore_type)
|
|
183
185
|
self.delegate.init_environment(echo, self.skipped_steps)
|
|
184
186
|
|
|
185
187
|
def _bake(self, steps) -> Dict[str, FastBakeryApiResponse]:
|
|
186
188
|
metafile_path = get_fastbakery_metafile_path(self.local_root, self.flow.name)
|
|
189
|
+
if self._force_rebuild:
|
|
190
|
+
# clear the metafile if force rebuilding, effectively skipping the cache.
|
|
191
|
+
try:
|
|
192
|
+
os.remove(metafile_path)
|
|
193
|
+
except Exception:
|
|
194
|
+
pass
|
|
195
|
+
|
|
187
196
|
logger_lock = threading.Lock()
|
|
188
197
|
|
|
189
198
|
@cache_request(metafile_path)
|
|
@@ -201,7 +210,8 @@ class DockerEnvironment(MetaflowEnvironment):
|
|
|
201
210
|
bakery.pypi_packages(pypi_packages)
|
|
202
211
|
bakery.conda_packages(conda_packages)
|
|
203
212
|
bakery.base_image(base_image)
|
|
204
|
-
|
|
213
|
+
if self._force_rebuild:
|
|
214
|
+
bakery.ignore_cache()
|
|
205
215
|
|
|
206
216
|
with logger_lock:
|
|
207
217
|
self.logger(f"🍳 Baking [{ref}] ...")
|
|
@@ -39,7 +39,7 @@ def derive_job_outcome(job_status: "V1JobStatus"):
|
|
|
39
39
|
|
|
40
40
|
# This means that the job has neither finished or succedded.
|
|
41
41
|
if job_status.active:
|
|
42
|
-
return JobOutcomes.
|
|
42
|
+
return JobOutcomes.DELETE
|
|
43
43
|
|
|
44
44
|
# This means that the job is not active. Had started. There is not succedded/fail.
|
|
45
45
|
# This is a weird state. Better to just kill the job
|
|
@@ -47,7 +47,7 @@ def derive_job_outcome(job_status: "V1JobStatus"):
|
|
|
47
47
|
|
|
48
48
|
|
|
49
49
|
class PodKiller:
|
|
50
|
-
def __init__(self, kubernetes_client, echo_func, namespace):
|
|
50
|
+
def __init__(self, kubernetes_client, echo_func, namespace, progress_bar=None):
|
|
51
51
|
self.client = kubernetes_client
|
|
52
52
|
self.echo = echo_func
|
|
53
53
|
self.api_instance = self.client.CoreV1Api()
|
|
@@ -55,6 +55,7 @@ class PodKiller:
|
|
|
55
55
|
self._namespace = namespace
|
|
56
56
|
self.jobset_api = None
|
|
57
57
|
self.jobset_api = self.client.CustomObjectsApi()
|
|
58
|
+
self.progress_bar = progress_bar
|
|
58
59
|
|
|
59
60
|
def _delete_jobset(self, owner_ref, namespace):
|
|
60
61
|
"""Delete a JobSet if it's the owner of a job."""
|
|
@@ -147,20 +148,31 @@ class PodKiller:
|
|
|
147
148
|
|
|
148
149
|
def _find_matching_jobs(self, flow_name, run_id=None, user=None):
|
|
149
150
|
"""Find jobs that match the flow_name, run_id, and user criteria using similar logic to _find_active_pods"""
|
|
151
|
+
|
|
152
|
+
def paginated_job_finder(namespace):
|
|
153
|
+
continue_token = None
|
|
154
|
+
while True:
|
|
155
|
+
response = self.job_api.list_namespaced_job(
|
|
156
|
+
namespace=namespace, limit=100, _continue=continue_token
|
|
157
|
+
)
|
|
158
|
+
yield response.items
|
|
159
|
+
continue_token = response.metadata._continue
|
|
160
|
+
if not continue_token:
|
|
161
|
+
break
|
|
162
|
+
|
|
150
163
|
try:
|
|
151
|
-
jobs = self.job_api.list_namespaced_job(namespace=self._namespace)
|
|
152
164
|
matching_jobs = []
|
|
153
|
-
for
|
|
154
|
-
job
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
165
|
+
for _jobs in paginated_job_finder(self._namespace):
|
|
166
|
+
for job in _jobs:
|
|
167
|
+
_match = self._metaflow_matching_spec(
|
|
168
|
+
run_id=run_id,
|
|
169
|
+
user=user,
|
|
170
|
+
flow_name=flow_name,
|
|
171
|
+
annotations=job.metadata.annotations,
|
|
172
|
+
labels=job.metadata.labels,
|
|
173
|
+
)
|
|
174
|
+
if _match:
|
|
175
|
+
matching_jobs.append(job)
|
|
164
176
|
return matching_jobs
|
|
165
177
|
except Exception as e:
|
|
166
178
|
self.echo(f"Error finding jobs: {str(e)}\n")
|
|
@@ -171,25 +183,38 @@ class PodKiller:
|
|
|
171
183
|
if not self.jobset_api:
|
|
172
184
|
return []
|
|
173
185
|
|
|
186
|
+
def paginated_jobset_finder(namespace):
|
|
187
|
+
continue_token = None
|
|
188
|
+
responses = []
|
|
189
|
+
while True:
|
|
190
|
+
response = self.jobset_api.list_namespaced_custom_object(
|
|
191
|
+
group="jobset.x-k8s.io",
|
|
192
|
+
version="v1alpha2",
|
|
193
|
+
namespace=namespace,
|
|
194
|
+
plural="jobsets",
|
|
195
|
+
limit=100,
|
|
196
|
+
**({"_continue": continue_token} if continue_token else {}),
|
|
197
|
+
)
|
|
198
|
+
continue_token = response.get("metadata", {}).get("continue", None)
|
|
199
|
+
responses.append(response)
|
|
200
|
+
if not continue_token:
|
|
201
|
+
break
|
|
202
|
+
return responses
|
|
203
|
+
|
|
174
204
|
try:
|
|
175
|
-
jobsets = self.jobset_api.list_namespaced_custom_object(
|
|
176
|
-
group="jobset.x-k8s.io",
|
|
177
|
-
version="v1alpha2",
|
|
178
|
-
namespace=self._namespace,
|
|
179
|
-
plural="jobsets",
|
|
180
|
-
)
|
|
181
205
|
matching_jobsets = []
|
|
182
206
|
|
|
183
|
-
for
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
207
|
+
for jobset_response in paginated_jobset_finder(self._namespace):
|
|
208
|
+
for jobset in jobset_response.get("items", []):
|
|
209
|
+
_match = self._metaflow_matching_spec(
|
|
210
|
+
run_id=run_id,
|
|
211
|
+
user=user,
|
|
212
|
+
flow_name=flow_name,
|
|
213
|
+
annotations=jobset.get("metadata", {}).get("annotations", {}),
|
|
214
|
+
labels=jobset.get("metadata", {}).get("labels", {}),
|
|
215
|
+
)
|
|
216
|
+
if _match:
|
|
217
|
+
matching_jobsets.append(jobset)
|
|
193
218
|
|
|
194
219
|
return matching_jobsets
|
|
195
220
|
except Exception as e:
|
|
@@ -270,9 +295,20 @@ class PodKiller:
|
|
|
270
295
|
self.echo(f"Unknown outcome {outcome} for JobSet {jobset_name}")
|
|
271
296
|
return False
|
|
272
297
|
|
|
298
|
+
def extract_matching_jobs_and_jobsets(self, flow_name, run_id, user):
|
|
299
|
+
"""Extract matching jobs and jobsets based on the flow_name, run_id, and user criteria"""
|
|
300
|
+
jobs = self._find_matching_jobs(flow_name, run_id, user)
|
|
301
|
+
jobsets = self._find_matching_jobsets(flow_name, run_id, user)
|
|
302
|
+
return [(j, derive_job_outcome(j.status)) for j in jobs], [
|
|
303
|
+
(j, derive_jobset_outcome(j.get("status", {}))) for j in jobsets
|
|
304
|
+
]
|
|
305
|
+
|
|
273
306
|
def process_matching_jobs_and_jobsets(self, flow_name, run_id, user):
|
|
274
307
|
"""Process all matching jobs and jobsets based on their derived outcomes"""
|
|
275
308
|
results = []
|
|
309
|
+
progress_update = lambda x: x
|
|
310
|
+
if self.progress_bar:
|
|
311
|
+
progress_update = lambda x: self.progress_bar.update(1, x)
|
|
276
312
|
|
|
277
313
|
# Process matching jobs
|
|
278
314
|
_jobs, _jobsets = [], []
|
|
@@ -282,6 +318,7 @@ class PodKiller:
|
|
|
282
318
|
result = self._handle_job_outcome(job, outcome)
|
|
283
319
|
# results.append(result)
|
|
284
320
|
if result is not None:
|
|
321
|
+
progress_update("💀 Killing Job %s" % job.metadata.name)
|
|
285
322
|
results.append(result)
|
|
286
323
|
_jobs.append(result)
|
|
287
324
|
|
|
@@ -292,7 +329,46 @@ class PodKiller:
|
|
|
292
329
|
outcome = derive_jobset_outcome(jobset_status)
|
|
293
330
|
result = self._handle_jobset_outcome(jobset, outcome)
|
|
294
331
|
if result is not None:
|
|
332
|
+
progress_update(
|
|
333
|
+
"💀 Deleting JobSet %s"
|
|
334
|
+
% jobset.get("metadata", {}).get("name", "unknown")
|
|
335
|
+
)
|
|
295
336
|
results.append(result)
|
|
296
337
|
_jobsets.append(result)
|
|
297
338
|
|
|
298
339
|
return results, len(_jobs), len(_jobsets)
|
|
340
|
+
|
|
341
|
+
def process_matching_jobs_and_jobsets_force_all(self, flow_name, run_id, user):
|
|
342
|
+
"""Force process ALL matching jobs and jobsets regardless of their status/outcome"""
|
|
343
|
+
results = []
|
|
344
|
+
progress_update = lambda x: x
|
|
345
|
+
if self.progress_bar:
|
|
346
|
+
progress_update = lambda x: self.progress_bar.update(1, x)
|
|
347
|
+
|
|
348
|
+
# Process matching jobs - FORCE DELETE ALL
|
|
349
|
+
_jobs, _jobsets = [], []
|
|
350
|
+
jobs = self._find_matching_jobs(flow_name, run_id, user)
|
|
351
|
+
for job in jobs:
|
|
352
|
+
# Force DELETE outcome regardless of actual status
|
|
353
|
+
result = self._handle_job_outcome(job, JobOutcomes.DELETE)
|
|
354
|
+
progress_update("🔥 FORCE Deleting Job %s" % job.metadata.name)
|
|
355
|
+
results.append(
|
|
356
|
+
result if result is not None else True
|
|
357
|
+
) # Treat None as success for force mode
|
|
358
|
+
_jobs.append(result if result is not None else True)
|
|
359
|
+
|
|
360
|
+
# Process matching jobsets - FORCE DELETE ALL
|
|
361
|
+
jobsets = self._find_matching_jobsets(flow_name, run_id, user)
|
|
362
|
+
for jobset in jobsets:
|
|
363
|
+
# Force DELETE outcome regardless of actual status
|
|
364
|
+
result = self._handle_jobset_outcome(jobset, JobOutcomes.DELETE)
|
|
365
|
+
progress_update(
|
|
366
|
+
"🔥 FORCE Deleting JobSet %s"
|
|
367
|
+
% jobset.get("metadata", {}).get("name", "unknown")
|
|
368
|
+
)
|
|
369
|
+
results.append(
|
|
370
|
+
result if result is not None else True
|
|
371
|
+
) # Treat None as success for force mode
|
|
372
|
+
_jobsets.append(result if result is not None else True)
|
|
373
|
+
|
|
374
|
+
return results, len(_jobs), len(_jobsets)
|
|
@@ -1 +1 @@
|
|
|
1
|
-
from ..plugins.
|
|
1
|
+
from ..plugins.kubernetes.pod_killer import PodKiller
|
{ob_metaflow_extensions-1.1.168rc5.dist-info → ob_metaflow_extensions-1.1.170.dist-info}/METADATA
RENAMED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: ob-metaflow-extensions
|
|
3
|
-
Version: 1.1.
|
|
3
|
+
Version: 1.1.170
|
|
4
4
|
Summary: Outerbounds Platform Extensions for Metaflow
|
|
5
5
|
Author: Outerbounds, Inc.
|
|
6
6
|
License: Commercial
|
|
7
7
|
Description-Content-Type: text/markdown
|
|
8
8
|
Requires-Dist: boto3
|
|
9
9
|
Requires-Dist: kubernetes
|
|
10
|
-
Requires-Dist: ob-metaflow (==2.15.
|
|
10
|
+
Requires-Dist: ob-metaflow (==2.15.18.1)
|
|
11
11
|
|
|
12
12
|
# Outerbounds platform package
|
|
13
13
|
|
{ob_metaflow_extensions-1.1.168rc5.dist-info → ob_metaflow_extensions-1.1.170.dist-info}/RECORD
RENAMED
|
@@ -1,11 +1,10 @@
|
|
|
1
1
|
metaflow_extensions/outerbounds/__init__.py,sha256=Gb8u06s9ClQsA_vzxmkCzuMnigPy7kKcDnLfb7eB-64,514
|
|
2
2
|
metaflow_extensions/outerbounds/remote_config.py,sha256=pEFJuKDYs98eoB_-ryPjVi9b_c4gpHMdBHE14ltoxIU,4672
|
|
3
3
|
metaflow_extensions/outerbounds/config/__init__.py,sha256=JsQGRuGFz28fQWjUvxUgR8EKBLGRdLUIk_buPLJplJY,1225
|
|
4
|
-
metaflow_extensions/outerbounds/plugins/__init__.py,sha256=
|
|
4
|
+
metaflow_extensions/outerbounds/plugins/__init__.py,sha256=qaGCEa_QFWgGURABv-ss0TYbC1RJTLlckogTA5tc3Bk,13713
|
|
5
5
|
metaflow_extensions/outerbounds/plugins/auth_server.py,sha256=_Q9_2EL0Xy77bCRphkwT1aSu8gQXRDOH-Z-RxTUO8N4,2202
|
|
6
6
|
metaflow_extensions/outerbounds/plugins/perimeters.py,sha256=QXh3SFP7GQbS-RAIxUOPbhPzQ7KDFVxZkTdKqFKgXjI,2697
|
|
7
7
|
metaflow_extensions/outerbounds/plugins/apps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
8
|
-
metaflow_extensions/outerbounds/plugins/apps/app_cli.py,sha256=erfKCC6zKuwax0ye9j-tqIkAVdCg7MJfbRGhMhViSzU,575
|
|
9
8
|
metaflow_extensions/outerbounds/plugins/apps/app_utils.py,sha256=sw9whU17lAzlD2K2kEDNjlk1Ib-2xE2UNhJkmzD8Qv8,8543
|
|
10
9
|
metaflow_extensions/outerbounds/plugins/apps/consts.py,sha256=iHsyqbUg9k-rgswCs1Jxf5QZIxR1V-peCDRjgr9kdBM,177
|
|
11
10
|
metaflow_extensions/outerbounds/plugins/apps/deploy_decorator.py,sha256=VkmiMdNYHhNdt-Qm9AVv7aE2LWFsIFEc16YcOYjwF6Q,8568
|
|
@@ -21,14 +20,13 @@ metaflow_extensions/outerbounds/plugins/checkpoint_datastores/__init__.py,sha256
|
|
|
21
20
|
metaflow_extensions/outerbounds/plugins/checkpoint_datastores/coreweave.py,sha256=_WzoOROFjoFa8TzsMNFp-r_1Zz7NUp-5ljn_kKlczXA,4534
|
|
22
21
|
metaflow_extensions/outerbounds/plugins/checkpoint_datastores/nebius.py,sha256=zgqDLFewCeF5jqh-hUNKmC_OAjld09ln0bb8Lkeqapc,4659
|
|
23
22
|
metaflow_extensions/outerbounds/plugins/fast_bakery/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
24
|
-
metaflow_extensions/outerbounds/plugins/fast_bakery/
|
|
25
|
-
metaflow_extensions/outerbounds/plugins/fast_bakery/docker_environment.py,sha256=Tl520HdBteg-aDOM7mnnJJpdDCZc49BmFFmLUc_vTi8,15018
|
|
23
|
+
metaflow_extensions/outerbounds/plugins/fast_bakery/docker_environment.py,sha256=nmp_INGIAiWyrhyJ71BH38eRLu1xCIEEKejmXNQ6RlA,15378
|
|
26
24
|
metaflow_extensions/outerbounds/plugins/fast_bakery/fast_bakery.py,sha256=PE81ZB54OAMXkMGSB7JqgvgMg7N9kvoVclrWL-6jc2U,5626
|
|
27
25
|
metaflow_extensions/outerbounds/plugins/fast_bakery/fast_bakery_cli.py,sha256=kqFyu2bJSnc9_9aYfBpz5xK6L6luWFZK_NMuh8f1eVk,1494
|
|
28
26
|
metaflow_extensions/outerbounds/plugins/fast_bakery/fast_bakery_decorator.py,sha256=MXSIp05-jvt8Q2uGaLKjtuM_ToLeRLxhtMbfHc9Kcko,1515
|
|
29
27
|
metaflow_extensions/outerbounds/plugins/kubernetes/__init__.py,sha256=5zG8gShSj8m7rgF4xgWBZFuY3GDP5n1T0ktjRpGJLHA,69
|
|
30
28
|
metaflow_extensions/outerbounds/plugins/kubernetes/kubernetes_client.py,sha256=sjBhQ4aa-i1UkKsJyTswdDLYOBAFIvHRco4r7wfs9Tc,5003
|
|
31
|
-
metaflow_extensions/outerbounds/plugins/kubernetes/pod_killer.py,sha256=
|
|
29
|
+
metaflow_extensions/outerbounds/plugins/kubernetes/pod_killer.py,sha256=3b9y6gYEqWkN-_-GOqcAV2pOJOeCjpVmfW-nOMCJ4Z0,14924
|
|
32
30
|
metaflow_extensions/outerbounds/plugins/nim/card.py,sha256=dXOJvsZed5NyYyxYLPDvtwg9z_X4azL9HTJGYaiNriY,4690
|
|
33
31
|
metaflow_extensions/outerbounds/plugins/nim/nim_decorator.py,sha256=50YVvC7mcZYlPluM0Wq1UtufhzlQb-RxzZkTOJJ3LkM,3439
|
|
34
32
|
metaflow_extensions/outerbounds/plugins/nim/nim_manager.py,sha256=y8U71106KJtrC6nlhsNnzX9Xkv3RnyZ1KEpRFwqZZFk,13686
|
|
@@ -77,8 +75,8 @@ metaflow_extensions/outerbounds/plugins/vllm/vllm_manager.py,sha256=E6SCBoanNNsF
|
|
|
77
75
|
metaflow_extensions/outerbounds/profilers/__init__.py,sha256=wa_jhnCBr82TBxoS0e8b6_6sLyZX0fdHicuGJZNTqKw,29
|
|
78
76
|
metaflow_extensions/outerbounds/profilers/gpu.py,sha256=3Er8uKQzfm_082uadg4yn_D4Y-iSCgzUfFmguYxZsz4,27485
|
|
79
77
|
metaflow_extensions/outerbounds/toplevel/__init__.py,sha256=qWUJSv_r5hXJ7jV_On4nEasKIfUCm6_UjkjXWA_A1Ts,90
|
|
80
|
-
metaflow_extensions/outerbounds/toplevel/global_aliases_for_metaflow_package.py,sha256=
|
|
81
|
-
metaflow_extensions/outerbounds/toplevel/ob_internal.py,sha256=
|
|
78
|
+
metaflow_extensions/outerbounds/toplevel/global_aliases_for_metaflow_package.py,sha256=FS0ZKQJsJKlw9PgtLqVV4kH9o7_qwYcIVtyu2Kqwa_U,2973
|
|
79
|
+
metaflow_extensions/outerbounds/toplevel/ob_internal.py,sha256=RtF7t9EIyGjWi36jAHbRaLo_MNOxF5Jo8X9QWzOr0co,54
|
|
82
80
|
metaflow_extensions/outerbounds/toplevel/plugins/azure/__init__.py,sha256=WUuhz2YQfI4fz7nIcipwwWq781eaoHEk7n4GAn1npDg,63
|
|
83
81
|
metaflow_extensions/outerbounds/toplevel/plugins/gcp/__init__.py,sha256=BbZiaH3uILlEZ6ntBLKeNyqn3If8nIXZFq_Apd7Dhco,70
|
|
84
82
|
metaflow_extensions/outerbounds/toplevel/plugins/kubernetes/__init__.py,sha256=5zG8gShSj8m7rgF4xgWBZFuY3GDP5n1T0ktjRpGJLHA,69
|
|
@@ -86,7 +84,7 @@ metaflow_extensions/outerbounds/toplevel/plugins/ollama/__init__.py,sha256=GRSz2
|
|
|
86
84
|
metaflow_extensions/outerbounds/toplevel/plugins/snowflake/__init__.py,sha256=LptpH-ziXHrednMYUjIaosS1SXD3sOtF_9_eRqd8SJw,50
|
|
87
85
|
metaflow_extensions/outerbounds/toplevel/plugins/torchtune/__init__.py,sha256=uTVkdSk3xZ7hEKYfdlyVteWj5KeDwaM1hU9WT-_YKfI,50
|
|
88
86
|
metaflow_extensions/outerbounds/toplevel/plugins/vllm/__init__.py,sha256=ekcgD3KVydf-a0xMI60P4uy6ePkSEoFHiGnDq1JM940,45
|
|
89
|
-
ob_metaflow_extensions-1.1.
|
|
90
|
-
ob_metaflow_extensions-1.1.
|
|
91
|
-
ob_metaflow_extensions-1.1.
|
|
92
|
-
ob_metaflow_extensions-1.1.
|
|
87
|
+
ob_metaflow_extensions-1.1.170.dist-info/METADATA,sha256=1wG03fowsX-nQaJbPyG2D9LdZKeNDZp08DMRS7A4re4,521
|
|
88
|
+
ob_metaflow_extensions-1.1.170.dist-info/WHEEL,sha256=bb2Ot9scclHKMOLDEHY6B2sicWOgugjFKaJsT7vwMQo,110
|
|
89
|
+
ob_metaflow_extensions-1.1.170.dist-info/top_level.txt,sha256=NwG0ukwjygtanDETyp_BUdtYtqIA_lOjzFFh1TsnxvI,20
|
|
90
|
+
ob_metaflow_extensions-1.1.170.dist-info/RECORD,,
|
|
@@ -1,26 +0,0 @@
|
|
|
1
|
-
from metaflow._vendor import click
|
|
2
|
-
|
|
3
|
-
OUTERBOUNDS_APP_CLI_AVAILABLE = True
|
|
4
|
-
try:
|
|
5
|
-
import outerbounds.apps.app_cli as ob_apps_cli
|
|
6
|
-
except ImportError:
|
|
7
|
-
OUTERBOUNDS_APP_CLI_AVAILABLE = False
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
if not OUTERBOUNDS_APP_CLI_AVAILABLE:
|
|
11
|
-
|
|
12
|
-
@click.group()
|
|
13
|
-
def _cli():
|
|
14
|
-
pass
|
|
15
|
-
|
|
16
|
-
@_cli.group(help="Dummy Group to append to CLI for Safety")
|
|
17
|
-
def app():
|
|
18
|
-
pass
|
|
19
|
-
|
|
20
|
-
@app.command(help="Dummy Command to append to CLI for Safety")
|
|
21
|
-
def cannot_deploy():
|
|
22
|
-
raise Exception("Outerbounds App CLI not available")
|
|
23
|
-
|
|
24
|
-
cli = _cli
|
|
25
|
-
else:
|
|
26
|
-
cli = ob_apps_cli.cli
|
|
@@ -1,110 +0,0 @@
|
|
|
1
|
-
import threading
|
|
2
|
-
import time
|
|
3
|
-
import sys
|
|
4
|
-
from typing import Dict, Optional, Any, Callable
|
|
5
|
-
from functools import partial
|
|
6
|
-
from metaflow.exception import MetaflowException
|
|
7
|
-
from metaflow.metaflow_config import FAST_BAKERY_URL
|
|
8
|
-
|
|
9
|
-
from .fast_bakery import FastBakery, FastBakeryApiResponse, FastBakeryException
|
|
10
|
-
from .docker_environment import cache_request
|
|
11
|
-
|
|
12
|
-
BAKERY_METAFILE = ".imagebakery-cache"
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
class BakerException(MetaflowException):
|
|
16
|
-
headline = "Ran into an error while baking image"
|
|
17
|
-
|
|
18
|
-
def __init__(self, msg):
|
|
19
|
-
super(BakerException, self).__init__(msg)
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
def bake_image(
|
|
23
|
-
cache_file_path: str,
|
|
24
|
-
ref: Optional[str] = None,
|
|
25
|
-
python: Optional[str] = None,
|
|
26
|
-
pypi_packages: Optional[Dict[str, str]] = None,
|
|
27
|
-
conda_packages: Optional[Dict[str, str]] = None,
|
|
28
|
-
base_image: Optional[str] = None,
|
|
29
|
-
logger: Optional[Callable[[str], Any]] = None,
|
|
30
|
-
) -> FastBakeryApiResponse:
|
|
31
|
-
"""
|
|
32
|
-
Bakes a Docker image with the specified dependencies.
|
|
33
|
-
|
|
34
|
-
Args:
|
|
35
|
-
cache_file_path: Path to the cache file
|
|
36
|
-
ref: Reference identifier for this bake (for logging purposes)
|
|
37
|
-
python: Python version to use
|
|
38
|
-
pypi_packages: Dictionary of PyPI packages and versions
|
|
39
|
-
conda_packages: Dictionary of Conda packages and versions
|
|
40
|
-
base_image: Base Docker image to use
|
|
41
|
-
logger: Optional logger function to output progress
|
|
42
|
-
|
|
43
|
-
Returns:
|
|
44
|
-
FastBakeryApiResponse: The response from the bakery service
|
|
45
|
-
|
|
46
|
-
Raises:
|
|
47
|
-
BakerException: If the baking process fails
|
|
48
|
-
"""
|
|
49
|
-
# Default logger if none provided
|
|
50
|
-
if logger is None:
|
|
51
|
-
logger = partial(print, file=sys.stderr)
|
|
52
|
-
|
|
53
|
-
# Thread lock for logging
|
|
54
|
-
logger_lock = threading.Lock()
|
|
55
|
-
images_baked = 0
|
|
56
|
-
|
|
57
|
-
@cache_request(cache_file_path)
|
|
58
|
-
def _cached_bake(
|
|
59
|
-
ref=None,
|
|
60
|
-
python=None,
|
|
61
|
-
pypi_packages=None,
|
|
62
|
-
conda_packages=None,
|
|
63
|
-
base_image=None,
|
|
64
|
-
):
|
|
65
|
-
try:
|
|
66
|
-
bakery = FastBakery(url=FAST_BAKERY_URL)
|
|
67
|
-
bakery._reset_payload()
|
|
68
|
-
bakery.python_version(python)
|
|
69
|
-
bakery.pypi_packages(pypi_packages)
|
|
70
|
-
bakery.conda_packages(conda_packages)
|
|
71
|
-
bakery.base_image(base_image)
|
|
72
|
-
# bakery.ignore_cache()
|
|
73
|
-
|
|
74
|
-
with logger_lock:
|
|
75
|
-
logger(f"🍳 Baking [{ref}] ...")
|
|
76
|
-
logger(f" 🐍 Python: {python}")
|
|
77
|
-
|
|
78
|
-
if pypi_packages:
|
|
79
|
-
logger(f" 📦 PyPI packages:")
|
|
80
|
-
for package, version in pypi_packages.items():
|
|
81
|
-
logger(f" 🔧 {package}: {version}")
|
|
82
|
-
|
|
83
|
-
if conda_packages:
|
|
84
|
-
logger(f" 📦 Conda packages:")
|
|
85
|
-
for package, version in conda_packages.items():
|
|
86
|
-
logger(f" 🔧 {package}: {version}")
|
|
87
|
-
|
|
88
|
-
logger(f" 🏗️ Base image: {base_image}")
|
|
89
|
-
|
|
90
|
-
start_time = time.time()
|
|
91
|
-
res = bakery.bake()
|
|
92
|
-
# TODO: Get actual bake time from bakery
|
|
93
|
-
bake_time = time.time() - start_time
|
|
94
|
-
|
|
95
|
-
with logger_lock:
|
|
96
|
-
logger(f"🏁 Baked [{ref}] in {bake_time:.2f} seconds!")
|
|
97
|
-
nonlocal images_baked
|
|
98
|
-
images_baked += 1
|
|
99
|
-
return res
|
|
100
|
-
except FastBakeryException as ex:
|
|
101
|
-
raise BakerException(f"Bake [{ref}] failed: {str(ex)}")
|
|
102
|
-
|
|
103
|
-
# Call the cached bake function with the provided parameters
|
|
104
|
-
return _cached_bake(
|
|
105
|
-
ref=ref,
|
|
106
|
-
python=python,
|
|
107
|
-
pypi_packages=pypi_packages,
|
|
108
|
-
conda_packages=conda_packages,
|
|
109
|
-
base_image=base_image,
|
|
110
|
-
)
|
{ob_metaflow_extensions-1.1.168rc5.dist-info → ob_metaflow_extensions-1.1.170.dist-info}/WHEEL
RENAMED
|
File without changes
|
|
File without changes
|