metaflow 2.15.7__py2.py3-none-any.whl → 2.15.9__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metaflow/cli.py +8 -0
- metaflow/cli_components/run_cmds.py +2 -2
- metaflow/cmd/main_cli.py +1 -1
- metaflow/metadata_provider/metadata.py +35 -0
- metaflow/metaflow_config.py +6 -0
- metaflow/metaflow_environment.py +6 -1
- metaflow/metaflow_git.py +115 -0
- metaflow/metaflow_version.py +2 -2
- metaflow/plugins/__init__.py +1 -0
- metaflow/plugins/argo/argo_workflows.py +43 -6
- metaflow/plugins/argo/argo_workflows_cli.py +11 -0
- metaflow/plugins/aws/aws_client.py +4 -3
- metaflow/plugins/datatools/s3/s3.py +46 -44
- metaflow/plugins/datatools/s3/s3op.py +133 -63
- metaflow/plugins/kubernetes/kubernetes.py +4 -0
- metaflow/plugins/kubernetes/kubernetes_cli.py +8 -0
- metaflow/plugins/kubernetes/kubernetes_decorator.py +10 -0
- metaflow/plugins/kubernetes/kubernetes_job.py +8 -0
- metaflow/plugins/kubernetes/kubernetes_jobsets.py +7 -0
- metaflow/plugins/uv/__init__.py +0 -0
- metaflow/plugins/uv/bootstrap.py +100 -0
- metaflow/plugins/uv/uv_environment.py +70 -0
- metaflow/runner/deployer.py +8 -2
- metaflow/runner/deployer_impl.py +6 -2
- metaflow/runner/metaflow_runner.py +7 -2
- metaflow/version.py +1 -1
- {metaflow-2.15.7.data → metaflow-2.15.9.data}/data/share/metaflow/devtools/Makefile +2 -0
- {metaflow-2.15.7.dist-info → metaflow-2.15.9.dist-info}/METADATA +2 -2
- {metaflow-2.15.7.dist-info → metaflow-2.15.9.dist-info}/RECORD +35 -31
- {metaflow-2.15.7.dist-info → metaflow-2.15.9.dist-info}/WHEEL +1 -1
- {metaflow-2.15.7.data → metaflow-2.15.9.data}/data/share/metaflow/devtools/Tiltfile +0 -0
- {metaflow-2.15.7.data → metaflow-2.15.9.data}/data/share/metaflow/devtools/pick_services.sh +0 -0
- {metaflow-2.15.7.dist-info → metaflow-2.15.9.dist-info}/entry_points.txt +0 -0
- {metaflow-2.15.7.dist-info → metaflow-2.15.9.dist-info}/licenses/LICENSE +0 -0
- {metaflow-2.15.7.dist-info → metaflow-2.15.9.dist-info}/top_level.txt +0 -0
metaflow/plugins/datatools/s3/s3op.py
CHANGED
@@ -15,7 +15,10 @@ from tempfile import NamedTemporaryFile
 from multiprocessing import Process, Queue
 from itertools import starmap, chain, islice
 
+from boto3.exceptions import RetriesExceededError, S3UploadFailedError
 from boto3.s3.transfer import TransferConfig
+from botocore.config import Config
+from botocore.exceptions import ClientError, SSLError
 
 try:
     # python2
@@ -46,13 +49,21 @@ from metaflow.plugins.datatools.s3.s3util import (
 import metaflow.tracing as tracing
 from metaflow.metaflow_config import (
     S3_WORKER_COUNT,
+    S3_CLIENT_RETRY_CONFIG,
 )
 
 DOWNLOAD_FILE_THRESHOLD = 2 * TransferConfig().multipart_threshold
 DOWNLOAD_MAX_CHUNK = 2 * 1024 * 1024 * 1024 - 1
 
+DEFAULT_S3_CLIENT_PARAMS = {"config": Config(retries=S3_CLIENT_RETRY_CONFIG)}
 RANGE_MATCH = re.compile(r"bytes (?P<start>[0-9]+)-(?P<end>[0-9]+)/(?P<total>[0-9]+)")
 
+# from botocore ClientError MSG_TEMPLATE:
+# https://github.com/boto/botocore/blob/68ca78f3097906c9231840a49931ef4382c41eea/botocore/exceptions.py#L521
+BOTOCORE_MSG_TEMPLATE_MATCH = re.compile(
+    r"An error occurred \((\w+)\) when calling the (\w+) operation.*: (.+)"
+)
+
 S3Config = namedtuple("S3Config", "role session_vars client_params")
 
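With DEFAULT_S3_CLIENT_PARAMS, every boto3 client that s3op creates now carries a retry policy unless the caller overrides it with --s3clientparams. A minimal sketch of the effect, using a hypothetical retry dict in place of S3_CLIENT_RETRY_CONFIG (the real value is defined in metaflow_config.py, whose changed lines are not shown in this diff excerpt):

    import boto3
    from botocore.config import Config

    # Hypothetical stand-in for S3_CLIENT_RETRY_CONFIG
    S3_CLIENT_RETRY_CONFIG = {"max_attempts": 10, "mode": "adaptive"}

    DEFAULT_S3_CLIENT_PARAMS = {"config": Config(retries=S3_CLIENT_RETRY_CONFIG)}

    # Roughly what s3op does when no --s3clientparams override is supplied
    s3 = boto3.client("s3", **DEFAULT_S3_CLIENT_PARAMS)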
@@ -147,6 +158,7 @@ def normalize_client_error(err):
             "LimitExceededException",
             "RequestThrottled",
             "EC2ThrottledException",
+            "InternalError",
         ):
             return 503
     return error_code
@@ -221,54 +233,57 @@ def worker(result_file_name, queue, mode, s3config):
                 elif mode == "download":
                     tmp = NamedTemporaryFile(dir=".", mode="wb", delete=False)
                     try:
[… 4 removed lines (old 224-227) not captured in the source view …]
-                            range_result = resp["ContentRange"]
-                            range_result_match = RANGE_MATCH.match(range_result)
-                            if range_result_match is None:
-                                raise RuntimeError(
-                                    "Wrong format for ContentRange: %s"
-                                    % str(range_result)
+                        try:
+                            if url.range:
+                                resp = s3.get_object(
+                                    Bucket=url.bucket, Key=url.path, Range=url.range
                                 )
[… 29 removed lines (old 235-263) not captured in the source view …]
-                                )
+                                range_result = resp["ContentRange"]
+                                range_result_match = RANGE_MATCH.match(range_result)
+                                if range_result_match is None:
+                                    raise RuntimeError(
+                                        "Wrong format for ContentRange: %s"
+                                        % str(range_result)
+                                    )
+                                range_result = {
+                                    x: int(range_result_match.group(x))
+                                    for x in ["total", "start", "end"]
+                                }
+                            else:
+                                resp = s3.get_object(Bucket=url.bucket, Key=url.path)
+                                range_result = None
+                            sz = resp["ContentLength"]
+                            if range_result is None:
+                                range_result = {"total": sz, "start": 0, "end": sz - 1}
+                            if not url.range and sz > DOWNLOAD_FILE_THRESHOLD:
+                                # In this case, it is more efficient to use download_file as it
+                                # will download multiple parts in parallel (it does it after
+                                # multipart_threshold)
+                                s3.download_file(url.bucket, url.path, tmp.name)
+                            else:
+                                read_in_chunks(
+                                    tmp, resp["Body"], sz, DOWNLOAD_MAX_CHUNK
+                                )
+                            tmp.close()
+                            os.rename(tmp.name, url.local)
+                        except client_error as err:
+                            tmp.close()
+                            os.unlink(tmp.name)
+                            handle_client_error(err, idx, result_file)
                             continue
[… 2 removed lines (old 266-267) not captured in the source view …]
+                        except RetriesExceededError as e:
+                            tmp.close()
+                            os.unlink(tmp.name)
+                            err = convert_to_client_error(e)
+                            handle_client_error(err, idx, result_file)
                             continue
[… 3 removed lines (old 269-271) not captured in the source view …]
+                        except (SSLError, Exception) as e:
+                            tmp.close()
+                            os.unlink(tmp.name)
+                            # assume anything else is transient
+                            result_file.write("%d %d\n" % (idx, -ERROR_TRANSIENT))
+                            result_file.flush()
+                            continue
                     # If we need the metadata, get it and write it out
                     if pre_op_info:
                         with open("%s_meta" % url.local, mode="w") as f:
@@ -316,28 +331,67 @@ def worker(result_file_name, queue, mode, s3config):
                     if url.encryption is not None:
                         extra["ServerSideEncryption"] = url.encryption
                     try:
[… 3 removed lines (old 319-321) not captured in the source view …]
-                        # We indicate that the file was uploaded
-                        result_file.write("%d %d\n" % (idx, 0))
-                    except client_error as err:
-                        error_code = normalize_client_error(err)
-                        if error_code == 403:
-                            result_file.write(
-                                "%d %d\n" % (idx, -ERROR_URL_ACCESS_DENIED)
+                        try:
+                            s3.upload_file(
+                                url.local, url.bucket, url.path, ExtraArgs=extra
                             )
+                            # We indicate that the file was uploaded
+                            result_file.write("%d %d\n" % (idx, 0))
+                        except client_error as err:
+                            # Shouldn't get here, but just in case.
+                            # Internally, botocore catches ClientError and returns a S3UploadFailedError.
+                            # See https://github.com/boto/boto3/blob/develop/boto3/s3/transfer.py#L377
+                            handle_client_error(err, idx, result_file)
                             continue
[… 2 removed lines (old 331-332) not captured in the source view …]
+                        except S3UploadFailedError as e:
+                            err = convert_to_client_error(e)
+                            handle_client_error(err, idx, result_file)
                             continue
[… 2 removed lines (old 334-335) not captured in the source view …]
+                        except (SSLError, Exception) as e:
+                            # assume anything else is transient
+                            result_file.write("%d %d\n" % (idx, -ERROR_TRANSIENT))
+                            result_file.flush()
+                            continue
         except:
             traceback.print_exc()
+            result_file.flush()
             sys.exit(ERROR_WORKER_EXCEPTION)
 
 
+def convert_to_client_error(e):
+    match = BOTOCORE_MSG_TEMPLATE_MATCH.search(str(e))
+    if not match:
+        raise e
+    error_code = match.group(1)
+    operation_name = match.group(2)
+    error_message = match.group(3)
+    response = {
+        "Error": {
+            "Code": error_code,
+            "Message": error_message,
+        }
+    }
+    return ClientError(response, operation_name)
+
+
+def handle_client_error(err, idx, result_file):
+    error_code = normalize_client_error(err)
+    if error_code == 404:
+        result_file.write("%d %d\n" % (idx, -ERROR_URL_NOT_FOUND))
+        result_file.flush()
+    elif error_code == 403:
+        result_file.write("%d %d\n" % (idx, -ERROR_URL_ACCESS_DENIED))
+        result_file.flush()
+    elif error_code == 503:
+        result_file.write("%d %d\n" % (idx, -ERROR_TRANSIENT))
+        result_file.flush()
+    else:
+        # optimistically assume it is a transient error
+        result_file.write("%d %d\n" % (idx, -ERROR_TRANSIENT))
+        result_file.flush()
+    # TODO specific error message for out of disk space
+
+
 def start_workers(mode, urls, num_workers, inject_failure, s3config):
     # We start the minimum of len(urls) or num_workers to avoid starting
     # workers that will definitely do nothing
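convert_to_client_error exists because boto3's managed transfer calls (upload_file, download_file) wrap the underlying ClientError in S3UploadFailedError or RetriesExceededError, so the structured error response is lost; the regex recovers the error code from the exception message and rebuilds a ClientError that handle_client_error can process like any other. A small sketch with a synthetic message in botocore's MSG_TEMPLATE format (not a real AWS response):

    import re
    from botocore.exceptions import ClientError

    BOTOCORE_MSG_TEMPLATE_MATCH = re.compile(
        r"An error occurred \((\w+)\) when calling the (\w+) operation.*: (.+)"
    )

    # Synthetic message, shaped like the text botocore embeds in transfer errors
    msg = "An error occurred (InternalError) when calling the PutObject operation (reached max retries: 4): We encountered an internal error."
    m = BOTOCORE_MSG_TEMPLATE_MATCH.search(msg)
    err = ClientError({"Error": {"Code": m.group(1), "Message": m.group(3)}}, m.group(2))
    print(err.response["Error"]["Code"])  # InternalError, which normalize_client_error now maps to 503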
@@ -381,6 +435,22 @@ def start_workers(mode, urls, num_workers, inject_failure, s3config):
         if proc.exitcode is not None:
             if proc.exitcode != 0:
                 msg = "Worker process failed (exit code %d)" % proc.exitcode
+
+                # IMPORTANT: if this process has put items on a queue, then it will not terminate
+                # until all buffered items have been flushed to the pipe, causing a deadlock.
+                # `cancel_join_thread()` allows it to exit without flushing the queue.
+                # Without this line, the parent process would hang indefinitely when a subprocess
+                # did not exit cleanly in the case of unhandled exceptions.
+                #
+                # The error situation is:
+                # 1. this process puts stuff in queue
+                # 2. subprocess dies so doesn't consume its end-of-queue marker (the None)
+                # 3. other subprocesses consume all useful bits AND their end-of-queue marker
+                # 4. one marker is left and not consumed
+                # 5. this process cannot shut down until the queue is empty.
+                # 6. it will never be empty because all subprocesses (workers) have died.
+                queue.cancel_join_thread()
+
                 exit(msg, proc.exitcode)
         # Read the output file if all went well
         with open(out_path, "r") as out_file:
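The long comment describes a standard multiprocessing pitfall rather than anything Metaflow-specific: a Queue's feeder thread keeps the producing process alive until every buffered item has been flushed into the pipe, so if all consumers are dead the producer can hang at exit. A minimal standalone reproduction of the fix (illustrative only, not Metaflow code):

    from multiprocessing import Process, Queue

    def consumer(q):
        raise RuntimeError("worker dies before draining the queue")

    if __name__ == "__main__":
        q = Queue()
        p = Process(target=consumer, args=(q,))
        p.start()
        for i in range(100000):
            q.put(i)            # items pile up in the queue's internal buffer
        p.join()                # the only consumer is already dead
        q.cancel_join_thread()  # without this, shutdown can block on the feeder thread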
@@ -745,7 +815,7 @@ def lst(
     s3config = S3Config(
         s3role,
         json.loads(s3sessionvars) if s3sessionvars else None,
-        json.loads(s3clientparams) if s3clientparams else
+        json.loads(s3clientparams) if s3clientparams else DEFAULT_S3_CLIENT_PARAMS,
     )
 
     urllist = []
@@ -878,7 +948,7 @@ def put(
     s3config = S3Config(
         s3role,
         json.loads(s3sessionvars) if s3sessionvars else None,
-        json.loads(s3clientparams) if s3clientparams else
+        json.loads(s3clientparams) if s3clientparams else DEFAULT_S3_CLIENT_PARAMS,
     )
 
     urls = list(starmap(_make_url, _files()))
@@ -1025,7 +1095,7 @@ def get(
     s3config = S3Config(
         s3role,
         json.loads(s3sessionvars) if s3sessionvars else None,
-        json.loads(s3clientparams) if s3clientparams else
+        json.loads(s3clientparams) if s3clientparams else DEFAULT_S3_CLIENT_PARAMS,
    )
 
     # Construct a list of URL (prefix) objects
@@ -1172,7 +1242,7 @@ def info(
     s3config = S3Config(
         s3role,
         json.loads(s3sessionvars) if s3sessionvars else None,
-        json.loads(s3clientparams) if s3clientparams else
+        json.loads(s3clientparams) if s3clientparams else DEFAULT_S3_CLIENT_PARAMS,
     )
 
     # Construct a list of URL (prefix) objects
metaflow/plugins/kubernetes/kubernetes.py
CHANGED
@@ -194,6 +194,7 @@ class Kubernetes(object):
         port=None,
         num_parallel=None,
         qos=None,
+        security_context=None,
     ):
         name = "js-%s" % str(uuid4())[:6]
         jobset = (
@@ -227,6 +228,7 @@ class Kubernetes(object):
                 port=port,
                 num_parallel=num_parallel,
                 qos=qos,
+                security_context=security_context,
             )
             .environment_variable("METAFLOW_CODE_SHA", code_package_sha)
             .environment_variable("METAFLOW_CODE_URL", code_package_url)
@@ -488,6 +490,7 @@ class Kubernetes(object):
         name_pattern=None,
         qos=None,
         annotations=None,
+        security_context=None,
     ):
         if env is None:
             env = {}
@@ -530,6 +533,7 @@ class Kubernetes(object):
                 shared_memory=shared_memory,
                 port=port,
                 qos=qos,
+                security_context=security_context,
             )
             .environment_variable("METAFLOW_CODE_SHA", code_package_sha)
             .environment_variable("METAFLOW_CODE_URL", code_package_url)
metaflow/plugins/kubernetes/kubernetes_cli.py
CHANGED
@@ -145,6 +145,12 @@ def kubernetes():
     type=JSONTypeClass(),
     multiple=False,
 )
+@click.option(
+    "--security-context",
+    default=None,
+    type=JSONTypeClass(),
+    multiple=False,
+)
 @click.pass_context
 def step(
     ctx,
@@ -176,6 +182,7 @@ def step(
     qos=None,
     labels=None,
     annotations=None,
+    security_context=None,
     **kwargs
 ):
     def echo(msg, stream="stderr", job_id=None, **kwargs):
@@ -319,6 +326,7 @@ def step(
             qos=qos,
             labels=labels,
             annotations=annotations,
+            security_context=security_context,
         )
     except Exception:
         traceback.print_exc(chain=False)
metaflow/plugins/kubernetes/kubernetes_decorator.py
CHANGED
@@ -122,6 +122,14 @@ class KubernetesDecorator(StepDecorator):
         Only applicable when @parallel is used.
     qos: str, default: Burstable
         Quality of Service class to assign to the pod. Supported values are: Guaranteed, Burstable, BestEffort
+
+    security_context: Dict[str, Any], optional, default None
+        Container security context. Applies to the task container. Allows the following keys:
+        - privileged: bool, optional, default None
+        - allow_privilege_escalation: bool, optional, default None
+        - run_as_user: int, optional, default None
+        - run_as_group: int, optional, default None
+        - run_as_non_root: bool, optional, default None
     """
 
     name = "kubernetes"
@@ -152,6 +160,7 @@ class KubernetesDecorator(StepDecorator):
         "executable": None,
         "hostname_resolution_timeout": 10 * 60,
         "qos": KUBERNETES_QOS,
+        "security_context": None,
     }
     package_url = None
     package_sha = None
@@ -474,6 +483,7 @@ class KubernetesDecorator(StepDecorator):
                         "persistent_volume_claims",
                         "labels",
                         "annotations",
+                        "security_context",
                     ]:
                         cli_args.command_options[k] = json.dumps(v)
                     else:
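In user code the new attribute is passed straight through the decorator. A hypothetical step using it (flow name, resource values, and context keys chosen for illustration; the keys mirror the documented ones above):

    from metaflow import FlowSpec, kubernetes, step

    class SecurityContextFlow(FlowSpec):

        @kubernetes(cpu=1, memory=4096, security_context={"run_as_user": 1000, "run_as_non_root": True})
        @step
        def start(self):
            self.next(self.end)

        @step
        def end(self):
            pass

    if __name__ == "__main__":
        SecurityContextFlow()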
metaflow/plugins/kubernetes/kubernetes_job.py
CHANGED
@@ -80,6 +80,13 @@ class KubernetesJob(object):
             self._kwargs["disk"],
         )
 
+        security_context = self._kwargs.get("security_context", {})
+        _security_context = {}
+        if security_context is not None and len(security_context) > 0:
+            _security_context = {
+                "security_context": client.V1SecurityContext(**security_context)
+            }
+
         return client.V1JobSpec(
             # Retries are handled by Metaflow when it is responsible for
             # executing the flow. The responsibility is moved to Kubernetes
@@ -203,6 +210,7 @@ class KubernetesJob(object):
                             if self._kwargs["persistent_volume_claims"] is not None
                             else []
                         ),
+                        **_security_context,
                     )
                 ],
                 node_selector=self._kwargs.get("node_selector"),
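The JobSet builder below applies the same translation, so in both code paths the decorator's dict keys line up with the snake_case arguments of the official Kubernetes Python client's V1SecurityContext. A standalone sketch of that mapping (container name and image are placeholders):

    from kubernetes import client

    security_context = {"run_as_user": 1000, "run_as_non_root": True, "allow_privilege_escalation": False}

    # Same guard as in the diff: only set the field when a non-empty dict was provided
    _security_context = {}
    if security_context:
        _security_context = {"security_context": client.V1SecurityContext(**security_context)}

    container = client.V1Container(name="step", image="python:3.11", **_security_context)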
metaflow/plugins/kubernetes/kubernetes_jobsets.py
CHANGED
@@ -562,6 +562,12 @@ class JobSetSpec(object):
             self._kwargs["memory"],
             self._kwargs["disk"],
         )
+        security_context = self._kwargs.get("security_context", {})
+        _security_context = {}
+        if security_context is not None and len(security_context) > 0:
+            _security_context = {
+                "security_context": client.V1SecurityContext(**security_context)
+            }
         return dict(
             name=self.name,
             template=client.api_client.ApiClient().sanitize_for_serialization(
@@ -708,6 +714,7 @@ class JobSetSpec(object):
                                 is not None
                                 else []
                             ),
+                            **_security_context,
                         )
                     ],
                     node_selector=self._kwargs.get("node_selector"),
metaflow/plugins/uv/__init__.py
File without changes

metaflow/plugins/uv/bootstrap.py
@@ -0,0 +1,100 @@
+import os
+import subprocess
+import sys
+import time
+
+from metaflow.util import which
+from metaflow.metaflow_config import get_pinned_conda_libs
+from urllib.request import Request, urlopen
+from urllib.error import URLError
+
+# TODO: support version/platform/architecture selection.
+UV_URL = "https://github.com/astral-sh/uv/releases/download/0.6.11/uv-x86_64-unknown-linux-gnu.tar.gz"
+
+if __name__ == "__main__":
+
+    def run_cmd(cmd, stdin_str=None):
+        result = subprocess.run(
+            cmd,
+            shell=True,
+            input=stdin_str,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            text=True,
+        )
+        if result.returncode != 0:
+            print(f"Bootstrap failed while executing: {cmd}")
+            print("Stdout:", result.stdout)
+            print("Stderr:", result.stderr)
+            sys.exit(1)
+
+    def install_uv():
+        import tarfile
+
+        uv_install_path = os.path.join(os.getcwd(), "uv_install")
+        if which("uv"):
+            return
+
+        print("Installing uv...")
+
+        # Prepare directory once
+        os.makedirs(uv_install_path, exist_ok=True)
+
+        # Download and decompress in one go
+        headers = {
+            "Accept-Encoding": "gzip, deflate, br",
+            "Connection": "keep-alive",
+            "User-Agent": "python-urllib",
+        }
+
+        def _tar_filter(member: tarfile.TarInfo, path):
+            if os.path.basename(member.name) != "uv":
+                return None  # skip
+            member.path = os.path.basename(member.path)
+            return member
+
+        max_retries = 3
+        for attempt in range(max_retries):
+            try:
+                req = Request(UV_URL, headers=headers)
+                with urlopen(req) as response:
+                    with tarfile.open(fileobj=response, mode="r:gz") as tar:
+                        tar.extractall(uv_install_path, filter=_tar_filter)
+                break
+            except (URLError, IOError) as e:
+                if attempt == max_retries - 1:
+                    raise Exception(
+                        f"Failed to download UV after {max_retries} attempts: {e}"
+                    )
+                time.sleep(2**attempt)
+
+        # Update PATH only once at the end
+        os.environ["PATH"] += os.pathsep + uv_install_path
+
+    def get_dependencies(datastore_type):
+        # return required dependencies for Metaflow that must be added to the UV environment.
+        pinned = get_pinned_conda_libs(None, datastore_type)
+
+        # return only dependency names instead of pinned versions
+        return pinned.keys()
+
+    def sync_uv_project(datastore_type):
+        print("Syncing uv project...")
+        dependencies = " ".join(get_dependencies(datastore_type))
+        cmd = f"""set -e;
+            uv sync --frozen --no-install-package metaflow;
+            uv pip install {dependencies} --strict
+        """
+        run_cmd(cmd)

+    if len(sys.argv) != 2:
+        print("Usage: bootstrap.py <datastore_type>")
+        sys.exit(1)
+
+    try:
+        datastore_type = sys.argv[1]
+        install_uv()
+        sync_uv_project(datastore_type)
+    except Exception as e:
+        print(f"Error: {str(e)}", file=sys.stderr)
+        sys.exit(1)
metaflow/plugins/uv/uv_environment.py
@@ -0,0 +1,70 @@
+import os
+
+from metaflow.exception import MetaflowException
+from metaflow.metaflow_environment import MetaflowEnvironment
+
+
+class UVException(MetaflowException):
+    headline = "uv error"
+
+
+class UVEnvironment(MetaflowEnvironment):
+    TYPE = "uv"
+
+    def __init__(self, flow):
+        self.flow = flow
+
+    def validate_environment(self, logger, datastore_type):
+        self.datastore_type = datastore_type
+        self.logger = logger
+
+    def init_environment(self, echo, only_steps=None):
+        self.logger("Bootstrapping uv...")
+
+    def executable(self, step_name, default=None):
+        return "uv run python"
+
+    def add_to_package(self):
+        # NOTE: We treat uv.lock and pyproject.toml as regular project assets and ship these along user code as part of the code package
+        # These are the minimal required files to reproduce the UV environment on the remote platform.
+        def _find(filename):
+            current_dir = os.getcwd()
+            while True:
+                file_path = os.path.join(current_dir, filename)
+                if os.path.isfile(file_path):
+                    return file_path
+                parent_dir = os.path.dirname(current_dir)
+                if parent_dir == current_dir:  # Reached root
+                    raise UVException(
+                        f"Could not find {filename} in current directory or any parent directory"
+                    )
+                current_dir = parent_dir
+
+        pyproject_path = _find("pyproject.toml")
+        uv_lock_path = _find("uv.lock")
+        files = [
+            (uv_lock_path, "uv.lock"),
+            (pyproject_path, "pyproject.toml"),
+        ]
+        return files
+
+    def pylint_config(self):
+        config = super().pylint_config()
+        # Disable (import-error) in pylint
+        config.append("--disable=F0401")
+        return config
+
+    def bootstrap_commands(self, step_name, datastore_type):
+        return [
+            "echo 'Bootstrapping uv project...'",
+            "flush_mflogs",
+            # We have to prevent the tracing module from loading, as the bootstrapping process
+            # uses the internal S3 client which would fail to import tracing due to the required
+            # dependencies being bundled into the conda environment, which is yet to be
+            # initialized at this point.
+            'DISABLE_TRACING=True python -m metaflow.plugins.uv.bootstrap "%s"'
+            % datastore_type,
+            "echo 'uv project bootstrapped.'",
+            "flush_mflogs",
+            "export PATH=$PATH:$(pwd)/uv_install",
+        ]
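Taken together, the uv plugin ships the project's uv.lock and pyproject.toml with the code package and recreates the environment remotely via the bootstrap module above. Since UVEnvironment registers TYPE = "uv", it would presumably be selected like the other Metaflow environments, e.g. `python flow.py --environment=uv run` from a uv-managed project; that invocation is an assumption and is not shown in this diff.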
metaflow/runner/deployer.py
CHANGED
@@ -1,3 +1,4 @@
+import os
 import json
 import time
 
@@ -52,7 +53,7 @@ class Deployer(metaclass=DeployerMeta):
     Parameters
     ----------
     flow_file : str
-        Path to the flow file to deploy.
+        Path to the flow file to deploy, relative to current directory.
     show_output : bool, default True
         Show the 'stdout' and 'stderr' to the console by default.
     profile : Optional[str], default None
@@ -80,7 +81,12 @@ class Deployer(metaclass=DeployerMeta):
         file_read_timeout: int = 3600,
         **kwargs,
     ):
[… 1 removed line (old 83) not captured in the source view …]
+        # Convert flow_file to absolute path if it's relative
+        if not os.path.isabs(flow_file):
+            self.flow_file = os.path.abspath(flow_file)
+        else:
+            self.flow_file = flow_file
+
         self.show_output = show_output
         self.profile = profile
         self.env = env
metaflow/runner/deployer_impl.py
CHANGED
@@ -25,7 +25,7 @@ class DeployerImpl(object):
     Parameters
     ----------
     flow_file : str
-        Path to the flow file to deploy.
+        Path to the flow file to deploy, relative to current directory.
     show_output : bool, default True
         Show the 'stdout' and 'stderr' to the console by default.
     profile : Optional[str], default None
@@ -80,7 +80,11 @@ class DeployerImpl(object):
         from metaflow.cli import start
         from metaflow.runner.click_api import MetaflowAPI
 
[… 1 removed line (old 83) not captured in the source view …]
+        # Convert flow_file to absolute path if it's relative
+        if not os.path.isabs(flow_file):
+            self.flow_file = os.path.abspath(flow_file)
+        else:
+            self.flow_file = flow_file
         self.show_output = show_output
         self.profile = profile
         self.env = env
metaflow/runner/metaflow_runner.py
CHANGED
@@ -229,7 +229,7 @@ class Runner(metaclass=RunnerMeta):
     Parameters
     ----------
     flow_file : str
-        Path to the flow file to run
+        Path to the flow file to run, relative to current directory.
     show_output : bool, default True
         Show the 'stdout' and 'stderr' to the console by default,
         Only applicable for synchronous 'run' and 'resume' functions.
@@ -286,7 +286,12 @@ class Runner(metaclass=RunnerMeta):
         from metaflow.cli import start
         from metaflow.runner.click_api import MetaflowAPI
 
[… 1 removed line (old 289) not captured in the source view …]
+        # Convert flow_file to absolute path if it's relative
+        if not os.path.isabs(flow_file):
+            self.flow_file = os.path.abspath(flow_file)
+        else:
+            self.flow_file = flow_file
+
         self.show_output = show_output
 
         self.env_vars = os.environ.copy()
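The practical effect of the change in Runner, Deployer, and DeployerImpl is that a relative flow_file is pinned to the caller's working directory at construction time, so a later chdir (or a subprocess launched elsewhere) still resolves the file. A small sketch assuming a flow.py in the current directory:

    import os
    from metaflow import Runner

    runner = Runner("flow.py")   # stored as os.path.abspath("flow.py") at construction
    os.chdir("/tmp")             # changing directory afterwards no longer breaks the path
    result = runner.run()
    print(result.status)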
metaflow/version.py
CHANGED
@@ -1 +1 @@
-metaflow_version = "2.15.7"
+metaflow_version = "2.15.9"