metaflow 2.14.0__py2.py3-none-any.whl → 2.14.2__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metaflow/cli.py +0 -23
- metaflow/cli_components/run_cmds.py +34 -14
- metaflow/cli_components/step_cmd.py +2 -0
- metaflow/client/core.py +241 -1
- metaflow/cmd/main_cli.py +1 -1
- metaflow/metadata_provider/heartbeat.py +1 -0
- metaflow/metadata_provider/metadata.py +33 -0
- metaflow/metaflow_config.py +5 -9
- metaflow/mflog/save_logs.py +2 -2
- metaflow/plugins/argo/argo_workflows.py +12 -8
- metaflow/plugins/argo/argo_workflows_cli.py +2 -2
- metaflow/plugins/datatools/s3/s3op.py +4 -4
- metaflow/plugins/env_escape/server.py +7 -0
- metaflow/plugins/env_escape/stub.py +21 -4
- metaflow/plugins/kubernetes/kubernetes_cli.py +1 -1
- metaflow/plugins/metadata_providers/local.py +66 -0
- metaflow/plugins/metadata_providers/service.py +51 -0
- metaflow/plugins/pypi/bootstrap.py +4 -4
- metaflow/runner/click_api.py +6 -3
- metaflow/sidecar/sidecar_worker.py +1 -1
- metaflow/task.py +21 -2
- metaflow/tracing/__init__.py +7 -7
- metaflow/tracing/span_exporter.py +31 -38
- metaflow/tracing/tracing_modules.py +39 -44
- metaflow/version.py +1 -1
- {metaflow-2.14.0.dist-info → metaflow-2.14.2.dist-info}/METADATA +2 -2
- {metaflow-2.14.0.dist-info → metaflow-2.14.2.dist-info}/RECORD +31 -31
- {metaflow-2.14.0.dist-info → metaflow-2.14.2.dist-info}/LICENSE +0 -0
- {metaflow-2.14.0.dist-info → metaflow-2.14.2.dist-info}/WHEEL +0 -0
- {metaflow-2.14.0.dist-info → metaflow-2.14.2.dist-info}/entry_points.txt +0 -0
- {metaflow-2.14.0.dist-info → metaflow-2.14.2.dist-info}/top_level.txt +0 -0
@@ -276,9 +276,22 @@ class MetaWithConnection(StubMetaClass):
|
|
276
276
|
if len(args) > 0 and id(args[0]) == id(cls.___class_connection___):
|
277
277
|
return super(MetaWithConnection, cls).__call__(*args, **kwargs)
|
278
278
|
else:
|
279
|
-
|
280
|
-
|
281
|
-
|
279
|
+
if hasattr(cls, "__overriden_init__"):
|
280
|
+
return cls.__overriden_init__(
|
281
|
+
None,
|
282
|
+
functools.partial(
|
283
|
+
cls.___class_connection___.stub_request,
|
284
|
+
None,
|
285
|
+
OP_INIT,
|
286
|
+
cls.___class_remote_class_name___,
|
287
|
+
),
|
288
|
+
*args,
|
289
|
+
**kwargs
|
290
|
+
)
|
291
|
+
else:
|
292
|
+
return cls.___class_connection___.stub_request(
|
293
|
+
None, OP_INIT, cls.___class_remote_class_name___, *args, **kwargs
|
294
|
+
)
|
282
295
|
|
283
296
|
def __subclasscheck__(cls, subclass):
|
284
297
|
subclass_name = "%s.%s" % (subclass.__module__, subclass.__name__)
|
@@ -381,7 +394,10 @@ def create_class(
|
|
381
394
|
name = name[7:]
|
382
395
|
method_type = CLASS_METHOD
|
383
396
|
if name in overriden_methods:
|
384
|
-
if
|
397
|
+
if name == "__init__":
|
398
|
+
class_dict["__overriden_init__"] = overriden_methods["__init__"]
|
399
|
+
|
400
|
+
elif method_type == NORMAL_METHOD:
|
385
401
|
class_dict[name] = (
|
386
402
|
lambda override, orig_method: lambda obj, *args, **kwargs: override(
|
387
403
|
obj, functools.partial(orig_method, obj), *args, **kwargs
|
@@ -412,6 +428,7 @@ def create_class(
|
|
412
428
|
class_dict[name] = _make_method(
|
413
429
|
method_type, connection, class_name, name, doc
|
414
430
|
)
|
431
|
+
|
415
432
|
# Check for any getattr/setattr overrides
|
416
433
|
special_attributes = set(getattr_overrides.keys())
|
417
434
|
special_attributes.update(set(setattr_overrides.keys()))
|
@@ -39,7 +39,7 @@ def kubernetes():
|
|
39
39
|
"command inside a Kubernetes pod with the given options. Typically you do not call "
|
40
40
|
"this command directly; it is used internally by Metaflow."
|
41
41
|
)
|
42
|
-
@tracing.
|
42
|
+
@tracing.cli("kubernetes/step")
|
43
43
|
@click.argument("step-name")
|
44
44
|
@click.argument("code-package-sha")
|
45
45
|
@click.argument("code-package-url")
|
@@ -2,10 +2,12 @@ import collections
|
|
2
2
|
import glob
|
3
3
|
import json
|
4
4
|
import os
|
5
|
+
import re
|
5
6
|
import random
|
6
7
|
import tempfile
|
7
8
|
import time
|
8
9
|
from collections import namedtuple
|
10
|
+
from typing import List
|
9
11
|
|
10
12
|
from metaflow.exception import MetaflowInternalError, MetaflowTaggingError
|
11
13
|
from metaflow.metadata_provider.metadata import ObjectOrder
|
@@ -202,6 +204,70 @@ class LocalMetadataProvider(MetadataProvider):
|
|
202
204
|
"Tagging failed due to too many conflicting updates from other processes"
|
203
205
|
)
|
204
206
|
|
207
|
+
@classmethod
|
208
|
+
def filter_tasks_by_metadata(
|
209
|
+
cls,
|
210
|
+
flow_name: str,
|
211
|
+
run_id: str,
|
212
|
+
step_name: str,
|
213
|
+
field_name: str,
|
214
|
+
pattern: str,
|
215
|
+
) -> List[str]:
|
216
|
+
"""
|
217
|
+
Filter tasks by metadata field and pattern, returning task pathspecs that match criteria.
|
218
|
+
|
219
|
+
Parameters
|
220
|
+
----------
|
221
|
+
flow_name : str
|
222
|
+
Identifier for the flow
|
223
|
+
run_id : str
|
224
|
+
Identifier for the run
|
225
|
+
step_name : str
|
226
|
+
Name of the step to query tasks from
|
227
|
+
field_name : str
|
228
|
+
Name of metadata field to query
|
229
|
+
pattern : str
|
230
|
+
Pattern to match in metadata field value
|
231
|
+
|
232
|
+
Returns
|
233
|
+
-------
|
234
|
+
List[str]
|
235
|
+
List of task pathspecs that match the query criteria
|
236
|
+
"""
|
237
|
+
tasks = cls.get_object("step", "task", {}, None, flow_name, run_id, step_name)
|
238
|
+
if not tasks:
|
239
|
+
return []
|
240
|
+
|
241
|
+
regex = re.compile(pattern)
|
242
|
+
matching_task_pathspecs = []
|
243
|
+
|
244
|
+
for task in tasks:
|
245
|
+
task_id = task.get("task_id")
|
246
|
+
if not task_id:
|
247
|
+
continue
|
248
|
+
|
249
|
+
if pattern == ".*":
|
250
|
+
# If the pattern is ".*", we can match all tasks without reading metadata
|
251
|
+
matching_task_pathspecs.append(
|
252
|
+
f"{flow_name}/{run_id}/{step_name}/{task_id}"
|
253
|
+
)
|
254
|
+
continue
|
255
|
+
|
256
|
+
metadata = cls.get_object(
|
257
|
+
"task", "metadata", {}, None, flow_name, run_id, step_name, task_id
|
258
|
+
)
|
259
|
+
|
260
|
+
if any(
|
261
|
+
meta.get("field_name") == field_name
|
262
|
+
and regex.match(meta.get("value", ""))
|
263
|
+
for meta in metadata
|
264
|
+
):
|
265
|
+
matching_task_pathspecs.append(
|
266
|
+
f"{flow_name}/{run_id}/{step_name}/{task_id}"
|
267
|
+
)
|
268
|
+
|
269
|
+
return matching_task_pathspecs
|
270
|
+
|
205
271
|
@classmethod
|
206
272
|
def _get_object_internal(
|
207
273
|
cls, obj_type, obj_order, sub_type, sub_order, filters, attempt, *args
|
@@ -4,6 +4,7 @@ import time
|
|
4
4
|
|
5
5
|
import requests
|
6
6
|
|
7
|
+
from typing import List
|
7
8
|
from metaflow.exception import (
|
8
9
|
MetaflowException,
|
9
10
|
MetaflowInternalError,
|
@@ -13,6 +14,7 @@ from metaflow.metadata_provider import MetadataProvider
|
|
13
14
|
from metaflow.metadata_provider.heartbeat import HB_URL_KEY
|
14
15
|
from metaflow.metaflow_config import SERVICE_HEADERS, SERVICE_RETRY_COUNT, SERVICE_URL
|
15
16
|
from metaflow.sidecar import Message, MessageTypes, Sidecar
|
17
|
+
from urllib.parse import urlencode
|
16
18
|
from metaflow.util import version_parse
|
17
19
|
|
18
20
|
|
@@ -318,6 +320,55 @@ class ServiceMetadataProvider(MetadataProvider):
|
|
318
320
|
self._register_system_metadata(run_id, step_name, task["task_id"], attempt)
|
319
321
|
return task["task_id"], did_create
|
320
322
|
|
323
|
+
@classmethod
|
324
|
+
def filter_tasks_by_metadata(
|
325
|
+
cls,
|
326
|
+
flow_name: str,
|
327
|
+
run_id: str,
|
328
|
+
step_name: str,
|
329
|
+
field_name: str,
|
330
|
+
pattern: str,
|
331
|
+
) -> List[str]:
|
332
|
+
"""
|
333
|
+
Filter tasks by metadata field and pattern, returning task pathspecs that match criteria.
|
334
|
+
|
335
|
+
Parameters
|
336
|
+
----------
|
337
|
+
flow_name : str
|
338
|
+
Flow name, that the run belongs to.
|
339
|
+
run_id: str
|
340
|
+
Run id, together with flow_id, that identifies the specific Run whose tasks to query
|
341
|
+
step_name: str
|
342
|
+
Step name to query tasks from
|
343
|
+
field_name: str
|
344
|
+
Metadata field name to query
|
345
|
+
pattern: str
|
346
|
+
Pattern to match in metadata field value
|
347
|
+
|
348
|
+
Returns
|
349
|
+
-------
|
350
|
+
List[str]
|
351
|
+
List of task pathspecs that satisfy the query
|
352
|
+
"""
|
353
|
+
query_params = {
|
354
|
+
"metadata_field_name": field_name,
|
355
|
+
"pattern": pattern,
|
356
|
+
"step_name": step_name,
|
357
|
+
}
|
358
|
+
url = ServiceMetadataProvider._obj_path(flow_name, run_id, step_name)
|
359
|
+
url = f"{url}/filtered_tasks?{urlencode(query_params)}"
|
360
|
+
try:
|
361
|
+
resp = cls._request(None, url, "GET")
|
362
|
+
except Exception as e:
|
363
|
+
if e.http_code == 404:
|
364
|
+
# filter_tasks_by_metadata endpoint does not exist in the version of metadata service
|
365
|
+
# deployed currently. Raise a more informative error message.
|
366
|
+
raise MetaflowInternalError(
|
367
|
+
"The version of metadata service deployed currently does not support filtering tasks by metadata. "
|
368
|
+
"Upgrade Metadata service to version 2.15 or greater to use this feature."
|
369
|
+
) from e
|
370
|
+
return resp
|
371
|
+
|
321
372
|
@staticmethod
|
322
373
|
def _obj_path(
|
323
374
|
flow_name,
|
@@ -10,7 +10,7 @@ import tarfile
|
|
10
10
|
import time
|
11
11
|
from urllib.error import URLError
|
12
12
|
from urllib.request import urlopen
|
13
|
-
from metaflow.metaflow_config import DATASTORE_LOCAL_DIR
|
13
|
+
from metaflow.metaflow_config import DATASTORE_LOCAL_DIR
|
14
14
|
from metaflow.plugins import DATASTORES
|
15
15
|
from metaflow.plugins.pypi.utils import MICROMAMBA_MIRROR_URL, MICROMAMBA_URL
|
16
16
|
from metaflow.util import which
|
@@ -329,8 +329,6 @@ if __name__ == "__main__":
|
|
329
329
|
|
330
330
|
@timer
|
331
331
|
def fast_setup_environment(architecture, storage, env, prefix, pkgs_dir):
|
332
|
-
install_fast_initializer(architecture)
|
333
|
-
|
334
332
|
# Get package urls
|
335
333
|
conda_pkgs = env["conda"]
|
336
334
|
pypi_pkgs = env.get("pypi", [])
|
@@ -381,7 +379,9 @@ if __name__ == "__main__":
|
|
381
379
|
with open(os.path.join(manifest_dir, MAGIC_FILE)) as f:
|
382
380
|
env = json.load(f)[id_][architecture]
|
383
381
|
|
384
|
-
if
|
382
|
+
if datastore_type == "s3":
|
383
|
+
# TODO: Remove this once fast-initializer is ready for all datastores
|
384
|
+
install_fast_initializer(architecture)
|
385
385
|
fast_setup_environment(architecture, storage, env, prefix, pkgs_dir)
|
386
386
|
else:
|
387
387
|
setup_environment(
|
metaflow/runner/click_api.py
CHANGED
@@ -97,12 +97,13 @@ def _method_sanity_check(
|
|
97
97
|
check_type(supplied_v, annotations[supplied_k])
|
98
98
|
except TypeCheckError:
|
99
99
|
raise TypeError(
|
100
|
-
"Invalid type for '%s' (%s), expected: '%s', default is '%s'"
|
100
|
+
"Invalid type for '%s' (%s), expected: '%s', default is '%s' but found '%s'"
|
101
101
|
% (
|
102
102
|
supplied_k,
|
103
103
|
type(supplied_k),
|
104
104
|
annotations[supplied_k],
|
105
105
|
defaults[supplied_k],
|
106
|
+
str(supplied_v),
|
106
107
|
)
|
107
108
|
)
|
108
109
|
|
@@ -218,7 +219,7 @@ def get_inspect_param_obj(p: Union[click.Argument, click.Option], kind: str):
|
|
218
219
|
default=inspect.Parameter.empty if is_vararg else p.default,
|
219
220
|
annotation=annotation,
|
220
221
|
),
|
221
|
-
annotation,
|
222
|
+
Optional[TTuple[annotation]] if is_vararg else annotation,
|
222
223
|
)
|
223
224
|
|
224
225
|
|
@@ -392,7 +393,9 @@ class MetaflowAPI(object):
|
|
392
393
|
options = params.pop("options", {})
|
393
394
|
|
394
395
|
for _, v in args.items():
|
395
|
-
if
|
396
|
+
if v is None:
|
397
|
+
continue
|
398
|
+
if isinstance(v, (list, tuple)):
|
396
399
|
for i in v:
|
397
400
|
components.append(i)
|
398
401
|
else:
|
@@ -49,7 +49,7 @@ def process_messages(worker_type, worker):
|
|
49
49
|
|
50
50
|
|
51
51
|
@click.command(help="Initialize workers")
|
52
|
-
@tracing.
|
52
|
+
@tracing.cli("sidecar")
|
53
53
|
@click.argument("worker-type")
|
54
54
|
def main(worker_type):
|
55
55
|
sidecar_type = SIDECARS.get(worker_type)
|
metaflow/task.py
CHANGED
@@ -493,6 +493,25 @@ class MetaflowTask(object):
|
|
493
493
|
)
|
494
494
|
)
|
495
495
|
|
496
|
+
# Add runtime dag information to the metadata of the task
|
497
|
+
foreach_execution_path = ",".join(
|
498
|
+
[
|
499
|
+
"{}:{}".format(foreach_frame.step, foreach_frame.index)
|
500
|
+
for foreach_frame in foreach_stack
|
501
|
+
]
|
502
|
+
)
|
503
|
+
if foreach_execution_path:
|
504
|
+
metadata.extend(
|
505
|
+
[
|
506
|
+
MetaDatum(
|
507
|
+
field="foreach-execution-path",
|
508
|
+
value=foreach_execution_path,
|
509
|
+
type="foreach-execution-path",
|
510
|
+
tags=metadata_tags,
|
511
|
+
),
|
512
|
+
]
|
513
|
+
)
|
514
|
+
|
496
515
|
self.metadata.register_metadata(
|
497
516
|
run_id,
|
498
517
|
step_name,
|
@@ -559,6 +578,7 @@ class MetaflowTask(object):
|
|
559
578
|
self.flow._success = False
|
560
579
|
self.flow._task_ok = None
|
561
580
|
self.flow._exception = None
|
581
|
+
|
562
582
|
# Note: All internal flow attributes (ie: non-user artifacts)
|
563
583
|
# should either be set prior to running the user code or listed in
|
564
584
|
# FlowSpec._EPHEMERAL to allow for proper merging/importing of
|
@@ -616,7 +636,6 @@ class MetaflowTask(object):
|
|
616
636
|
"graph_info": self.flow._graph_info,
|
617
637
|
}
|
618
638
|
)
|
619
|
-
|
620
639
|
for deco in decorators:
|
621
640
|
deco.task_pre_step(
|
622
641
|
step_name,
|
@@ -728,7 +747,7 @@ class MetaflowTask(object):
|
|
728
747
|
value=attempt_ok,
|
729
748
|
type="internal_attempt_status",
|
730
749
|
tags=["attempt_id:{0}".format(retry_count)],
|
731
|
-
)
|
750
|
+
),
|
732
751
|
],
|
733
752
|
)
|
734
753
|
|
metaflow/tracing/__init__.py
CHANGED
@@ -20,15 +20,15 @@ def post_fork():
|
|
20
20
|
yield
|
21
21
|
|
22
22
|
|
23
|
-
def
|
24
|
-
def
|
23
|
+
def cli(name: str):
|
24
|
+
def cli_wrap(func):
|
25
25
|
@wraps(func)
|
26
26
|
def wrapper_func(*args, **kwargs):
|
27
27
|
return func(*args, **kwargs)
|
28
28
|
|
29
29
|
return wrapper_func
|
30
30
|
|
31
|
-
return
|
31
|
+
return cli_wrap
|
32
32
|
|
33
33
|
|
34
34
|
def inject_tracing_vars(env_dict: Dict[str, str]) -> Dict[str, str]:
|
@@ -40,7 +40,9 @@ def get_trace_id() -> str:
|
|
40
40
|
|
41
41
|
|
42
42
|
@contextlib.contextmanager
|
43
|
-
def traced(name, attrs=
|
43
|
+
def traced(name, attrs=None):
|
44
|
+
if attrs is None:
|
45
|
+
attrs = {}
|
44
46
|
yield
|
45
47
|
|
46
48
|
|
@@ -54,17 +56,15 @@ def tracing(func):
|
|
54
56
|
|
55
57
|
if not DISABLE_TRACING and (CONSOLE_TRACE_ENABLED or OTEL_ENDPOINT or ZIPKIN_ENDPOINT):
|
56
58
|
try:
|
57
|
-
# Overrides No-Op implementations if a specific provider is configured.
|
58
59
|
from .tracing_modules import (
|
59
60
|
init_tracing,
|
60
61
|
post_fork,
|
61
|
-
|
62
|
+
cli,
|
62
63
|
inject_tracing_vars,
|
63
64
|
get_trace_id,
|
64
65
|
traced,
|
65
66
|
tracing,
|
66
67
|
)
|
67
|
-
|
68
68
|
except ImportError as e:
|
69
69
|
# We keep the errors silent by default so that having tracing environment variables present
|
70
70
|
# does not affect users with no need for tracing.
|
@@ -3,60 +3,53 @@ from metaflow.metaflow_config import (
|
|
3
3
|
OTEL_ENDPOINT,
|
4
4
|
ZIPKIN_ENDPOINT,
|
5
5
|
CONSOLE_TRACE_ENABLED,
|
6
|
+
SERVICE_AUTH_KEY,
|
7
|
+
SERVICE_HEADERS,
|
6
8
|
)
|
7
9
|
|
8
10
|
|
9
11
|
def get_span_exporter():
|
10
|
-
|
11
|
-
|
12
|
+
exporter_map = {
|
13
|
+
OTEL_ENDPOINT: _create_otel_exporter,
|
14
|
+
ZIPKIN_ENDPOINT: _create_zipkin_exporter,
|
15
|
+
CONSOLE_TRACE_ENABLED: _create_console_exporter,
|
16
|
+
}
|
12
17
|
|
13
|
-
|
14
|
-
|
18
|
+
for config, create_exporter in exporter_map.items():
|
19
|
+
if config:
|
20
|
+
return create_exporter()
|
15
21
|
|
16
|
-
|
17
|
-
|
18
|
-
else:
|
19
|
-
print("WARNING: endpoints not set up for Opentelemetry", file=sys.stderr)
|
20
|
-
return
|
22
|
+
print("WARNING: endpoints not set up for OpenTelemetry", file=sys.stderr)
|
23
|
+
return None
|
21
24
|
|
22
25
|
|
23
|
-
def
|
26
|
+
def _create_otel_exporter():
|
24
27
|
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
|
25
28
|
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
29
|
+
if not any([SERVICE_AUTH_KEY, SERVICE_HEADERS]):
|
30
|
+
print("WARNING: no auth settings for OpenTelemetry", file=sys.stderr)
|
31
|
+
return None
|
32
|
+
|
33
|
+
config = {
|
34
|
+
"endpoint": OTEL_ENDPOINT,
|
35
|
+
"timeout": 1,
|
36
|
+
}
|
30
37
|
|
31
38
|
if SERVICE_AUTH_KEY:
|
32
|
-
|
33
|
-
endpoint=OTEL_ENDPOINT,
|
34
|
-
headers={"x-api-key": SERVICE_AUTH_KEY},
|
35
|
-
timeout=1,
|
36
|
-
)
|
39
|
+
config["headers"] = {"x-api-key": SERVICE_AUTH_KEY}
|
37
40
|
elif SERVICE_HEADERS:
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
print("WARNING: no auth settings for Opentelemetry", file=sys.stderr)
|
45
|
-
return
|
46
|
-
return span_exporter
|
47
|
-
|
48
|
-
|
49
|
-
def set_zipkin_exporter():
|
41
|
+
config["headers"] = SERVICE_HEADERS
|
42
|
+
|
43
|
+
return OTLPSpanExporter(**config)
|
44
|
+
|
45
|
+
|
46
|
+
def _create_zipkin_exporter():
|
50
47
|
from opentelemetry.exporter.zipkin.proto.http import ZipkinExporter
|
51
48
|
|
52
|
-
|
53
|
-
endpoint=ZIPKIN_ENDPOINT,
|
54
|
-
)
|
55
|
-
return span_exporter
|
49
|
+
return ZipkinExporter(endpoint=ZIPKIN_ENDPOINT)
|
56
50
|
|
57
51
|
|
58
|
-
def
|
52
|
+
def _create_console_exporter():
|
59
53
|
from opentelemetry.sdk.trace.export import ConsoleSpanExporter
|
60
54
|
|
61
|
-
|
62
|
-
return span_exporter
|
55
|
+
return ConsoleSpanExporter()
|
@@ -13,45 +13,51 @@ from typing import Dict, List, Optional
|
|
13
13
|
from opentelemetry import trace as trace_api, context
|
14
14
|
from .span_exporter import get_span_exporter
|
15
15
|
|
16
|
-
tracer_provider = None
|
16
|
+
tracer_provider: Optional[TracerProvider] = None
|
17
17
|
|
18
18
|
|
19
19
|
def init_tracing():
|
20
20
|
global tracer_provider
|
21
21
|
if tracer_provider is not None:
|
22
|
-
print("Tracing already initialized", file=sys.stderr)
|
23
22
|
return
|
24
23
|
|
25
24
|
from .propagator import EnvPropagator
|
26
25
|
|
27
26
|
set_global_textmap(EnvPropagator(None))
|
28
|
-
span_exporter = get_span_exporter()
|
29
27
|
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
service_name = "metaflow-awsbatch"
|
34
|
-
else:
|
35
|
-
service_name = "metaflow-local"
|
28
|
+
span_exporter = get_span_exporter()
|
29
|
+
if span_exporter is None:
|
30
|
+
return
|
36
31
|
|
37
32
|
tracer_provider = TracerProvider(
|
38
|
-
resource=Resource.create(
|
33
|
+
resource=Resource.create(
|
34
|
+
{
|
35
|
+
SERVICE_NAME: "metaflow",
|
36
|
+
}
|
37
|
+
)
|
39
38
|
)
|
40
39
|
trace_api.set_tracer_provider(tracer_provider)
|
41
40
|
|
42
41
|
span_processor = BatchSpanProcessor(span_exporter)
|
43
42
|
tracer_provider.add_span_processor(span_processor)
|
44
43
|
|
45
|
-
|
44
|
+
try:
|
45
|
+
from opentelemetry.instrumentation.requests import RequestsInstrumentor
|
46
46
|
|
47
|
-
|
47
|
+
RequestsInstrumentor().instrument(
|
48
|
+
tracer_provider=tracer_provider,
|
49
|
+
)
|
50
|
+
except ImportError:
|
51
|
+
pass
|
48
52
|
|
49
53
|
|
50
54
|
@contextlib.contextmanager
|
51
55
|
def post_fork():
|
52
56
|
global tracer_provider
|
57
|
+
|
53
58
|
tracer_provider = None
|
54
59
|
init_tracing()
|
60
|
+
|
55
61
|
token = context.attach(extract(os.environ))
|
56
62
|
try:
|
57
63
|
tracer = trace_api.get_tracer_provider().get_tracer(__name__)
|
@@ -59,47 +65,27 @@ def post_fork():
|
|
59
65
|
"fork", kind=trace_api.SpanKind.SERVER
|
60
66
|
) as span:
|
61
67
|
span.set_attribute("cmd", " ".join(sys.argv))
|
68
|
+
span.set_attribute("pid", str(os.getpid()))
|
62
69
|
yield
|
63
70
|
finally:
|
64
71
|
context.detach(token)
|
65
72
|
|
66
73
|
|
67
|
-
def
|
68
|
-
|
69
|
-
if i > 0 and tokens[i - 1] == before_token:
|
70
|
-
return tok
|
71
|
-
|
72
|
-
|
73
|
-
def cli_entrypoint(name: str):
|
74
|
-
def cli_entrypoint_wrap(func):
|
74
|
+
def cli(name: str):
|
75
|
+
def cli_wrap(func):
|
75
76
|
@wraps(func)
|
76
77
|
def wrapper_func(*args, **kwargs):
|
77
78
|
global tracer_provider
|
78
|
-
|
79
79
|
init_tracing()
|
80
80
|
|
81
|
-
|
81
|
+
if tracer_provider is None:
|
82
|
+
return func(*args, **kwargs)
|
82
83
|
|
83
84
|
token = context.attach(extract(os.environ))
|
84
85
|
try:
|
85
86
|
tracer = trace_api.get_tracer_provider().get_tracer(__name__)
|
86
|
-
|
87
|
-
card_subcommand = _extract_token_after(sys.argv, "card")
|
88
|
-
|
89
|
-
step_name = _extract_token_after(sys.argv, "step")
|
90
|
-
task_id = _extract_token_after(sys.argv, "--task-id")
|
91
|
-
run_id = _extract_token_after(sys.argv, "--run-id")
|
92
|
-
if step_name and task_id and run_id:
|
93
|
-
better_name = "/".join([run_id, step_name, task_id])
|
94
|
-
elif card_subcommand:
|
95
|
-
better_name = "card/" + card_subcommand
|
96
|
-
elif "run" in sys.argv:
|
97
|
-
better_name = "run"
|
98
|
-
else:
|
99
|
-
better_name = None
|
100
|
-
|
101
87
|
with tracer.start_as_current_span(
|
102
|
-
|
88
|
+
name, kind=trace_api.SpanKind.SERVER
|
103
89
|
) as span:
|
104
90
|
span.set_attribute("cmd", " ".join(sys.argv))
|
105
91
|
span.set_attribute("pid", str(os.getpid()))
|
@@ -113,7 +99,7 @@ def cli_entrypoint(name: str):
|
|
113
99
|
|
114
100
|
return wrapper_func
|
115
101
|
|
116
|
-
return
|
102
|
+
return cli_wrap
|
117
103
|
|
118
104
|
|
119
105
|
def inject_tracing_vars(env_dict: Dict[str, str]) -> Dict[str, str]:
|
@@ -122,23 +108,32 @@ def inject_tracing_vars(env_dict: Dict[str, str]) -> Dict[str, str]:
|
|
122
108
|
|
123
109
|
|
124
110
|
def get_trace_id() -> str:
|
125
|
-
|
111
|
+
try:
|
112
|
+
return format_trace_id(trace_api.get_current_span().get_span_context().trace_id)
|
113
|
+
except Exception:
|
114
|
+
return ""
|
126
115
|
|
127
116
|
|
128
117
|
@contextlib.contextmanager
|
129
|
-
def traced(name, attrs=
|
118
|
+
def traced(name: str, attrs: Optional[Dict] = None):
|
119
|
+
if tracer_provider is None:
|
120
|
+
yield
|
121
|
+
return
|
130
122
|
tracer = trace_api.get_tracer_provider().get_tracer(__name__)
|
131
123
|
with tracer.start_as_current_span(name) as span:
|
132
|
-
|
133
|
-
|
124
|
+
if attrs:
|
125
|
+
for k, v in attrs.items():
|
126
|
+
span.set_attribute(k, v)
|
134
127
|
yield
|
135
128
|
|
136
129
|
|
137
130
|
def tracing(func):
|
138
131
|
@wraps(func)
|
139
132
|
def wrapper_func(*args, **kwargs):
|
140
|
-
|
133
|
+
if tracer_provider is None:
|
134
|
+
return func(*args, **kwargs)
|
141
135
|
|
136
|
+
tracer = trace_api.get_tracer_provider().get_tracer(func.__module__)
|
142
137
|
with tracer.start_as_current_span(func.__name__):
|
143
138
|
return func(*args, **kwargs)
|
144
139
|
|
metaflow/version.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
metaflow_version = "2.14.
|
1
|
+
metaflow_version = "2.14.2"
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: metaflow
|
3
|
-
Version: 2.14.
|
3
|
+
Version: 2.14.2
|
4
4
|
Summary: Metaflow: More Data Science, Less Engineering
|
5
5
|
Author: Metaflow Developers
|
6
6
|
Author-email: help@metaflow.org
|
@@ -26,7 +26,7 @@ License-File: LICENSE
|
|
26
26
|
Requires-Dist: requests
|
27
27
|
Requires-Dist: boto3
|
28
28
|
Provides-Extra: stubs
|
29
|
-
Requires-Dist: metaflow-stubs==2.14.
|
29
|
+
Requires-Dist: metaflow-stubs==2.14.2; extra == "stubs"
|
30
30
|
Dynamic: author
|
31
31
|
Dynamic: author-email
|
32
32
|
Dynamic: classifier
|