ob-metaflow 2.14.0.2rc0__py2.py3-none-any.whl → 2.14.2.1__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of ob-metaflow might be problematic.

Files changed (35)
  1. metaflow/cli.py +0 -23
  2. metaflow/cli_components/run_cmds.py +34 -14
  3. metaflow/cli_components/step_cmd.py +2 -0
  4. metaflow/client/core.py +241 -1
  5. metaflow/cmd/main_cli.py +1 -1
  6. metaflow/metadata_provider/heartbeat.py +1 -0
  7. metaflow/metadata_provider/metadata.py +33 -0
  8. metaflow/metaflow_config.py +5 -9
  9. metaflow/mflog/save_logs.py +2 -2
  10. metaflow/plugins/argo/argo_workflows.py +12 -14
  11. metaflow/plugins/argo/argo_workflows_cli.py +2 -2
  12. metaflow/plugins/datatools/s3/s3op.py +4 -4
  13. metaflow/plugins/env_escape/server.py +7 -0
  14. metaflow/plugins/env_escape/stub.py +21 -4
  15. metaflow/plugins/kubernetes/kubernetes.py +0 -4
  16. metaflow/plugins/kubernetes/kubernetes_cli.py +1 -9
  17. metaflow/plugins/kubernetes/kubernetes_decorator.py +0 -5
  18. metaflow/plugins/kubernetes/kubernetes_job.py +3 -5
  19. metaflow/plugins/kubernetes/kubernetes_jobsets.py +0 -8
  20. metaflow/plugins/metadata_providers/local.py +66 -0
  21. metaflow/plugins/metadata_providers/service.py +51 -0
  22. metaflow/plugins/pypi/bootstrap.py +4 -4
  23. metaflow/runner/click_api.py +6 -3
  24. metaflow/sidecar/sidecar_worker.py +1 -1
  25. metaflow/task.py +21 -2
  26. metaflow/tracing/__init__.py +7 -7
  27. metaflow/tracing/span_exporter.py +31 -38
  28. metaflow/tracing/tracing_modules.py +35 -43
  29. metaflow/version.py +1 -1
  30. {ob_metaflow-2.14.0.2rc0.dist-info → ob_metaflow-2.14.2.1.dist-info}/METADATA +2 -2
  31. {ob_metaflow-2.14.0.2rc0.dist-info → ob_metaflow-2.14.2.1.dist-info}/RECORD +35 -35
  32. {ob_metaflow-2.14.0.2rc0.dist-info → ob_metaflow-2.14.2.1.dist-info}/LICENSE +0 -0
  33. {ob_metaflow-2.14.0.2rc0.dist-info → ob_metaflow-2.14.2.1.dist-info}/WHEEL +0 -0
  34. {ob_metaflow-2.14.0.2rc0.dist-info → ob_metaflow-2.14.2.1.dist-info}/entry_points.txt +0 -0
  35. {ob_metaflow-2.14.0.2rc0.dist-info → ob_metaflow-2.14.2.1.dist-info}/top_level.txt +0 -0
metaflow/plugins/env_escape/server.py CHANGED
@@ -264,6 +264,7 @@ class Server(object):
     def serve(self, path=None, port=None):
         # Open up a connection
         if path is not None:
+            # Keep the print line to facilitate debugging
             # print("SERVER: Starting at %s" % path)
             sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
             __try_op__("bind", sock.bind, BIND_RETRY, path)
@@ -503,6 +504,12 @@ class Server(object):
         class_type = self._known_classes.get(class_name)
         if class_type is None:
             raise ValueError("Unknown class %s" % class_name)
+        # Check if __init__ is overridden
+        override_mapping = self._overrides.get(class_type)
+        if override_mapping:
+            override_func = override_mapping.get("__init__")
+            if override_func:
+                return override_func(None, class_type, *args, **kwargs)
         return class_type(*args, **kwargs)

     def _handle_subclasscheck(self, target, class_name, otherclass_name, reverse=False):
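
The server now consults its override table before constructing a known class. A hedged sketch of what a registered `__init__` override could look like, given the calling convention above (the override name and extra attribute are hypothetical):

    # Hypothetical entry for self._overrides[SomeClass]["__init__"].
    # The server calls it as override_func(None, class_type, *args, **kwargs),
    # so it receives the real class and can customize construction.
    def init_override(_, class_type, *args, **kwargs):
        obj = class_type(*args, **kwargs)  # delegate to the real constructor
        obj.patched_by_override = True     # extra server-side initialization
        return obj
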
metaflow/plugins/env_escape/stub.py CHANGED
@@ -276,9 +276,22 @@ class MetaWithConnection(StubMetaClass):
         if len(args) > 0 and id(args[0]) == id(cls.___class_connection___):
             return super(MetaWithConnection, cls).__call__(*args, **kwargs)
         else:
-            return cls.___class_connection___.stub_request(
-                None, OP_INIT, cls.___class_remote_class_name___, *args, **kwargs
-            )
+            if hasattr(cls, "__overriden_init__"):
+                return cls.__overriden_init__(
+                    None,
+                    functools.partial(
+                        cls.___class_connection___.stub_request,
+                        None,
+                        OP_INIT,
+                        cls.___class_remote_class_name___,
+                    ),
+                    *args,
+                    **kwargs
+                )
+            else:
+                return cls.___class_connection___.stub_request(
+                    None, OP_INIT, cls.___class_remote_class_name___, *args, **kwargs
+                )

     def __subclasscheck__(cls, subclass):
         subclass_name = "%s.%s" % (subclass.__module__, subclass.__name__)
@@ -381,7 +394,10 @@ def create_class(
             name = name[7:]
             method_type = CLASS_METHOD
         if name in overriden_methods:
-            if method_type == NORMAL_METHOD:
+            if name == "__init__":
+                class_dict["__overriden_init__"] = overriden_methods["__init__"]
+
+            elif method_type == NORMAL_METHOD:
                 class_dict[name] = (
                     lambda override, orig_method: lambda obj, *args, **kwargs: override(
                         obj, functools.partial(orig_method, obj), *args, **kwargs
@@ -412,6 +428,7 @@ def create_class(
             class_dict[name] = _make_method(
                 method_type, connection, class_name, name, doc
             )
+
     # Check for any getattr/setattr overrides
     special_attributes = set(getattr_overrides.keys())
     special_attributes.update(set(setattr_overrides.keys()))
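
On the client side, `MetaWithConnection.__call__` now hands an overridden `__init__` a `functools.partial` that performs the remote OP_INIT when invoked. A minimal sketch of such an override (names are hypothetical, for illustration only):

    # Hypothetical client-side __init__ override: remote_init is the
    # functools.partial built above; calling it issues the OP_INIT
    # stub_request and returns the remote stub object.
    def init_override(_, remote_init, *args, **kwargs):
        kwargs.setdefault("timeout", 30)  # tweak arguments before remote init
        return remote_init(*args, **kwargs)
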
metaflow/plugins/kubernetes/kubernetes.py CHANGED
@@ -194,7 +194,6 @@ class Kubernetes(object):
         port=None,
         num_parallel=None,
         qos=None,
-        extended_resources=None,
     ):
         name = "js-%s" % str(uuid4())[:6]
         jobset = (
@@ -228,7 +227,6 @@ class Kubernetes(object):
                 port=port,
                 num_parallel=num_parallel,
                 qos=qos,
-                extended_resources=extended_resources,
             )
             .environment_variable("METAFLOW_CODE_SHA", code_package_sha)
             .environment_variable("METAFLOW_CODE_URL", code_package_url)
@@ -506,7 +504,6 @@ class Kubernetes(object):
         name_pattern=None,
         qos=None,
         annotations=None,
-        extended_resources=None,
     ):
         if env is None:
             env = {}
@@ -549,7 +546,6 @@ class Kubernetes(object):
                 shared_memory=shared_memory,
                 port=port,
                 qos=qos,
-                extended_resources=extended_resources,
             )
             .environment_variable("METAFLOW_CODE_SHA", code_package_sha)
             .environment_variable("METAFLOW_CODE_URL", code_package_url)
metaflow/plugins/kubernetes/kubernetes_cli.py CHANGED
@@ -39,7 +39,7 @@ def kubernetes():
     "command inside a Kubernetes pod with the given options. Typically you do not call "
     "this command directly; it is used internally by Metaflow."
 )
-@tracing.cli_entrypoint("kubernetes/step")
+@tracing.cli("kubernetes/step")
 @click.argument("step-name")
 @click.argument("code-package-sha")
 @click.argument("code-package-url")
@@ -145,12 +145,6 @@ def kubernetes():
     type=JSONTypeClass(),
     multiple=False,
 )
-@click.option(
-    "--extended-resources",
-    default=None,
-    type=JSONTypeClass(),
-    multiple=False,
-)
 @click.pass_context
 def step(
     ctx,
@@ -182,7 +176,6 @@ def step(
     qos=None,
     labels=None,
     annotations=None,
-    extended_resources=None,
     **kwargs
 ):
     def echo(msg, stream="stderr", job_id=None, **kwargs):
@@ -326,7 +319,6 @@ def step(
             qos=qos,
             labels=labels,
             annotations=annotations,
-            extended_resources=extended_resources,
         )
     except Exception:
         traceback.print_exc(chain=False)
metaflow/plugins/kubernetes/kubernetes_decorator.py CHANGED
@@ -123,9 +123,6 @@ class KubernetesDecorator(StepDecorator):
         Only applicable when @parallel is used.
     qos: str, default: Burstable
         Quality of Service class to assign to the pod. Supported values are: Guaranteed, Burstable, BestEffort
-    extended_resources: Dict[str, str], optional, default None
-        Extended resources to be requested for the pod.
-        https://kubernetes.io/docs/tasks/administer-cluster/extended-resource-node/
     """

     name = "kubernetes"
@@ -156,7 +153,6 @@ class KubernetesDecorator(StepDecorator):
         "executable": None,
         "hostname_resolution_timeout": 10 * 60,
         "qos": KUBERNETES_QOS,
-        "extended_resources": {},
     }
     package_url = None
     package_sha = None
@@ -492,7 +488,6 @@ class KubernetesDecorator(StepDecorator):
                 "persistent_volume_claims",
                 "labels",
                 "annotations",
-                "extended_resources",
             ]:
                 cli_args.command_options[k] = json.dumps(v)
             else:
metaflow/plugins/kubernetes/kubernetes_job.py CHANGED
@@ -89,11 +89,10 @@ class KubernetesJob(object):

         additional_obp_configs = {
             "OBP_PERIMETER": initial_configs["OBP_PERIMETER"],
-            "OBP_INTEGRATIONS_URL": initial_configs["OBP_INTEGRATIONS_URL"],
+            "OBP_INTEGRATIONS_URL": initial_configs[
+                "OBP_INTEGRATIONS_URL"
+            ],
         }
-        extended_resources = self._kwargs.get("extended_resources", {})
-        qos_requests = {**qos_requests, **extended_resources}
-        qos_limits = {**qos_limits, **extended_resources}

         return client.V1JobSpec(
             # Retries are handled by Metaflow when it is responsible for
@@ -119,7 +118,6 @@ class KubernetesJob(object):
             containers=[
                 client.V1Container(
                     command=self._kwargs["command"],
-                    security_context=client.V1SecurityContext(privileged=True),
                     termination_message_policy="FallbackToLogsOnError",
                     ports=(
                         []
metaflow/plugins/kubernetes/kubernetes_jobsets.py CHANGED
@@ -560,11 +560,6 @@ class JobSetSpec(object):
             self._kwargs["memory"],
             self._kwargs["disk"],
         )
-
-        extended_resources = self._kwargs.get("extended_resources", {})
-        qos_requests = {**qos_requests, **extended_resources}
-        qos_limits = {**qos_limits, **extended_resources}
-
         return dict(
             name=self.name,
             template=client.api_client.ApiClient().sanitize_for_serialization(
@@ -616,9 +611,6 @@ class JobSetSpec(object):
                                 )
                             ]
                         ),
-                        security_context=client.V1SecurityContext(
-                            privileged=True
-                        ),
                         env=[
                             client.V1EnvVar(name=k, value=str(v))
                             for k, v in self._kwargs.get(
metaflow/plugins/metadata_providers/local.py CHANGED
@@ -2,10 +2,12 @@ import collections
 import glob
 import json
 import os
+import re
 import random
 import tempfile
 import time
 from collections import namedtuple
+from typing import List

 from metaflow.exception import MetaflowInternalError, MetaflowTaggingError
 from metaflow.metadata_provider.metadata import ObjectOrder
@@ -202,6 +204,70 @@ class LocalMetadataProvider(MetadataProvider):
                 "Tagging failed due to too many conflicting updates from other processes"
             )

+    @classmethod
+    def filter_tasks_by_metadata(
+        cls,
+        flow_name: str,
+        run_id: str,
+        step_name: str,
+        field_name: str,
+        pattern: str,
+    ) -> List[str]:
+        """
+        Filter tasks by metadata field and pattern, returning task pathspecs that match criteria.
+
+        Parameters
+        ----------
+        flow_name : str
+            Identifier for the flow
+        run_id : str
+            Identifier for the run
+        step_name : str
+            Name of the step to query tasks from
+        field_name : str
+            Name of metadata field to query
+        pattern : str
+            Pattern to match in metadata field value
+
+        Returns
+        -------
+        List[str]
+            List of task pathspecs that match the query criteria
+        """
+        tasks = cls.get_object("step", "task", {}, None, flow_name, run_id, step_name)
+        if not tasks:
+            return []
+
+        regex = re.compile(pattern)
+        matching_task_pathspecs = []
+
+        for task in tasks:
+            task_id = task.get("task_id")
+            if not task_id:
+                continue
+
+            if pattern == ".*":
+                # If the pattern is ".*", we can match all tasks without reading metadata
+                matching_task_pathspecs.append(
+                    f"{flow_name}/{run_id}/{step_name}/{task_id}"
+                )
+                continue
+
+            metadata = cls.get_object(
+                "task", "metadata", {}, None, flow_name, run_id, step_name, task_id
+            )
+
+            if any(
+                meta.get("field_name") == field_name
+                and regex.match(meta.get("value", ""))
+                for meta in metadata
+            ):
+                matching_task_pathspecs.append(
+                    f"{flow_name}/{run_id}/{step_name}/{task_id}"
+                )
+
+        return matching_task_pathspecs
+
     @classmethod
     def _get_object_internal(
         cls, obj_type, obj_order, sub_type, sub_order, filters, attempt, *args
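
For illustration, the new classmethod can be called directly against the local metadata store. The flow, run, and step names below are hypothetical; the `foreach-execution-path` field is the one registered by the metaflow/task.py change later in this diff:

    # Find tasks in the "train" step whose foreach-execution-path
    # begins with "start:0" (first branch of the outer foreach).
    pathspecs = LocalMetadataProvider.filter_tasks_by_metadata(
        flow_name="MyFlow",
        run_id="1739",
        step_name="train",
        field_name="foreach-execution-path",
        pattern=r"start:0.*",
    )
    # Each entry is a task pathspec such as "MyFlow/1739/train/8421".
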
metaflow/plugins/metadata_providers/service.py CHANGED
@@ -4,6 +4,7 @@ import time

 import requests

+from typing import List
 from metaflow.exception import (
     MetaflowException,
     MetaflowInternalError,
@@ -13,6 +14,7 @@ from metaflow.metadata_provider import MetadataProvider
 from metaflow.metadata_provider.heartbeat import HB_URL_KEY
 from metaflow.metaflow_config import SERVICE_HEADERS, SERVICE_RETRY_COUNT, SERVICE_URL
 from metaflow.sidecar import Message, MessageTypes, Sidecar
+from urllib.parse import urlencode
 from metaflow.util import version_parse


@@ -318,6 +320,55 @@ class ServiceMetadataProvider(MetadataProvider):
             self._register_system_metadata(run_id, step_name, task["task_id"], attempt)
         return task["task_id"], did_create

+    @classmethod
+    def filter_tasks_by_metadata(
+        cls,
+        flow_name: str,
+        run_id: str,
+        step_name: str,
+        field_name: str,
+        pattern: str,
+    ) -> List[str]:
+        """
+        Filter tasks by metadata field and pattern, returning task pathspecs that match criteria.
+
+        Parameters
+        ----------
+        flow_name : str
+            Flow name that the run belongs to.
+        run_id : str
+            Run id, together with flow_name, that identifies the specific Run whose tasks to query.
+        step_name : str
+            Step name to query tasks from.
+        field_name : str
+            Metadata field name to query.
+        pattern : str
+            Pattern to match in metadata field value.
+
+        Returns
+        -------
+        List[str]
+            List of task pathspecs that satisfy the query
+        """
+        query_params = {
+            "metadata_field_name": field_name,
+            "pattern": pattern,
+            "step_name": step_name,
+        }
+        url = ServiceMetadataProvider._obj_path(flow_name, run_id, step_name)
+        url = f"{url}/filtered_tasks?{urlencode(query_params)}"
+        try:
+            resp = cls._request(None, url, "GET")
+        except Exception as e:
+            if e.http_code == 404:
+                # filter_tasks_by_metadata endpoint does not exist in the version of metadata service
+                # deployed currently. Raise a more informative error message.
+                raise MetaflowInternalError(
+                    "The version of metadata service deployed currently does not support filtering tasks by metadata. "
+                    "Upgrade Metadata service to version 2.15 or greater to use this feature."
+                ) from e
+        return resp
+
     @staticmethod
     def _obj_path(
         flow_name,
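
For illustration, `urlencode` percent-escapes the query values, so regex metacharacters in `pattern` survive the round trip to the service (values here are hypothetical):

    from urllib.parse import urlencode

    params = {
        "metadata_field_name": "foreach-execution-path",
        "pattern": "train:[0-9]+",
        "step_name": "train",
    }
    print(urlencode(params))
    # metadata_field_name=foreach-execution-path&pattern=train%3A%5B0-9%5D%2B&step_name=train
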
metaflow/plugins/pypi/bootstrap.py CHANGED
@@ -10,7 +10,7 @@ import tarfile
 import time
 from urllib.error import URLError
 from urllib.request import urlopen
-from metaflow.metaflow_config import DATASTORE_LOCAL_DIR, CONDA_USE_FAST_INIT
+from metaflow.metaflow_config import DATASTORE_LOCAL_DIR
 from metaflow.plugins import DATASTORES
 from metaflow.plugins.pypi.utils import MICROMAMBA_MIRROR_URL, MICROMAMBA_URL
 from metaflow.util import which
@@ -329,8 +329,6 @@ if __name__ == "__main__":

     @timer
     def fast_setup_environment(architecture, storage, env, prefix, pkgs_dir):
-        install_fast_initializer(architecture)
-
         # Get package urls
         conda_pkgs = env["conda"]
         pypi_pkgs = env.get("pypi", [])
@@ -381,7 +379,9 @@ if __name__ == "__main__":
     with open(os.path.join(manifest_dir, MAGIC_FILE)) as f:
         env = json.load(f)[id_][architecture]

-    if CONDA_USE_FAST_INIT:
+    if datastore_type == "s3":
+        # TODO: Remove this once fast-initializer is ready for all datastores
+        install_fast_initializer(architecture)
         fast_setup_environment(architecture, storage, env, prefix, pkgs_dir)
     else:
         setup_environment(
metaflow/runner/click_api.py CHANGED
@@ -97,12 +97,13 @@ def _method_sanity_check(
             check_type(supplied_v, annotations[supplied_k])
         except TypeCheckError:
             raise TypeError(
-                "Invalid type for '%s' (%s), expected: '%s', default is '%s'"
+                "Invalid type for '%s' (%s), expected: '%s', default is '%s' but found '%s'"
                 % (
                     supplied_k,
                     type(supplied_k),
                     annotations[supplied_k],
                    defaults[supplied_k],
+                    str(supplied_v),
                 )
             )

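A quick sketch of the failure mode the richer message covers, assuming typeguard 4.x where `check_type(value, expected_type)` raises `TypeCheckError` (the parameter name and values are made up):

    from typeguard import TypeCheckError, check_type

    try:
        check_type("100", int)  # caller supplied a str for an int option
    except TypeCheckError:
        # The message now ends with: ... default is '0' but found '100',
        # so the offending value is visible, not just the expected type.
        pass
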
@@ -218,7 +219,7 @@ def get_inspect_param_obj(p: Union[click.Argument, click.Option], kind: str):
             default=inspect.Parameter.empty if is_vararg else p.default,
             annotation=annotation,
         ),
-        annotation,
+        Optional[TTuple[annotation]] if is_vararg else annotation,
     )

@@ -392,7 +393,9 @@ class MetaflowAPI(object):
         options = params.pop("options", {})

         for _, v in args.items():
-            if isinstance(v, list):
+            if v is None:
+                continue
+            if isinstance(v, (list, tuple)):
                 for i in v:
                     components.append(i)
             else:
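
Click hands values collected with `multiple=True` or `nargs=-1` over as tuples rather than lists, which the old `isinstance(v, list)` check missed, and `None` now short-circuits instead of being appended. A minimal standalone sketch of the fixed flattening (names hypothetical):

    def flatten_args(args):
        components = []
        for v in args.values():
            if v is None:          # unset option: skip entirely
                continue
            if isinstance(v, (list, tuple)):
                components.extend(v)  # splice multi-value options
            else:
                components.append(v)
        return components

    print(flatten_args({"paths": ("a.py", "b.py"), "tag": "demo", "x": None}))
    # ['a.py', 'b.py', 'demo']
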
metaflow/sidecar/sidecar_worker.py CHANGED
@@ -49,7 +49,7 @@ def process_messages(worker_type, worker):


 @click.command(help="Initialize workers")
-@tracing.cli_entrypoint("sidecar")
+@tracing.cli("sidecar")
 @click.argument("worker-type")
 def main(worker_type):
     sidecar_type = SIDECARS.get(worker_type)
metaflow/task.py CHANGED
@@ -493,6 +493,25 @@ class MetaflowTask(object):
                 )
             )

+        # Add runtime dag information to the metadata of the task
+        foreach_execution_path = ",".join(
+            [
+                "{}:{}".format(foreach_frame.step, foreach_frame.index)
+                for foreach_frame in foreach_stack
+            ]
+        )
+        if foreach_execution_path:
+            metadata.extend(
+                [
+                    MetaDatum(
+                        field="foreach-execution-path",
+                        value=foreach_execution_path,
+                        type="foreach-execution-path",
+                        tags=metadata_tags,
+                    ),
+                ]
+            )
+
         self.metadata.register_metadata(
             run_id,
             step_name,
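
For illustration, a task nested two foreach levels deep records a comma-separated path of step:index pairs, which is what the new `filter_tasks_by_metadata` providers match against (step names hypothetical):

    # Hypothetical two-level foreach stack.
    frames = [("split_countries", 3), ("split_cities", 0)]
    foreach_execution_path = ",".join(
        "{}:{}".format(step, index) for step, index in frames
    )
    print(foreach_execution_path)  # split_countries:3,split_cities:0
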
@@ -559,6 +578,7 @@ class MetaflowTask(object):
         self.flow._success = False
         self.flow._task_ok = None
         self.flow._exception = None
+
         # Note: All internal flow attributes (ie: non-user artifacts)
         # should either be set prior to running the user code or listed in
         # FlowSpec._EPHEMERAL to allow for proper merging/importing of
@@ -616,7 +636,6 @@ class MetaflowTask(object):
                 "graph_info": self.flow._graph_info,
             }
         )
-
         for deco in decorators:
             deco.task_pre_step(
                 step_name,
@@ -728,7 +747,7 @@ class MetaflowTask(object):
                         value=attempt_ok,
                         type="internal_attempt_status",
                         tags=["attempt_id:{0}".format(retry_count)],
-                    )
+                    ),
                 ],
             )

metaflow/tracing/__init__.py CHANGED
@@ -20,15 +20,15 @@ def post_fork():
     yield


-def cli_entrypoint(name: str):
-    def cli_entrypoint_wrap(func):
+def cli(name: str):
+    def cli_wrap(func):
         @wraps(func)
         def wrapper_func(*args, **kwargs):
             return func(*args, **kwargs)

         return wrapper_func

-    return cli_entrypoint_wrap
+    return cli_wrap


 def inject_tracing_vars(env_dict: Dict[str, str]) -> Dict[str, str]:
@@ -40,7 +40,9 @@ def get_trace_id() -> str:


 @contextlib.contextmanager
-def traced(name, attrs={}):
+def traced(name, attrs=None):
+    if attrs is None:
+        attrs = {}
     yield

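The old `attrs={}` default is the classic shared-mutable-default pitfall: one dict object is created at definition time and reused by every call that relies on the default. A standalone sketch of the hazard the fix removes:

    def buggy(attrs={}):
        attrs["calls"] = attrs.get("calls", 0) + 1
        return attrs

    print(buggy())  # {'calls': 1}
    print(buggy())  # {'calls': 2} -- the same dict object leaks across calls
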
@@ -54,17 +56,15 @@ def tracing(func):

 if not DISABLE_TRACING and (CONSOLE_TRACE_ENABLED or OTEL_ENDPOINT or ZIPKIN_ENDPOINT):
     try:
-        # Overrides No-Op implementations if a specific provider is configured.
         from .tracing_modules import (
             init_tracing,
             post_fork,
-            cli_entrypoint,
+            cli,
             inject_tracing_vars,
             get_trace_id,
             traced,
             tracing,
         )
-
     except ImportError as e:
         # We keep the errors silent by default so that having tracing environment variables present
         # does not affect users with no need for tracing.
metaflow/tracing/span_exporter.py CHANGED
@@ -3,6 +3,8 @@ from metaflow.metaflow_config import (
     OTEL_ENDPOINT,
     ZIPKIN_ENDPOINT,
     CONSOLE_TRACE_ENABLED,
+    SERVICE_AUTH_KEY,
+    SERVICE_HEADERS,
 )

 if OTEL_ENDPOINT:
@@ -11,56 +13,47 @@ if OTEL_ENDPOINT:


 def get_span_exporter():
-    if OTEL_ENDPOINT:
-        return set_otel_exporter()
+    exporter_map = {
+        OTEL_ENDPOINT: _create_otel_exporter,
+        ZIPKIN_ENDPOINT: _create_zipkin_exporter,
+        CONSOLE_TRACE_ENABLED: _create_console_exporter,
+    }

-    elif ZIPKIN_ENDPOINT:
-        return set_zipkin_exporter()
+    for config, create_exporter in exporter_map.items():
+        if config:
+            return create_exporter()

-    elif CONSOLE_TRACE_ENABLED:
-        return set_console_exporter()
-    else:
-        print("WARNING: endpoints not set up for Opentelemetry", file=sys.stderr)
-        return
+    print("WARNING: endpoints not set up for OpenTelemetry", file=sys.stderr)
+    return None


-def set_otel_exporter():
+def _create_otel_exporter():
     from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter

-    from metaflow.metaflow_config import (
-        SERVICE_AUTH_KEY,
-        SERVICE_HEADERS,
-    )
+    if not any([SERVICE_AUTH_KEY, SERVICE_HEADERS]):
+        print("WARNING: no auth settings for OpenTelemetry", file=sys.stderr)
+        return None
+
+    config = {
+        "endpoint": OTEL_ENDPOINT,
+        "timeout": 15,
+    }

     if SERVICE_AUTH_KEY:
-        span_exporter = OTLPSpanExporter(
-            endpoint=OTEL_ENDPOINT,
-            headers={"x-api-key": SERVICE_AUTH_KEY},
-            timeout=15,
-        )
+        config["headers"] = {"x-api-key": SERVICE_AUTH_KEY}
     elif SERVICE_HEADERS:
-        span_exporter = OTLPSpanExporter(
-            endpoint=OTEL_ENDPOINT,
-            headers=SERVICE_HEADERS,
-            timeout=15,
-        )
-    else:
-        print("WARNING: no auth settings for Opentelemetry", file=sys.stderr)
-        return
-    return span_exporter
-
-
-def set_zipkin_exporter():
+        config["headers"] = SERVICE_HEADERS
+
+    return OTLPSpanExporter(**config)
+
+
+def _create_zipkin_exporter():
     from opentelemetry.exporter.zipkin.proto.http import ZipkinExporter

-    span_exporter = ZipkinExporter(
-        endpoint=ZIPKIN_ENDPOINT,
-    )
-    return span_exporter
+    return ZipkinExporter(endpoint=ZIPKIN_ENDPOINT)


-def set_console_exporter():
+def _create_console_exporter():
     from opentelemetry.sdk.trace.export import ConsoleSpanExporter

-    span_exporter = ConsoleSpanExporter()
-    return span_exporter
+    return ConsoleSpanExporter()
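
For illustration, the exporter returned by `get_span_exporter()` would typically be attached to a tracer provider via the standard opentelemetry-sdk API; this wiring is not part of the diff itself:

    from opentelemetry import trace
    from opentelemetry.sdk.trace import TracerProvider
    from opentelemetry.sdk.trace.export import BatchSpanProcessor

    exporter = get_span_exporter()
    if exporter is not None:  # None when no endpoint (or no auth) is configured
        provider = TracerProvider()
        provider.add_span_processor(BatchSpanProcessor(exporter))
        trace.set_tracer_provider(provider)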