docent-python 0.1.41a0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of docent-python might be problematic. Click here for more details.

Files changed (59)
  1. docent/__init__.py +4 -0
  2. docent/_llm_util/__init__.py +0 -0
  3. docent/_llm_util/data_models/__init__.py +0 -0
  4. docent/_llm_util/data_models/exceptions.py +48 -0
  5. docent/_llm_util/data_models/llm_output.py +331 -0
  6. docent/_llm_util/llm_cache.py +193 -0
  7. docent/_llm_util/llm_svc.py +472 -0
  8. docent/_llm_util/model_registry.py +134 -0
  9. docent/_llm_util/providers/__init__.py +0 -0
  10. docent/_llm_util/providers/anthropic.py +537 -0
  11. docent/_llm_util/providers/common.py +41 -0
  12. docent/_llm_util/providers/google.py +530 -0
  13. docent/_llm_util/providers/openai.py +745 -0
  14. docent/_llm_util/providers/openrouter.py +375 -0
  15. docent/_llm_util/providers/preference_types.py +104 -0
  16. docent/_llm_util/providers/provider_registry.py +164 -0
  17. docent/_log_util/__init__.py +3 -0
  18. docent/_log_util/logger.py +141 -0
  19. docent/data_models/__init__.py +14 -0
  20. docent/data_models/_tiktoken_util.py +91 -0
  21. docent/data_models/agent_run.py +473 -0
  22. docent/data_models/chat/__init__.py +37 -0
  23. docent/data_models/chat/content.py +56 -0
  24. docent/data_models/chat/message.py +191 -0
  25. docent/data_models/chat/tool.py +109 -0
  26. docent/data_models/citation.py +187 -0
  27. docent/data_models/formatted_objects.py +84 -0
  28. docent/data_models/judge.py +17 -0
  29. docent/data_models/metadata_util.py +16 -0
  30. docent/data_models/regex.py +56 -0
  31. docent/data_models/transcript.py +305 -0
  32. docent/data_models/util.py +170 -0
  33. docent/judges/__init__.py +23 -0
  34. docent/judges/analysis.py +77 -0
  35. docent/judges/impl.py +587 -0
  36. docent/judges/runner.py +129 -0
  37. docent/judges/stats.py +205 -0
  38. docent/judges/types.py +320 -0
  39. docent/judges/util/forgiving_json.py +108 -0
  40. docent/judges/util/meta_schema.json +86 -0
  41. docent/judges/util/meta_schema.py +29 -0
  42. docent/judges/util/parse_output.py +68 -0
  43. docent/judges/util/voting.py +139 -0
  44. docent/loaders/load_inspect.py +215 -0
  45. docent/py.typed +0 -0
  46. docent/samples/__init__.py +3 -0
  47. docent/samples/load.py +9 -0
  48. docent/samples/log.eval +0 -0
  49. docent/samples/tb_airline.json +1 -0
  50. docent/sdk/__init__.py +0 -0
  51. docent/sdk/agent_run_writer.py +317 -0
  52. docent/sdk/client.py +1186 -0
  53. docent/sdk/llm_context.py +432 -0
  54. docent/trace.py +2741 -0
  55. docent/trace_temp.py +1086 -0
  56. docent_python-0.1.41a0.dist-info/METADATA +33 -0
  57. docent_python-0.1.41a0.dist-info/RECORD +59 -0
  58. docent_python-0.1.41a0.dist-info/WHEEL +4 -0
  59. docent_python-0.1.41a0.dist-info/licenses/LICENSE.md +13 -0
docent/trace.py ADDED
@@ -0,0 +1,2741 @@
1
+ # pyright: reportUnnecessaryIsInstance=false
2
+
3
+ import atexit
4
+ import contextvars
5
+ import itertools
6
+ import json
7
+ import os
8
+ import sys
9
+ import threading
10
+ import time
11
+ import uuid
12
+ from collections import defaultdict
13
+ from contextlib import asynccontextmanager, contextmanager
14
+ from contextvars import ContextVar, Token
15
+ from datetime import datetime, timezone
16
+ from enum import Enum
17
+ from importlib.metadata import Distribution, distributions
18
+ from typing import (
19
+ Any,
20
+ AsyncIterator,
21
+ Callable,
22
+ Dict,
23
+ Iterator,
24
+ List,
25
+ Mapping,
26
+ Optional,
27
+ Sequence,
28
+ Set,
29
+ Union,
30
+ cast,
31
+ )
32
+
33
+ import requests
34
+ from opentelemetry import trace
35
+ from opentelemetry.context import Context
36
+ from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter as GRPCExporter
37
+ from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter as HTTPExporter
38
+ from opentelemetry.instrumentation.threading import ThreadingInstrumentor
39
+ from opentelemetry.sdk.resources import Resource
40
+ from opentelemetry.sdk.trace import ReadableSpan, SpanLimits, SpanProcessor, TracerProvider
41
+ from opentelemetry.sdk.trace.export import (
42
+ BatchSpanProcessor,
43
+ ConsoleSpanExporter,
44
+ SimpleSpanProcessor,
45
+ )
46
+ from opentelemetry.trace import Span
47
+ from requests import Response
48
+
49
+ from docent._log_util import get_logger
50
+
51
+ logger = get_logger(__name__)
52
+
53
+ # Default configuration
54
+ DEFAULT_ENDPOINT = "https://api.docent.transluce.org/rest/telemetry"
55
+ DEFAULT_COLLECTION_NAME = "default-collection-name"
56
+ ERROR_DETAIL_MAX_CHARS = 500
57
+
58
+ # Sentinel values for when tracing is disabled
59
+ DISABLED_AGENT_RUN_ID = "disabled"
60
+ DISABLED_TRANSCRIPT_ID = "disabled"
61
+ DISABLED_TRANSCRIPT_GROUP_ID = "disabled"
62
+
63
+
64
def _get_disabled_agent_run_id(agent_run_id: Optional[str]) -> str:
    """Resolve the agent run ID to report while tracing is disabled.

    Returns the caller-supplied ID unchanged, or the DISABLED_AGENT_RUN_ID
    sentinel when none was given.
    """
    return DISABLED_AGENT_RUN_ID if agent_run_id is None else agent_run_id
69
+
70
+
71
def _get_disabled_transcript_id(transcript_id: Optional[str]) -> str:
    """Resolve the transcript ID to report while tracing is disabled.

    Returns the caller-supplied ID unchanged, or the DISABLED_TRANSCRIPT_ID
    sentinel when none was given.
    """
    return DISABLED_TRANSCRIPT_ID if transcript_id is None else transcript_id
76
+
77
+
78
def _get_disabled_transcript_group_id(transcript_group_id: Optional[str]) -> str:
    """Resolve the transcript group ID to report while tracing is disabled.

    Returns the caller-supplied ID unchanged, or the
    DISABLED_TRANSCRIPT_GROUP_ID sentinel when none was given.
    """
    return DISABLED_TRANSCRIPT_GROUP_ID if transcript_group_id is None else transcript_group_id
83
+
84
+
85
class DocentTelemetryRequestError(RuntimeError):
    """Error raised when a request to the Docent telemetry backend is rejected by the server."""
87
+
88
+
89
class Instruments(Enum):
    """Enumeration of available instrument types.

    Each member names a third-party SDK that DocentTracer can auto-instrument
    via the matching opentelemetry-instrumentation package.
    """

    OPENAI = "openai"
    ANTHROPIC = "anthropic"
    BEDROCK = "bedrock"  # AWS Bedrock, instrumented via boto3
    LANGCHAIN = "langchain"  # NOTE: excluded from DocentTracer's default instrument set
    GOOGLE_GENERATIVEAI = "google_generativeai"
97
+
98
+
99
+ class DocentTracer:
100
+ """
101
+ Manages Docent tracing setup and provides tracing utilities.
102
+ """
103
+
104
    def __init__(
        self,
        collection_name: str = DEFAULT_COLLECTION_NAME,
        collection_id: Optional[str] = None,
        agent_run_id: Optional[str] = None,
        endpoint: Union[str, List[str]] = DEFAULT_ENDPOINT,
        headers: Optional[Dict[str, str]] = None,
        api_key: Optional[str] = None,
        enable_console_export: bool = False,
        enable_otlp_export: bool = True,
        disable_batch: bool = False,
        instruments: Optional[Set[Instruments]] = None,
        block_instruments: Optional[Set[Instruments]] = None,
    ):
        """Configure (but do not start) a Docent tracer.

        Invalid arguments are logged and replaced with safe defaults rather than
        raising, so telemetry setup cannot crash the host application. Call
        ``initialize()`` to actually set up exporters and instrumentation.

        Args:
            collection_name: Human-readable name, used as the OTEL service name.
            collection_id: Stable collection ID; a UUID is generated when absent/invalid.
            agent_run_id: Default agent run ID; a UUID is generated when absent/invalid.
            endpoint: One endpoint or a list of endpoints to export spans to.
            headers: Extra HTTP headers for exporters; non-string keys/values are dropped.
            api_key: Bearer token; written to the Authorization header, overriding
                any Authorization entry from ``headers``.
            enable_console_export: Also print spans to the console.
            enable_otlp_export: Export spans over OTLP.
            disable_batch: Use SimpleSpanProcessor (immediate export) instead of batching.
            instruments: Instruments to enable (default: all except LANGCHAIN).
            block_instruments: Instruments to forcibly disable.
        """
        self._initialized: bool = False
        # Check if tracing is disabled via environment variable
        if _global_tracing_disabled:
            self._disabled = True
            logger.info("Docent tracing disabled.")
            return

        if not isinstance(collection_name, str) or not collection_name:
            logger.error(
                "collection_name must be provided as a non-empty string (got %r); defaulting to %s.",
                collection_name,
                DEFAULT_COLLECTION_NAME,
            )
            self.collection_name = DEFAULT_COLLECTION_NAME
        else:
            self.collection_name = collection_name

        if collection_id is not None:
            if isinstance(collection_id, str) and collection_id:
                self.collection_id = collection_id
            else:
                logger.error(
                    "collection_id must be provided as a non-empty string (got %r); generating a new ID.",
                    collection_id,
                )
                self.collection_id = str(uuid.uuid4())
        else:
            self.collection_id = str(uuid.uuid4())

        if agent_run_id is not None:
            if isinstance(agent_run_id, str) and agent_run_id:
                self.default_agent_run_id = agent_run_id
            else:
                logger.error(
                    "default agent_run_id must be a non-empty string (got %r); generating a new ID.",
                    agent_run_id,
                )
                self.default_agent_run_id = str(uuid.uuid4())
        else:
            self.default_agent_run_id = str(uuid.uuid4())
        self.endpoints: List[str] = self._prepare_endpoints(endpoint)

        # Build headers with authentication if provided
        if headers is None:
            self.headers: Dict[str, str] = {}
        elif not isinstance(headers, dict):
            logger.error(
                "HTTP headers for Docent tracing must be provided as a dict (got %r).",
                headers,
            )
            self.headers = {}
        else:
            # Keep only str -> str pairs; anything else is logged and skipped.
            sanitized_headers: Dict[str, str] = {}
            for header_key, header_value in headers.items():
                if not isinstance(header_key, str):
                    logger.error(
                        "HTTP header keys must be strings; skipping key %r of type %s.",
                        header_key,
                        type(header_key).__name__,
                    )
                    continue
                if not isinstance(header_value, str):
                    logger.error(
                        "HTTP header values must be strings; skipping '%s' value of type %s.",
                        header_key,
                        type(header_value).__name__,
                    )
                    continue
                sanitized_headers[header_key] = header_value
            self.headers = sanitized_headers

        # Handle API key authentication (takes precedence over custom headers)
        if api_key is not None:
            if isinstance(api_key, str) and api_key:
                self.headers["Authorization"] = f"Bearer {api_key}"
            else:
                logger.error(
                    "api_key must be a non-empty string (got %r); ignoring value.", api_key
                )

        if self.headers.get("Authorization"):
            logger.info(f"Using API key authentication for {self.collection_name}")
        else:
            logger.info(f"No authentication configured for {self.collection_name}")

        self.enable_console_export = enable_console_export
        self.enable_otlp_export = enable_otlp_export
        self.disable_batch = disable_batch
        # LANGCHAIN is excluded from the default set; callers can opt in via `instruments`.
        self.disabled_instruments: Set[Instruments] = {Instruments.LANGCHAIN}
        self.instruments = instruments or (set(Instruments) - self.disabled_instruments)
        self.block_instruments = block_instruments or set()

        # Use separate tracer provider to avoid interfering with existing OTEL setup
        self._tracer_provider: Optional[TracerProvider] = None
        self._root_context: Optional[Context] = Context()
        self._tracer: Optional[trace.Tracer] = None
        self._cleanup_registered: bool = False
        self._disabled: bool = False
        self._spans_processors: List[Union[BatchSpanProcessor, SimpleSpanProcessor]] = []

        # Base HTTP endpoint for direct API calls (scores, metadata, trace-done)
        # NOTE(review): only assigned when endpoints is non-empty; _prepare_endpoints
        # always returns at least one entry, so in practice this always runs.
        if len(self.endpoints) > 0:
            self._api_endpoint_base: Optional[str] = self.endpoints[0]

        # Context variables for agent_run_id and transcript_id
        self._collection_id_var: ContextVar[str] = contextvars.ContextVar("docent_collection_id")
        self._agent_run_id_var: ContextVar[str] = contextvars.ContextVar("docent_agent_run_id")
        self._transcript_id_var: ContextVar[str] = contextvars.ContextVar("docent_transcript_id")
        self._transcript_group_id_var: ContextVar[str] = contextvars.ContextVar(
            "docent_transcript_group_id"
        )
        self._attributes_var: ContextVar[dict[str, Any]] = contextvars.ContextVar(
            "docent_attributes"
        )
        # Store atomic span order counters per transcript_id to persist across context switches
        self._transcript_counters: defaultdict[str, itertools.count[int]] = defaultdict(
            lambda: itertools.count(0)
        )
        self._transcript_counter_lock = threading.Lock()
        self._transcript_group_states: dict[str, dict[str, Optional[str]]] = {}
        self._transcript_group_state_lock = threading.Lock()
        self._flush_lock = threading.Lock()
        # Metadata events recorded before any span exists are queued in these
        # maps and attached to a later span (see _emit_pending_metadata_events).
        self._pending_agent_run_metadata_events: defaultdict[str, List[Dict[str, Any]]] = (
            defaultdict(list)
        )
        self._pending_transcript_metadata_events: defaultdict[str, List[Dict[str, Any]]] = (
            defaultdict(list)
        )
        # Transcript-group events are keyed by agent_run_id so they flush even if no span carries the group attribute.
        self._pending_transcript_group_metadata_events: defaultdict[str, List[Dict[str, Any]]] = (
            defaultdict(list)
        )
        self._pending_metadata_lock = threading.Lock()
251
+
252
+ def _prepare_endpoints(self, endpoint: Union[str, Sequence[str]]) -> List[str]:
253
+ """
254
+ Normalize endpoint input with simple type checks; fall back to DEFAULT_ENDPOINT as needed.
255
+ """
256
+ endpoints: List[str] = []
257
+
258
+ if isinstance(endpoint, str):
259
+ candidate = endpoint.strip()
260
+ if not candidate:
261
+ logger.error(
262
+ "Docent telemetry endpoint cannot be empty; defaulting to %s.", DEFAULT_ENDPOINT
263
+ )
264
+ else:
265
+ endpoints.append(candidate)
266
+ elif isinstance(endpoint, (list, tuple)):
267
+ for index, value in enumerate(endpoint):
268
+ if not isinstance(value, str):
269
+ logger.error(
270
+ "Endpoint entries must be strings; entry at index %s is %s (%r). Skipping it.",
271
+ index,
272
+ type(value).__name__,
273
+ value,
274
+ )
275
+ continue
276
+ candidate = value.strip()
277
+ if not candidate:
278
+ logger.error(
279
+ "Endpoint entries cannot be empty strings (index %s). Skipping it.",
280
+ index,
281
+ )
282
+ continue
283
+ endpoints.append(candidate)
284
+ else:
285
+ logger.error(
286
+ "Endpoint must be a string or list/tuple of strings (got %r). Defaulting to %s.",
287
+ endpoint,
288
+ DEFAULT_ENDPOINT,
289
+ )
290
+
291
+ if not endpoints:
292
+ endpoints = [DEFAULT_ENDPOINT]
293
+
294
+ return endpoints
295
+
296
+ def get_current_agent_run_id(self) -> Optional[str]:
297
+ """
298
+ Get the current agent run ID from context.
299
+
300
+ Retrieves the agent run ID that was set in the current execution context.
301
+ If no agent run context is active, returns the default agent run ID.
302
+
303
+ Returns:
304
+ The current agent run ID if available, or the default agent run ID
305
+ if no context is active.
306
+ """
307
+ try:
308
+ return self._agent_run_id_var.get()
309
+ except LookupError:
310
+ return self.default_agent_run_id
311
+
312
+ def _register_cleanup(self):
313
+ """Register cleanup handlers."""
314
+ if self._cleanup_registered:
315
+ return
316
+
317
+ # Register atexit handler
318
+ atexit.register(self.cleanup)
319
+
320
+ self._cleanup_registered = True
321
+
322
+ def _next_span_order(self, transcript_id: str) -> int:
323
+ """
324
+ Get the next span order for a given transcript_id.
325
+ Thread-safe and guaranteed to be unique and monotonic.
326
+ """
327
+ with self._transcript_counter_lock:
328
+ return next(self._transcript_counters[transcript_id])
329
+
330
+ def _get_current_span(self) -> Optional[Span]:
331
+ """Return the active span, ignoring non-recording placeholders."""
332
+ try:
333
+ span = trace.get_current_span()
334
+ except Exception:
335
+ return None
336
+
337
+ try:
338
+ span_context = span.get_span_context()
339
+ except AttributeError:
340
+ return None
341
+
342
+ if span_context is None or not span_context.is_valid:
343
+ return None
344
+ return span
345
+
346
+ def _create_metadata_event(
347
+ self,
348
+ *,
349
+ name: str,
350
+ metadata: Optional[Dict[str, Any]],
351
+ attributes: Dict[str, Any],
352
+ timestamp_ns: Optional[int] = None,
353
+ ) -> Dict[str, Any]:
354
+ return {
355
+ "name": name,
356
+ "metadata": metadata or {},
357
+ "attributes": attributes,
358
+ "timestamp_ns": timestamp_ns or time.time_ns(),
359
+ }
360
+
361
+ def _add_metadata_event_to_span(self, span: Span, event: Dict[str, Any]) -> None:
362
+ if not hasattr(span, "add_event"):
363
+ return
364
+
365
+ event_attributes: Dict[str, Any] = dict(event.get("attributes", {}))
366
+ metadata_payload = cast(Optional[Dict[str, Any]], event.get("metadata"))
367
+ if metadata_payload is not None:
368
+ try:
369
+ event_attributes["metadata_json"] = json.dumps(metadata_payload)
370
+ except (TypeError, ValueError) as exc:
371
+ logger.warning("Failed to serialize metadata payload for span event: %s", exc)
372
+
373
+ timestamp_ns = event.get("timestamp_ns")
374
+ span.add_event(
375
+ event.get("name", "metadata"), attributes=event_attributes, timestamp=timestamp_ns
376
+ )
377
+
378
+ def _pop_pending_events(
379
+ self, store: defaultdict[str, List[Dict[str, Any]]], key: Optional[str]
380
+ ) -> List[Dict[str, Any]]:
381
+ if key is None:
382
+ return []
383
+ with self._pending_metadata_lock:
384
+ if key not in store:
385
+ return []
386
+ events = list(store[key])
387
+ del store[key]
388
+ return events
389
+
390
+ def _emit_pending_metadata_events(
391
+ self,
392
+ span: Span,
393
+ *,
394
+ agent_run_id: Optional[str],
395
+ transcript_id: Optional[str],
396
+ transcript_group_id: Optional[str],
397
+ ) -> None:
398
+ for event in self._pop_pending_events(
399
+ self._pending_agent_run_metadata_events, agent_run_id
400
+ ):
401
+ self._add_metadata_event_to_span(span, event)
402
+ for event in self._pop_pending_events(
403
+ self._pending_transcript_metadata_events, transcript_id
404
+ ):
405
+ self._add_metadata_event_to_span(span, event)
406
+ for event in self._pop_pending_events(
407
+ self._pending_transcript_group_metadata_events, agent_run_id
408
+ ):
409
+ self._add_metadata_event_to_span(span, event)
410
+
411
+ def _queue_metadata_event(
412
+ self,
413
+ store: defaultdict[str, List[Dict[str, Any]]],
414
+ key: Optional[str],
415
+ event: Dict[str, Any],
416
+ ) -> None:
417
+ if not key:
418
+ logger.warning("Metadata event discarded because no identifier was provided: %s", event)
419
+ return
420
+ with self._pending_metadata_lock:
421
+ store[key].append(event)
422
+
423
+ def _emit_or_queue_metadata_event(
424
+ self,
425
+ *,
426
+ store: defaultdict[str, List[Dict[str, Any]]],
427
+ key: Optional[str],
428
+ event: Dict[str, Any],
429
+ ) -> None:
430
+ span = self._get_current_span()
431
+ if span is not None:
432
+ try:
433
+ self._add_metadata_event_to_span(span, event)
434
+ return
435
+ except Exception as exc:
436
+ logger.warning("Failed to attach metadata event to active span: %s", exc)
437
+ self._queue_metadata_event(store, key, event)
438
+
439
+ def _get_optional_context_value(self, var: ContextVar[str]) -> Optional[str]:
440
+ """Fetch a context var without creating a default when unset."""
441
+ try:
442
+ return var.get()
443
+ except LookupError:
444
+ return None
445
+
446
+ def _has_pending_metadata(
447
+ self,
448
+ *,
449
+ agent_run_id: Optional[str],
450
+ transcript_id: Optional[str],
451
+ transcript_group_id: Optional[str],
452
+ ) -> bool:
453
+ with self._pending_metadata_lock:
454
+ if agent_run_id and self._pending_agent_run_metadata_events.get(agent_run_id):
455
+ return True
456
+ if transcript_id and self._pending_transcript_metadata_events.get(transcript_id):
457
+ return True
458
+ if agent_run_id and self._pending_transcript_group_metadata_events.get(agent_run_id):
459
+ return True
460
+ return False
461
+
462
+ def _flush_pending_metadata_events(
463
+ self,
464
+ *,
465
+ agent_run_id: Optional[str],
466
+ transcript_id: Optional[str],
467
+ transcript_group_id: Optional[str],
468
+ ) -> None:
469
+ """
470
+ Attach any queued metadata events to a synthetic span so data is not dropped when no further spans start.
471
+ """
472
+ if self.is_disabled() or self._tracer is None:
473
+ return
474
+
475
+ if not self._has_pending_metadata(
476
+ agent_run_id=agent_run_id,
477
+ transcript_id=transcript_id,
478
+ transcript_group_id=transcript_group_id,
479
+ ):
480
+ return
481
+
482
+ span = self._tracer.start_span("docent.metadata.flush", context=self._root_context)
483
+ try:
484
+ span.set_attribute("collection_id", self.collection_id)
485
+ if agent_run_id:
486
+ span.set_attribute("agent_run_id", agent_run_id)
487
+ if transcript_id:
488
+ span.set_attribute("transcript_id", transcript_id)
489
+ if transcript_group_id:
490
+ span.set_attribute("transcript_group_id", transcript_group_id)
491
+
492
+ self._emit_pending_metadata_events(
493
+ span,
494
+ agent_run_id=agent_run_id,
495
+ transcript_id=transcript_id,
496
+ transcript_group_id=transcript_group_id,
497
+ )
498
+ finally:
499
+ span.end()
500
+
501
+ def _init_spans_exporter(self, endpoint: str) -> Optional[Union[HTTPExporter, GRPCExporter]]:
502
+ """Initialize the appropriate span exporter based on endpoint."""
503
+ if not self.enable_otlp_export:
504
+ return None
505
+
506
+ try:
507
+ if "http" in endpoint.lower() or "https" in endpoint.lower():
508
+ http_exporter: HTTPExporter = HTTPExporter(
509
+ endpoint=f"{endpoint}/v1/traces", headers=self.headers, timeout=30
510
+ )
511
+ logger.debug(f"Initialized HTTP exporter for endpoint: {endpoint}/v1/traces")
512
+ return http_exporter
513
+ else:
514
+ grpc_exporter: GRPCExporter = GRPCExporter(
515
+ endpoint=endpoint, headers=self.headers, timeout=30
516
+ )
517
+ logger.debug(f"Initialized gRPC exporter for endpoint: {endpoint}")
518
+ return grpc_exporter
519
+ except Exception as e:
520
+ logger.error(f"Failed to initialize span exporter for {endpoint}: {e}")
521
+ return None
522
+
523
+ def _init_spans_exporters(self) -> List[Union[HTTPExporter, GRPCExporter]]:
524
+ """Initialize span exporters for all endpoints."""
525
+ exporters: List[Union[HTTPExporter, GRPCExporter]] = []
526
+
527
+ for endpoint in self.endpoints:
528
+ exporter = self._init_spans_exporter(endpoint)
529
+ if exporter:
530
+ exporters.append(exporter)
531
+ logger.info(f"Initialized exporter for endpoint: {endpoint}")
532
+ else:
533
+ logger.critical(f"Failed to initialize exporter for endpoint: {endpoint}")
534
+
535
+ return exporters
536
+
537
+ def _create_span_processor(
538
+ self, exporter: Union[HTTPExporter, GRPCExporter, ConsoleSpanExporter]
539
+ ) -> Union[SimpleSpanProcessor, BatchSpanProcessor]:
540
+ """Create appropriate span processor based on configuration."""
541
+ if self.disable_batch or _is_notebook():
542
+ simple_processor: SimpleSpanProcessor = SimpleSpanProcessor(exporter)
543
+ logger.debug("Created SimpleSpanProcessor for immediate export")
544
+ return simple_processor
545
+ else:
546
+ batch_processor: BatchSpanProcessor = BatchSpanProcessor(exporter)
547
+ logger.debug("Created BatchSpanProcessor for batched export")
548
+ return batch_processor
549
+
550
    def initialize(self):
        """Initialize Docent tracing setup.

        Builds an isolated TracerProvider, wires up exporters and the
        context-stamping span processor, and instruments any installed,
        enabled SDKs. Idempotent; on failure the tracer is disabled and the
        exception re-raised.
        """
        if self._initialized:
            return

        # If tracing is disabled, mark as initialized but don't set up anything
        if self.is_disabled():
            self._initialized = True
            return

        try:

            # Check for OTEL_SPAN_ATTRIBUTE_COUNT_LIMIT environment variable
            default_attribute_limit = 1024 * 16
            env_value = os.environ.get("OTEL_SPAN_ATTRIBUTE_COUNT_LIMIT", "0")
            # Non-numeric env values fall back to 0, so the default limit wins.
            env_limit = int(env_value) if env_value.isdigit() else 0
            attribute_limit = max(env_limit, default_attribute_limit)

            span_limits = SpanLimits(
                max_attributes=attribute_limit,
            )

            # Create our own isolated tracer provider
            self._tracer_provider = TracerProvider(
                resource=Resource.create({"service.name": self.collection_name}),
                span_limits=span_limits,
            )

            class ContextSpanProcessor(SpanProcessor):
                # Stamps Docent context (collection/agent-run/transcript IDs,
                # span order, custom attributes) onto every span at start time.
                def __init__(self, manager: "DocentTracer"):
                    self.manager: "DocentTracer" = manager

                def on_start(self, span: Span, parent_context: Optional[Context] = None) -> None:
                    # Add collection_id, agent_run_id, transcript_id, transcript_group_id, and any other current attributes
                    span.set_attribute("collection_id", self.manager.collection_id)

                    # Set agent_run_id from context
                    try:
                        agent_run_id: str = self.manager._agent_run_id_var.get()
                        if agent_run_id:
                            span.set_attribute("agent_run_id", agent_run_id)
                        else:
                            # Empty string in context: fall back to the default run ID.
                            span.set_attribute("agent_run_id_default", True)
                            span.set_attribute("agent_run_id", self.manager.default_agent_run_id)
                    except LookupError:
                        span.set_attribute("agent_run_id_default", True)
                        span.set_attribute("agent_run_id", self.manager.default_agent_run_id)

                    # Set transcript_group_id from context
                    try:
                        transcript_group_id: str = self.manager._transcript_group_id_var.get()
                        if transcript_group_id:
                            span.set_attribute("transcript_group_id", transcript_group_id)
                    except LookupError:
                        pass

                    # Set transcript_id from context
                    try:
                        transcript_id: str = self.manager._transcript_id_var.get()
                        if transcript_id:
                            span.set_attribute("transcript_id", transcript_id)
                            # Add atomic span order number
                            span_order: int = self.manager._next_span_order(transcript_id)
                            span.set_attribute("span_order", span_order)
                    except LookupError:
                        # transcript_id not available, skip it
                        pass

                    # Set custom attributes from context
                    try:
                        attributes: dict[str, Any] = self.manager._attributes_var.get()
                        for key, value in attributes.items():
                            span.set_attribute(key, value)
                    except LookupError:
                        # attributes not available, skip them
                        pass

                    # Debug logging for span creation
                    span_name = getattr(span, "name", "unknown")
                    span_attrs = getattr(span, "attributes", {})
                    logger.debug(
                        f"Created span: name='{span_name}', collection_id={self.manager.collection_id}, agent_run_id={span_attrs.get('agent_run_id')}, transcript_id={span_attrs.get('transcript_id')}"
                    )

                    # Piggyback any queued metadata events onto this span.
                    self.manager._emit_pending_metadata_events(
                        span,
                        agent_run_id=span_attrs.get("agent_run_id"),
                        transcript_id=span_attrs.get("transcript_id"),
                        transcript_group_id=span_attrs.get("transcript_group_id"),
                    )

                def on_end(self, span: ReadableSpan) -> None:
                    pass

                def shutdown(self) -> None:
                    pass

                def force_flush(self, timeout_millis: Optional[float] = None) -> bool:
                    return True

            # Configure span exporters for our isolated provider
            if self.enable_otlp_export:
                otlp_exporters: List[Union[HTTPExporter, GRPCExporter]] = (
                    self._init_spans_exporters()
                )

                if otlp_exporters:
                    # Create a processor for each exporter
                    for exporter in otlp_exporters:
                        otlp_processor: Union[SimpleSpanProcessor, BatchSpanProcessor] = (
                            self._create_span_processor(exporter)
                        )
                        self._tracer_provider.add_span_processor(otlp_processor)
                        self._spans_processors.append(otlp_processor)

                    logger.info(
                        f"Added {len(otlp_exporters)} OTLP exporters for {len(self.endpoints)} endpoints"
                    )

            if self.enable_console_export:
                console_exporter: ConsoleSpanExporter = ConsoleSpanExporter()
                console_processor: Union[SimpleSpanProcessor, BatchSpanProcessor] = (
                    self._create_span_processor(console_exporter)
                )
                self._tracer_provider.add_span_processor(console_processor)
                self._spans_processors.append(console_processor)

            # Add our custom context span processor
            context_processor = ContextSpanProcessor(self)
            self._tracer_provider.add_span_processor(context_processor)

            # Get tracer from our isolated provider (don't set global provider)
            self._tracer = self._tracer_provider.get_tracer(__name__)

            # Instrument threading for better context propagation
            try:
                ThreadingInstrumentor().instrument()
            except Exception as e:
                logger.warning(f"Failed to instrument threading: {e}")

            enabled_instruments = self.instruments - self.block_instruments

            # Instrument OpenAI with our isolated tracer provider
            if Instruments.OPENAI in enabled_instruments:
                try:
                    if is_package_installed("openai"):
                        from opentelemetry.instrumentation.openai import OpenAIInstrumentor

                        OpenAIInstrumentor().instrument(tracer_provider=self._tracer_provider)
                        logger.info("Instrumented OpenAI")
                except Exception as e:
                    logger.warning(f"Failed to instrument OpenAI: {e}")

            # Instrument Anthropic with our isolated tracer provider
            if Instruments.ANTHROPIC in enabled_instruments:
                try:
                    if is_package_installed("anthropic"):
                        from opentelemetry.instrumentation.anthropic import AnthropicInstrumentor

                        AnthropicInstrumentor().instrument(tracer_provider=self._tracer_provider)
                        logger.info("Instrumented Anthropic")
                except Exception as e:
                    logger.warning(f"Failed to instrument Anthropic: {e}")

            # Instrument Bedrock with our isolated tracer provider
            if Instruments.BEDROCK in enabled_instruments:
                try:
                    if is_package_installed("boto3"):
                        from opentelemetry.instrumentation.bedrock import BedrockInstrumentor

                        BedrockInstrumentor().instrument(tracer_provider=self._tracer_provider)
                        logger.info("Instrumented Bedrock")
                except Exception as e:
                    logger.warning(f"Failed to instrument Bedrock: {e}")

            # Instrument LangChain with our isolated tracer provider
            if Instruments.LANGCHAIN in enabled_instruments:
                try:
                    if is_package_installed("langchain") or is_package_installed("langgraph"):
                        from opentelemetry.instrumentation.langchain import LangchainInstrumentor

                        LangchainInstrumentor().instrument(tracer_provider=self._tracer_provider)
                        logger.info("Instrumented LangChain")
                except Exception as e:
                    logger.warning(f"Failed to instrument LangChain: {e}")

            # Instrument Google Generative AI with our isolated tracer provider
            if Instruments.GOOGLE_GENERATIVEAI in enabled_instruments:
                try:
                    if is_package_installed("google-generativeai") or is_package_installed(
                        "google-genai"
                    ):
                        from opentelemetry.instrumentation.google_generativeai import (
                            GoogleGenerativeAiInstrumentor,
                        )

                        GoogleGenerativeAiInstrumentor().instrument(
                            tracer_provider=self._tracer_provider
                        )
                        logger.info("Instrumented Google Generative AI")
                except Exception as e:
                    logger.warning(f"Failed to instrument Google Generative AI: {e}")

            # Register cleanup handlers
            self._register_cleanup()

            self._initialized = True
            logger.info(f"Docent tracing initialized for {self.collection_name}")

        except Exception as e:
            logger.error(f"Failed to initialize Docent tracing: {e}")
            self._disabled = True
            raise
763
+
764
+ def cleanup(self):
765
+ """
766
+ Clean up Docent tracing resources.
767
+
768
+ Flushes all pending spans to exporters and shuts down the tracer provider.
769
+ This method is automatically called during application shutdown via atexit
770
+ handlers, but can also be called manually for explicit cleanup.
771
+
772
+ The cleanup process:
773
+ 1. Flushes all span processors to ensure data is exported
774
+ 2. Shuts down the tracer provider and releases resources
775
+ """
776
+ if self.is_disabled():
777
+ return
778
+
779
+ try:
780
+ self.flush()
781
+
782
+ if self._tracer_provider:
783
+ self._tracer_provider.shutdown()
784
+ self._tracer_provider = None
785
+ except Exception as e:
786
+ logger.error(f"Error during cleanup: {e}")
787
+
788
+ def close(self):
789
+ """Explicitly close the Docent tracing manager."""
790
+ if self.is_disabled():
791
+ return
792
+
793
+ try:
794
+ self.cleanup()
795
+ if self._cleanup_registered:
796
+ atexit.unregister(self.cleanup)
797
+ self._cleanup_registered = False
798
+ except Exception as e:
799
+ logger.error(f"Error during close: {e}")
800
+
801
+ def flush(self) -> None:
802
+ """Force flush all spans to exporters."""
803
+ if self.is_disabled():
804
+ return
805
+
806
+ try:
807
+ logger.debug(f"Flushing {len(self._spans_processors)} span processors")
808
+ for i, processor in enumerate(self._spans_processors):
809
+ if hasattr(processor, "force_flush"):
810
+ logger.debug(f"Flushing span processor {i}")
811
+ processor.force_flush(timeout_millis=50)
812
+ logger.debug("Span flush completed")
813
+ except Exception as e:
814
+ logger.error(f"Error during flush: {e}")
815
+
816
+ def is_disabled(self) -> bool:
817
+ """Check if tracing is disabled."""
818
+ return _global_tracing_disabled or self._disabled
819
+
820
+ def set_disabled(self, disabled: bool) -> None:
821
+ """Enable or disable tracing."""
822
+ self._disabled = disabled
823
+ if disabled and self._initialized:
824
+ self.cleanup()
825
+
826
+ def is_initialized(self) -> bool:
827
+ """Verify if the manager is properly initialized."""
828
+ return self._initialized
829
+
830
    @contextmanager
    def agent_run_context(
        self,
        agent_run_id: Optional[str] = None,
        transcript_id: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
        **attributes: Any,
    ) -> Iterator[tuple[str, str]]:
        """
        Context manager for setting up an agent run context.

        Args:
            agent_run_id: Optional agent run ID (auto-generated if not provided)
            transcript_id: Optional transcript ID (auto-generated if not provided)
            metadata: Optional nested dictionary of metadata to send to backend
            **attributes: Additional attributes to add to the context

        Yields:
            Tuple of (agent_run_id, transcript_id)
        """
        # When tracing is disabled, still yield usable IDs so caller code
        # works unchanged without emitting anything.
        if self.is_disabled():
            agent_run_id = _get_disabled_agent_run_id(agent_run_id)
            transcript_id = _get_disabled_transcript_id(transcript_id)
            yield agent_run_id, transcript_id
            return

        if not self._initialized:
            self.initialize()

        # Invalid caller-supplied IDs are replaced with fresh UUIDs rather
        # than raising, so instrumentation never breaks the host app.
        if agent_run_id is not None and (not isinstance(agent_run_id, str) or not agent_run_id):
            logger.error("Invalid agent_run_id for agent_run_context; generating a new ID.")
            agent_run_id = str(uuid.uuid4())
        elif agent_run_id is None:
            agent_run_id = str(uuid.uuid4())

        if transcript_id is not None and (not isinstance(transcript_id, str) or not transcript_id):
            logger.error(
                "Invalid transcript_id for agent_run_context; generating a new transcript ID."
            )
            transcript_id = str(uuid.uuid4())
        elif transcript_id is None:
            transcript_id = str(uuid.uuid4())

        # Set context variables for this execution context
        agent_run_id_token: Token[str] = self._agent_run_id_var.set(agent_run_id)
        transcript_id_token: Token[str] = self._transcript_id_var.set(transcript_id)
        attributes_token: Token[dict[str, Any]] = self._attributes_var.set(attributes)

        try:
            # Send metadata directly to backend if provided
            if metadata:
                try:
                    self.send_agent_run_metadata(agent_run_id, metadata)
                except Exception as e:
                    logger.error(f"Failed sending agent run metadata: {e}")

            yield agent_run_id, transcript_id
        finally:
            # Flush queued metadata events BEFORE resetting the context
            # variables, while the run's IDs are still resolvable.
            transcript_group_id = self._get_optional_context_value(self._transcript_group_id_var)
            self._flush_pending_metadata_events(
                agent_run_id=agent_run_id,
                transcript_id=transcript_id,
                transcript_group_id=transcript_group_id,
            )
            self._agent_run_id_var.reset(agent_run_id_token)
            self._transcript_id_var.reset(transcript_id_token)
            self._attributes_var.reset(attributes_token)
897
+
898
+ @asynccontextmanager
899
+ async def async_agent_run_context(
900
+ self,
901
+ agent_run_id: Optional[str] = None,
902
+ transcript_id: Optional[str] = None,
903
+ metadata: Optional[Dict[str, Any]] = None,
904
+ **attributes: Any,
905
+ ) -> AsyncIterator[tuple[str, str]]:
906
+ """
907
+ Async context manager for setting up an agent run context.
908
+ Modifies the OpenTelemetry context so all spans inherit agent_run_id and transcript_id.
909
+
910
+ Args:
911
+ agent_run_id: Optional agent run ID (auto-generated if not provided)
912
+ transcript_id: Optional transcript ID (auto-generated if not provided)
913
+ metadata: Optional nested dictionary of metadata to send to backend
914
+ **attributes: Additional attributes to add to the context
915
+
916
+ Yields:
917
+ Tuple of (agent_run_id, transcript_id)
918
+ """
919
+ if self.is_disabled():
920
+ agent_run_id = _get_disabled_agent_run_id(agent_run_id)
921
+ transcript_id = _get_disabled_transcript_id(transcript_id)
922
+ yield agent_run_id, transcript_id
923
+ return
924
+
925
+ if not self._initialized:
926
+ self.initialize()
927
+
928
+ if agent_run_id is not None and (not isinstance(agent_run_id, str) or not agent_run_id):
929
+ logger.error("Invalid agent_run_id for async_agent_run_context; generating a new ID.")
930
+ agent_run_id = str(uuid.uuid4())
931
+ elif agent_run_id is None:
932
+ agent_run_id = str(uuid.uuid4())
933
+
934
+ if transcript_id is not None and (not isinstance(transcript_id, str) or not transcript_id):
935
+ logger.error(
936
+ "Invalid transcript_id for async_agent_run_context; generating a new transcript ID."
937
+ )
938
+ transcript_id = str(uuid.uuid4())
939
+ elif transcript_id is None:
940
+ transcript_id = str(uuid.uuid4())
941
+
942
+ # Set context variables for this execution context
943
+ agent_run_id_token: Token[str] = self._agent_run_id_var.set(agent_run_id)
944
+ transcript_id_token: Token[str] = self._transcript_id_var.set(transcript_id)
945
+ attributes_token: Token[dict[str, Any]] = self._attributes_var.set(attributes)
946
+
947
+ try:
948
+ # Send metadata directly to backend if provided
949
+ if metadata:
950
+ try:
951
+ self.send_agent_run_metadata(agent_run_id, metadata)
952
+ except Exception as e:
953
+ logger.warning(f"Failed sending agent run metadata: {e}")
954
+
955
+ yield agent_run_id, transcript_id
956
+ finally:
957
+ transcript_group_id = self._get_optional_context_value(self._transcript_group_id_var)
958
+ self._flush_pending_metadata_events(
959
+ agent_run_id=agent_run_id,
960
+ transcript_id=transcript_id,
961
+ transcript_group_id=transcript_group_id,
962
+ )
963
+ self._agent_run_id_var.reset(agent_run_id_token)
964
+ self._transcript_id_var.reset(transcript_id_token)
965
+ self._attributes_var.reset(attributes_token)
966
+
967
+ def _api_headers(self) -> Dict[str, str]:
968
+ """
969
+ Get the API headers for HTTP requests.
970
+
971
+ Returns:
972
+ Headers including content type and any custom entries configured on the tracer
973
+ """
974
+ # Copy configured headers so we don't mutate the original dict
975
+ headers = dict(self.headers)
976
+ # Ensure JSON payloads always advertise the correct content type
977
+ headers.setdefault("Content-Type", "application/json")
978
+ return headers
979
+
980
    def _ensure_json_serializable_metadata(
        self, metadata: Dict[str, Any], context: str
    ) -> Optional[Dict[str, Any]]:
        """
        Validate that metadata can be serialized to JSON before sending it to the backend.
        Returns a sanitized shallow copy so subsequent code never mutates the caller's object.
        Any validation failure is logged and results in None so callers can skip sending metadata.
        """
        if not isinstance(metadata, dict):
            logger.error(
                "%s metadata must be provided as a dict (got %s: %r). Skipping metadata payload.",
                context,
                type(metadata).__name__,
                metadata,
            )
            return None

        # Shallow-copy while dropping non-string keys; values are kept as-is.
        metadata_copy: Dict[str, Any] = {}
        for key, value in metadata.items():
            if not isinstance(key, str):
                logger.error(
                    "%s metadata keys must be strings; skipping key %r (type %s).",
                    context,
                    key,
                    type(key).__name__,
                )
                continue
            metadata_copy[key] = value

        # Dry-run serialization: reject the whole payload if any value is not
        # JSON-serializable rather than failing later mid-request.
        try:
            json.dumps(metadata_copy)
        except (TypeError, ValueError) as exc:
            logger.error(
                "%s metadata must be JSON serializable (%s). Skipping metadata payload: %r",
                context,
                exc,
                metadata,
            )
            return None
        # The backend rejects NUL bytes, so scan recursively before sending.
        offending_path = self._find_null_character_path(metadata_copy)
        if offending_path is not None:
            logger.error(
                "%s metadata cannot contain null characters (found at %s). "
                "Skipping metadata payload.",
                context,
                offending_path,
            )
            return None
        return metadata_copy
1029
+
1030
    def _post_json(self, path: str, data: Dict[str, Any]) -> None:
        """POST ``data`` as JSON to ``path`` on the backend.

        Currently a thin alias for the synchronous implementation; kept as a
        separate entry point so the transport can change without touching callers.
        """
        self._post_json_sync(path, data)
1032
+
1033
    def _post_json_sync(self, path: str, data: Dict[str, Any]) -> None:
        """
        Synchronously POST ``data`` as JSON to ``path`` under the configured API base.

        Raises:
            RuntimeError: If no API endpoint base is configured.
            DocentTelemetryRequestError: If the request fails at transport level
                or the backend returns an error status.
        """
        if not self._api_endpoint_base:
            message = "API endpoint base is not configured"
            logger.error(message)
            raise RuntimeError(message)
        url = f"{self._api_endpoint_base}{path}"
        try:
            # timeout=(connect, read) in seconds.
            resp = requests.post(url, json=data, headers=self._api_headers(), timeout=(10, 60))
            resp.raise_for_status()
        except requests.exceptions.RequestException as exc:
            # Wrap with a descriptive, actionable message built from the response.
            message = self._format_request_exception(url, exc)
            raise DocentTelemetryRequestError(message) from exc
1045
+
1046
    def _format_request_exception(self, url: str, exc: requests.exceptions.RequestException) -> str:
        """
        Build a human-readable message for a failed POST, combining the HTTP
        status, backend-provided detail, the x-request-id header, and a
        remediation hint appropriate to the status code.
        """
        response: Optional[Response] = getattr(exc, "response", None)
        message_parts: List[str] = [f"Failed POST {url}"]
        suggestion: Optional[str]

        if response is not None:
            status_phrase = f"HTTP {response.status_code}"
            if response.reason:
                status_phrase = f"{status_phrase} {response.reason}"
            message_parts.append(f"({status_phrase})")

            detail = self._extract_response_detail(response)
            if detail:
                message_parts.append(f"- Backend detail: {detail}")

            request_id = response.headers.get("x-request-id")
            if request_id:
                message_parts.append(f"(request-id: {request_id})")

            suggestion = self._suggest_fix_for_status(response.status_code)
        else:
            # No response at all: transport-level failure (DNS, refused, timeout).
            message_parts.append(f"- {exc}")
            suggestion = self._suggest_fix_for_status(None)

        if suggestion:
            message_parts.append(suggestion)

        return " ".join(part for part in message_parts if part)
1074
+
1075
    def _extract_response_detail(self, response: Response) -> Optional[str]:
        """
        Pull the most useful error detail out of a backend response body,
        normalized to a single (possibly truncated) line.

        Returns:
            The detail string, or None when the body is empty.
        """
        try:
            body = response.json()
        except ValueError:
            # Non-JSON body: fall back to whitespace-normalized raw text.
            text = response.text.strip()
            if not text:
                return None
            normalized = " ".join(text.split())
            return self._truncate_error_message(normalized)

        if isinstance(body, dict):
            typed_body = cast(Dict[str, Any], body)
            # Prefer the structured detail/message/error fields when present.
            structured_message = self._structured_detail_message(typed_body)
            if structured_message:
                return self._truncate_error_message(structured_message)
            return self._truncate_error_message(self._normalize_error_value(typed_body))

        return self._truncate_error_message(self._normalize_error_value(body))
1093
+
1094
+ def _structured_detail_message(self, data: Dict[str, Any]) -> Optional[str]:
1095
+ for key in ("detail", "message", "error"):
1096
+ if key in data:
1097
+ structured_value = self._structured_detail_value(data[key])
1098
+ if structured_value:
1099
+ return structured_value
1100
+ return self._structured_detail_value(data)
1101
+
1102
+ def _structured_detail_value(self, value: Any) -> Optional[str]:
1103
+ if isinstance(value, Mapping):
1104
+ mapping_value = cast(Mapping[str, Any], value)
1105
+ message = mapping_value.get("message")
1106
+ hint = mapping_value.get("hint")
1107
+ error_code = mapping_value.get("error_code")
1108
+ request_id = mapping_value.get("request_id")
1109
+ fallback_detail = mapping_value.get("detail")
1110
+
1111
+ parts: List[str] = []
1112
+ if isinstance(message, str) and message.strip():
1113
+ parts.append(message.strip())
1114
+ elif isinstance(fallback_detail, str) and fallback_detail.strip():
1115
+ parts.append(fallback_detail.strip())
1116
+
1117
+ if isinstance(hint, str) and hint.strip():
1118
+ parts.append(f"(hint: {hint.strip()})")
1119
+ if isinstance(error_code, str) and error_code.strip():
1120
+ parts.append(f"[code: {error_code.strip()}]")
1121
+ if isinstance(request_id, str) and request_id.strip():
1122
+ parts.append(f"(request-id: {request_id.strip()})")
1123
+
1124
+ return " ".join(parts) if parts else None
1125
+
1126
+ if isinstance(value, str) and value.strip():
1127
+ return value.strip()
1128
+
1129
+ return None
1130
+
1131
+ def _normalize_error_value(self, value: Any) -> str:
1132
+ if isinstance(value, str):
1133
+ return " ".join(value.split())
1134
+
1135
+ try:
1136
+ serialized = json.dumps(value)
1137
+ except (TypeError, ValueError):
1138
+ serialized = str(value)
1139
+
1140
+ return " ".join(serialized.split())
1141
+
1142
+ def _truncate_error_message(self, message: str) -> str:
1143
+ message = message.strip()
1144
+ if len(message) <= ERROR_DETAIL_MAX_CHARS:
1145
+ return message
1146
+ return f"{message[:ERROR_DETAIL_MAX_CHARS]}..."
1147
+
1148
+ def _suggest_fix_for_status(self, status_code: Optional[int]) -> Optional[str]:
1149
+ if status_code in (401, 403):
1150
+ return (
1151
+ "Verify that the Authorization header or DOCENT_API_KEY grants write access to the "
1152
+ "target collection."
1153
+ )
1154
+ if status_code == 404:
1155
+ return (
1156
+ "Ensure the tracing endpoint passed to initialize_tracing matches the Docent server's "
1157
+ "/rest/telemetry route."
1158
+ )
1159
+ if status_code in (400, 422):
1160
+ return (
1161
+ "Confirm the payload includes collection_id, agent_run_id, metadata, and timestamp in "
1162
+ "the expected format."
1163
+ )
1164
+ if status_code and status_code >= 500:
1165
+ return "Inspect the Docent backend logs for the referenced request."
1166
+ if status_code is None:
1167
+ return "Confirm the Docent telemetry endpoint is reachable from this process."
1168
+ return None
1169
+
1170
+ def _find_null_character_path(self, value: Any, path: str = "") -> Optional[str]:
1171
+ """Backend rejects NUL bytes, so detect them before we send metadata to the backend."""
1172
+ if isinstance(value, str):
1173
+ if "\x00" in value or "\\u0000" in value or "\\x00" in value:
1174
+ return path or "<root>"
1175
+ return None
1176
+
1177
+ if isinstance(value, dict):
1178
+ typed_dict: Mapping[str, Any] = cast(Mapping[str, Any], value)
1179
+ for key, item in typed_dict.items():
1180
+ key_str = str(key)
1181
+ next_path = f"{path}.{key_str}" if path else key_str
1182
+ result = self._find_null_character_path(item, next_path)
1183
+ if result:
1184
+ return result
1185
+ return None
1186
+
1187
+ if isinstance(value, (list, tuple)):
1188
+ typed_sequence: Sequence[Any] = cast(Sequence[Any], value)
1189
+ for index, item in enumerate(typed_sequence):
1190
+ next_path = f"{path}[{index}]" if path else f"[{index}]"
1191
+ result = self._find_null_character_path(item, next_path)
1192
+ if result:
1193
+ return result
1194
+ return None
1195
+
1196
+ return None
1197
+
1198
+ def send_agent_run_score(
1199
+ self,
1200
+ agent_run_id: str,
1201
+ name: str,
1202
+ score: float,
1203
+ attributes: Optional[Dict[str, Any]] = None,
1204
+ ) -> None:
1205
+ """
1206
+ Send a score to the backend for a specific agent run.
1207
+
1208
+ Args:
1209
+ agent_run_id: The agent run ID
1210
+ name: Name of the score metric
1211
+ score: Numeric score value
1212
+ attributes: Optional additional attributes
1213
+ """
1214
+ if self.is_disabled():
1215
+ return
1216
+
1217
+ collection_id = self.collection_id
1218
+ if not isinstance(agent_run_id, str) or not agent_run_id:
1219
+ logger.error("Cannot send agent run score without a valid agent_run_id.")
1220
+ return
1221
+
1222
+ if not isinstance(name, str) or not name:
1223
+ logger.error("Cannot send agent run score without a valid score name.")
1224
+ return
1225
+
1226
+ payload: Dict[str, Any] = {
1227
+ "collection_id": collection_id,
1228
+ "agent_run_id": agent_run_id,
1229
+ "score_name": name,
1230
+ "score_value": score,
1231
+ "timestamp": datetime.now(timezone.utc).isoformat(),
1232
+ }
1233
+ if attributes is not None:
1234
+ if not isinstance(attributes, dict):
1235
+ logger.error(
1236
+ "Score attributes must be provided as a dict (got %s: %r). Skipping attributes.",
1237
+ type(attributes).__name__,
1238
+ attributes,
1239
+ )
1240
+ else:
1241
+ sanitized_attributes: Dict[str, Any] = {}
1242
+ for attr_key, attr_value in attributes.items():
1243
+ if not isinstance(attr_key, str):
1244
+ logger.error(
1245
+ "Score attribute keys must be strings; skipping key %r of type %s.",
1246
+ attr_key,
1247
+ type(attr_key).__name__,
1248
+ )
1249
+ continue
1250
+ sanitized_attributes[attr_key] = attr_value
1251
+ payload.update(sanitized_attributes)
1252
+ self._post_json("/v1/scores", payload)
1253
+
1254
    def send_agent_run_metadata(self, agent_run_id: str, metadata: Dict[str, Any]) -> None:
        """
        Validate and send agent-run metadata to the backend.

        Args:
            agent_run_id: The agent run ID the metadata belongs to.
            metadata: JSON-serializable metadata dict; invalid payloads are
                logged and dropped rather than raised.
        """
        if self.is_disabled():
            return

        if not isinstance(agent_run_id, str) or not agent_run_id:
            logger.error("Cannot send agent run metadata without a valid agent_run_id.")
            return

        # Sanitize/validate the payload first; None means validation failed.
        metadata_payload = self._ensure_json_serializable_metadata(metadata, "Agent run")
        if metadata_payload is None:
            logger.error(
                "Skipping agent run metadata send for %s due to invalid metadata payload.",
                agent_run_id,
            )
            return

        event = self._create_metadata_event(
            name="agent_run_metadata",
            metadata=metadata_payload,
            attributes={
                "collection_id": self.collection_id,
                "agent_run_id": agent_run_id,
            },
        )
        # Emits immediately when possible, otherwise queues (keyed by
        # agent_run_id) for a later flush.
        self._emit_or_queue_metadata_event(
            store=self._pending_agent_run_metadata_events,
            key=agent_run_id,
            event=event,
        )
1283
+
1284
    def send_transcript_metadata(
        self,
        transcript_id: str,
        name: Optional[str] = None,
        description: Optional[str] = None,
        transcript_group_id: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> None:
        """
        Send transcript data to the backend.

        Args:
            transcript_id: The transcript ID
            name: Optional transcript name
            description: Optional transcript description
            transcript_group_id: Optional transcript group ID
            metadata: Optional metadata to send
        """
        if self.is_disabled():
            return

        if not isinstance(transcript_id, str) or not transcript_id:
            logger.error("Cannot send transcript metadata without a valid transcript_id.")
            return

        attributes: Dict[str, Any] = {
            "collection_id": self.collection_id,
            "transcript_id": transcript_id,
            "agent_run_id": self.get_current_agent_run_id(),
        }

        # Optional fields are validated individually; bad values are dropped
        # (with an error log) rather than failing the whole send.
        if name is not None:
            if isinstance(name, str):
                attributes["name"] = name
            else:
                logger.error("Transcript name must be a string; ignoring value %r.", name)
        if description is not None:
            if isinstance(description, str):
                attributes["description"] = description
            else:
                logger.error(
                    "Transcript description must be a string; ignoring value %r.", description
                )
        if transcript_group_id is not None:
            if isinstance(transcript_group_id, str) and transcript_group_id:
                attributes["transcript_group_id"] = transcript_group_id
            else:
                logger.error(
                    "transcript_group_id must be a non-empty string; ignoring value %r.",
                    transcript_group_id,
                )

        # Invalid metadata degrades to an empty payload instead of aborting,
        # so the transcript attributes are still delivered.
        metadata_payload: Optional[Dict[str, Any]] = None
        if metadata is not None:
            metadata_payload = self._ensure_json_serializable_metadata(metadata, "Transcript")
            if metadata_payload is None:
                logger.error(
                    "Transcript %s metadata payload invalid; sending transcript data without metadata.",
                    transcript_id,
                )

        event = self._create_metadata_event(
            name="transcript_metadata",
            metadata=metadata_payload or {},
            attributes=attributes,
        )
        # Emits immediately when possible, otherwise queues (keyed by
        # transcript_id) for a later flush.
        self._emit_or_queue_metadata_event(
            store=self._pending_transcript_metadata_events,
            key=transcript_id,
            event=event,
        )
1355
+
1356
+ def get_current_transcript_id(self) -> Optional[str]:
1357
+ """
1358
+ Get the current transcript ID from context.
1359
+
1360
+ Returns:
1361
+ The current transcript ID if available, None otherwise
1362
+ """
1363
+ try:
1364
+ return self._transcript_id_var.get()
1365
+ except LookupError:
1366
+ return None
1367
+
1368
+ def get_current_transcript_group_id(self) -> Optional[str]:
1369
+ """
1370
+ Get the current transcript group ID from context.
1371
+
1372
+ Returns:
1373
+ The current transcript group ID if available, None otherwise
1374
+ """
1375
+ try:
1376
+ return self._transcript_group_id_var.get()
1377
+ except LookupError:
1378
+ return None
1379
+
1380
    @contextmanager
    def transcript_context(
        self,
        name: Optional[str] = None,
        transcript_id: Optional[str] = None,
        description: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
        transcript_group_id: Optional[str] = None,
    ) -> Iterator[str]:
        """
        Context manager for setting up a transcript context.

        Args:
            name: Optional transcript name
            transcript_id: Optional transcript ID (auto-generated if not provided)
            description: Optional transcript description
            metadata: Optional metadata to send to backend
            transcript_group_id: Optional transcript group ID

        Yields:
            The transcript ID
        """
        # When tracing is disabled, still yield a usable ID so callers work unchanged.
        if self.is_disabled():
            transcript_id = _get_disabled_transcript_id(transcript_id)
            yield transcript_id
            return

        # NOTE: unlike agent_run_context (which lazily initializes), transcript
        # contexts require explicit prior initialization.
        if not self._initialized:
            message = "Tracer is not initialized. Call initialize_tracing() before using transcript context."
            logger.error(message)
            raise RuntimeError(message)

        # Invalid caller-supplied IDs fall back to fresh UUIDs rather than raising.
        if transcript_id is not None and (not isinstance(transcript_id, str) or not transcript_id):
            logger.error(
                "Invalid transcript_id for transcript_context; generating a new transcript ID."
            )
            transcript_id = str(uuid.uuid4())
        elif transcript_id is None:
            transcript_id = str(uuid.uuid4())

        # Determine transcript group ID before setting new context
        if transcript_group_id is None:
            try:
                transcript_group_id = self._transcript_group_id_var.get()
            except LookupError:
                # No current transcript group context, this transcript has no group
                transcript_group_id = None
        else:
            if isinstance(transcript_group_id, str) and transcript_group_id:
                pass
            else:
                logger.error(
                    "Invalid transcript_group_id for transcript_context; ignoring value %r.",
                    transcript_group_id,
                )
                transcript_group_id = None

        # Set context variable for this execution context
        transcript_id_token: Token[str] = self._transcript_id_var.set(transcript_id)

        try:
            # Send transcript data and metadata to backend
            try:
                self.send_transcript_metadata(
                    transcript_id, name, description, transcript_group_id, metadata
                )
            except Exception as e:
                logger.error(f"Failed sending transcript data: {e}")

            yield transcript_id
        finally:
            # Flush queued metadata BEFORE resetting the context variable,
            # while the run/group IDs are still resolvable from context.
            agent_run_id_for_flush = self._get_optional_context_value(self._agent_run_id_var)
            transcript_group_id_for_flush = self._get_optional_context_value(
                self._transcript_group_id_var
            )
            self._flush_pending_metadata_events(
                agent_run_id=agent_run_id_for_flush,
                transcript_id=transcript_id,
                transcript_group_id=transcript_group_id_for_flush,
            )
            # Reset context variable to previous state
            self._transcript_id_var.reset(transcript_id_token)
1462
+
1463
    @asynccontextmanager
    async def async_transcript_context(
        self,
        name: Optional[str] = None,
        transcript_id: Optional[str] = None,
        description: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
        transcript_group_id: Optional[str] = None,
    ) -> AsyncIterator[str]:
        """
        Async context manager for setting up a transcript context.

        Args:
            name: Optional transcript name
            transcript_id: Optional transcript ID (auto-generated if not provided)
            description: Optional transcript description
            metadata: Optional metadata to send to backend
            transcript_group_id: Optional transcript group ID

        Yields:
            The transcript ID
        """
        # When tracing is disabled, still yield a usable ID so callers work unchanged.
        if self.is_disabled():
            transcript_id = _get_disabled_transcript_id(transcript_id)
            yield transcript_id
            return

        # NOTE: unlike the agent-run contexts (which lazily initialize),
        # transcript contexts require explicit prior initialization.
        if not self._initialized:
            message = "Tracer is not initialized. Call initialize_tracing() before using transcript context."
            logger.error(message)
            raise RuntimeError(message)

        # Invalid caller-supplied IDs fall back to fresh UUIDs rather than raising.
        if transcript_id is not None and (not isinstance(transcript_id, str) or not transcript_id):
            logger.error(
                "Invalid transcript_id for async_transcript_context; generating a new transcript ID."
            )
            transcript_id = str(uuid.uuid4())
        elif transcript_id is None:
            transcript_id = str(uuid.uuid4())

        # Determine transcript group ID before setting new context
        if transcript_group_id is None:
            try:
                transcript_group_id = self._transcript_group_id_var.get()
            except LookupError:
                # No current transcript group context, this transcript has no group
                transcript_group_id = None
        else:
            if isinstance(transcript_group_id, str) and transcript_group_id:
                pass
            else:
                logger.error(
                    "Invalid transcript_group_id for async_transcript_context; ignoring value %r.",
                    transcript_group_id,
                )
                transcript_group_id = None

        # Set context variable for this execution context
        transcript_id_token: Token[str] = self._transcript_id_var.set(transcript_id)

        try:
            # Send transcript data and metadata to backend
            try:
                self.send_transcript_metadata(
                    transcript_id, name, description, transcript_group_id, metadata
                )
            except Exception as e:
                logger.error(f"Failed sending transcript data: {e}")

            yield transcript_id
        finally:
            # Flush queued metadata BEFORE resetting the context variable,
            # while the run/group IDs are still resolvable from context.
            agent_run_id_for_flush = self._get_optional_context_value(self._agent_run_id_var)
            transcript_group_id_for_flush = self._get_optional_context_value(
                self._transcript_group_id_var
            )
            self._flush_pending_metadata_events(
                agent_run_id=agent_run_id_for_flush,
                transcript_id=transcript_id,
                transcript_group_id=transcript_group_id_for_flush,
            )
            # Reset context variable to previous state
            self._transcript_id_var.reset(transcript_id_token)
1545
+
1546
    def send_transcript_group_metadata(
        self,
        transcript_group_id: str,
        name: Optional[str] = None,
        description: Optional[str] = None,
        parent_transcript_group_id: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> None:
        """
        Send transcript group data to the backend.

        Args:
            transcript_group_id: The transcript group ID
            name: Optional transcript group name
            description: Optional transcript group description
            parent_transcript_group_id: Optional parent transcript group ID
            metadata: Optional metadata to send
        """
        if self.is_disabled():
            return

        if not isinstance(transcript_group_id, str) or not transcript_group_id:
            logger.error(
                "Cannot send transcript group metadata without a valid transcript_group_id."
            )
            return

        collection_id = self.collection_id

        # Get agent_run_id from current context
        agent_run_id = self.get_current_agent_run_id()
        if not agent_run_id:
            logger.error(
                f"Cannot send transcript group metadata for {transcript_group_id} - no agent_run_id in context"
            )
            return

        # Merge this call's fields with previously recorded per-group state
        # under a lock, so repeated/concurrent partial updates accumulate
        # instead of clobbering each other. Invalid values fall back to the
        # stored state.
        with self._transcript_group_state_lock:
            state: dict[str, Optional[str]] = self._transcript_group_states.setdefault(
                transcript_group_id, {}
            )
            if name is not None:
                if isinstance(name, str):
                    final_name = name
                else:
                    logger.error(
                        "Transcript group name must be a string; ignoring value %r.",
                        name,
                    )
                    final_name = state.get("name")
            else:
                final_name = state.get("name")

            if description is not None:
                if isinstance(description, str):
                    final_description = description
                else:
                    logger.error(
                        "Transcript group description must be a string; ignoring value %r.",
                        description,
                    )
                    final_description = state.get("description")
            else:
                final_description = state.get("description")

            if parent_transcript_group_id is not None:
                if isinstance(parent_transcript_group_id, str) and parent_transcript_group_id:
                    final_parent_transcript_group_id = parent_transcript_group_id
                else:
                    logger.error(
                        "parent_transcript_group_id must be a non-empty string; ignoring value %r.",
                        parent_transcript_group_id,
                    )
                    final_parent_transcript_group_id = state.get("parent_transcript_group_id")
            else:
                final_parent_transcript_group_id = state.get("parent_transcript_group_id")

            # Persist the merged values so later partial updates retain them.
            if final_name is not None:
                state["name"] = final_name
            if final_description is not None:
                state["description"] = final_description
            if final_parent_transcript_group_id is not None:
                state["parent_transcript_group_id"] = final_parent_transcript_group_id

        attributes: Dict[str, Any] = {
            "collection_id": collection_id,
            "transcript_group_id": transcript_group_id,
            "agent_run_id": agent_run_id,
        }
        if final_name is not None:
            attributes["name"] = final_name
        if final_description is not None:
            attributes["description"] = final_description
        if final_parent_transcript_group_id is not None:
            attributes["parent_transcript_group_id"] = final_parent_transcript_group_id

        # Invalid metadata degrades to an empty payload instead of aborting,
        # so the group attributes are still delivered.
        metadata_payload: Optional[Dict[str, Any]] = None
        if metadata is not None:
            metadata_payload = self._ensure_json_serializable_metadata(metadata, "Transcript group")
            if metadata_payload is None:
                logger.error(
                    "Transcript group %s metadata payload invalid; sending group data without metadata.",
                    transcript_group_id,
                )

        event = self._create_metadata_event(
            name="transcript_group_metadata",
            metadata=metadata_payload or {},
            attributes=attributes,
        )
        # Queued group events are keyed by agent_run_id (not the group ID).
        self._emit_or_queue_metadata_event(
            store=self._pending_transcript_group_metadata_events,
            key=agent_run_id,
            event=event,
        )
1661
+
1662
    @contextmanager
    def transcript_group_context(
        self,
        name: Optional[str] = None,
        transcript_group_id: Optional[str] = None,
        description: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
        parent_transcript_group_id: Optional[str] = None,
    ) -> Iterator[str]:
        """
        Context manager for setting up a transcript group context.

        Args:
            name: Optional transcript group name
            transcript_group_id: Optional transcript group ID (auto-generated if not provided)
            description: Optional transcript group description
            metadata: Optional metadata to send to backend
            parent_transcript_group_id: Optional parent transcript group ID

        Yields:
            The transcript group ID

        Raises:
            RuntimeError: If the tracer has not been initialized via initialize_tracing().
        """
        # When tracing is disabled we still yield a usable placeholder ID so
        # caller code that records the ID keeps working.
        if self.is_disabled():
            transcript_group_id = _get_disabled_transcript_group_id(transcript_group_id)
            yield transcript_group_id
            return

        if not self._initialized:
            message = "Tracer is not initialized. Call initialize_tracing() before using transcript group context."
            logger.error(message)
            raise RuntimeError(message)

        # A caller-supplied ID must be a non-empty string; otherwise replace it
        # with a fresh UUID rather than failing.
        if transcript_group_id is not None and (
            not isinstance(transcript_group_id, str) or not transcript_group_id
        ):
            logger.error(
                "Invalid transcript_group_id for transcript_group_context; generating a new ID."
            )
            transcript_group_id = str(uuid.uuid4())
        elif transcript_group_id is None:
            transcript_group_id = str(uuid.uuid4())

        # Determine parent transcript group ID before setting new context
        if parent_transcript_group_id is None:
            try:
                # Inherit the currently-active group (if any) as the parent.
                parent_transcript_group_id = self._transcript_group_id_var.get()
            except LookupError:
                # No current transcript group context, this becomes a root group
                parent_transcript_group_id = None
        else:
            if isinstance(parent_transcript_group_id, str) and parent_transcript_group_id:
                pass
            else:
                # Invalid explicit parent: drop it instead of propagating bad data.
                logger.error(
                    "Invalid parent_transcript_group_id for transcript_group_context; ignoring value %r.",
                    parent_transcript_group_id,
                )
                parent_transcript_group_id = None

        # Set context variable for this execution context
        transcript_group_id_token: Token[str] = self._transcript_group_id_var.set(
            transcript_group_id
        )

        try:
            # Send transcript group data and metadata to backend
            try:
                self.send_transcript_group_metadata(
                    transcript_group_id, name, description, parent_transcript_group_id, metadata
                )
            except Exception as e:
                # Backend send is best-effort; the context itself must still work.
                logger.error(f"Failed sending transcript group data: {e}")

            yield transcript_group_id
        finally:
            # Flush any metadata events queued while this group was active,
            # using whatever run/transcript context is currently set (may be None).
            agent_run_id_for_flush = self._get_optional_context_value(self._agent_run_id_var)
            transcript_id_for_flush = self._get_optional_context_value(self._transcript_id_var)
            self._flush_pending_metadata_events(
                agent_run_id=agent_run_id_for_flush,
                transcript_id=transcript_id_for_flush,
                transcript_group_id=transcript_group_id,
            )
            # Reset context variable to previous state
            self._transcript_group_id_var.reset(transcript_group_id_token)
1746
+
1747
    @asynccontextmanager
    async def async_transcript_group_context(
        self,
        name: Optional[str] = None,
        transcript_group_id: Optional[str] = None,
        description: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
        parent_transcript_group_id: Optional[str] = None,
    ) -> AsyncIterator[str]:
        """
        Async context manager for setting up a transcript group context.

        Args:
            name: Optional transcript group name
            transcript_group_id: Optional transcript group ID (auto-generated if not provided)
            description: Optional transcript group description
            metadata: Optional metadata to send to backend
            parent_transcript_group_id: Optional parent transcript group ID

        Yields:
            The transcript group ID

        Raises:
            RuntimeError: If the tracer has not been initialized via initialize_tracing().
        """
        # When tracing is disabled we still yield a usable placeholder ID so
        # caller code that records the ID keeps working.
        if self.is_disabled():
            transcript_group_id = _get_disabled_transcript_group_id(transcript_group_id)
            yield transcript_group_id
            return

        if not self._initialized:
            message = "Tracer is not initialized. Call initialize_tracing() before using transcript group context."
            logger.error(message)
            raise RuntimeError(message)

        # A caller-supplied ID must be a non-empty string; otherwise replace it
        # with a fresh UUID rather than failing.
        if transcript_group_id is not None and (
            not isinstance(transcript_group_id, str) or not transcript_group_id
        ):
            logger.error(
                "Invalid transcript_group_id for async_transcript_group_context; generating a new ID."
            )
            transcript_group_id = str(uuid.uuid4())
        elif transcript_group_id is None:
            transcript_group_id = str(uuid.uuid4())

        # Determine parent transcript group ID before setting new context
        if parent_transcript_group_id is None:
            try:
                # Inherit the currently-active group (if any) as the parent.
                parent_transcript_group_id = self._transcript_group_id_var.get()
            except LookupError:
                # No current transcript group context, this becomes a root group
                parent_transcript_group_id = None
        else:
            if isinstance(parent_transcript_group_id, str) and parent_transcript_group_id:
                pass
            else:
                # Invalid explicit parent: drop it instead of propagating bad data.
                logger.error(
                    "Invalid parent_transcript_group_id for async_transcript_group_context; ignoring value %r.",
                    parent_transcript_group_id,
                )
                parent_transcript_group_id = None

        # Set context variable for this execution context
        transcript_group_id_token: Token[str] = self._transcript_group_id_var.set(
            transcript_group_id
        )

        try:
            # Send transcript group data and metadata to backend
            try:
                self.send_transcript_group_metadata(
                    transcript_group_id, name, description, parent_transcript_group_id, metadata
                )
            except Exception as e:
                # Backend send is best-effort; the context itself must still work.
                logger.error(f"Failed sending transcript group data: {e}")

            yield transcript_group_id
        finally:
            # Flush any metadata events queued while this group was active,
            # using whatever run/transcript context is currently set (may be None).
            agent_run_id_for_flush = self._get_optional_context_value(self._agent_run_id_var)
            transcript_id_for_flush = self._get_optional_context_value(self._transcript_id_var)
            self._flush_pending_metadata_events(
                agent_run_id=agent_run_id_for_flush,
                transcript_id=transcript_id_for_flush,
                transcript_group_id=transcript_group_id,
            )
            # Reset context variable to previous state
            self._transcript_group_id_var.reset(transcript_group_id_token)
1831
+
1832
+ def _send_trace_done(self) -> None:
1833
+ if self.is_disabled():
1834
+ return
1835
+
1836
+ collection_id = self.collection_id
1837
+ payload: Dict[str, Any] = {
1838
+ "collection_id": collection_id,
1839
+ "status": "completed",
1840
+ "timestamp": datetime.now(timezone.utc).isoformat(),
1841
+ }
1842
+ try:
1843
+ self._post_json("/v1/trace-done", payload)
1844
+ except Exception as exc:
1845
+ logger.error(f"Failed to send trace completion signal: {exc}")
1846
+
1847
+
1848
# Process-wide singleton tracer; None until initialize_tracing() succeeds.
_global_tracer: Optional[DocentTracer] = None
# Global kill switch, read once at import time; checked before any per-tracer state.
_global_tracing_disabled: bool = os.environ.get("DOCENT_DISABLE_TRACING", "").lower() == "true"
1850
+
1851
+
1852
def initialize_tracing(
    collection_name: str = DEFAULT_COLLECTION_NAME,
    collection_id: Optional[str] = None,
    endpoint: Union[str, List[str]] = DEFAULT_ENDPOINT,
    headers: Optional[Dict[str, str]] = None,
    api_key: Optional[str] = None,
    enable_console_export: bool = False,
    enable_otlp_export: bool = True,
    disable_batch: bool = False,
    instruments: Optional[Set[Instruments]] = None,
    block_instruments: Optional[Set[Instruments]] = None,
) -> DocentTracer:
    """
    Initialize the global Docent tracer.

    This is the primary entry point for setting up Docent tracing. It creates a
    process-wide singleton that later calls retrieve via get_tracer(); repeated
    calls return the existing tracer unchanged.

    Args:
        collection_name: Name of the collection
        collection_id: Optional collection ID (auto-generated if not provided)
        endpoint: OTLP endpoint URL(s) for span export - can be a single string or list of strings for multiple endpoints
        headers: Optional headers for authentication
        api_key: Optional API key for bearer token authentication (takes precedence
            over DOCENT_API_KEY environment variable)
        enable_console_export: Whether to export spans to console for debugging
        enable_otlp_export: Whether to export spans to OTLP endpoint
        disable_batch: Whether to disable batch processing (use SimpleSpanProcessor)
        instruments: Set of instruments to enable (None = all instruments).
        block_instruments: Set of instruments to explicitly disable.

    Returns:
        The initialized Docent tracer

    Example:
        initialize_tracing("my-collection")
    """
    global _global_tracer

    # An explicit api_key wins; otherwise fall back to the environment.
    if api_key is None:
        api_key = os.environ.get("DOCENT_API_KEY")

    # Only the first call constructs and initializes the singleton.
    if _global_tracer is None:
        tracer = DocentTracer(
            collection_name=collection_name,
            collection_id=collection_id,
            endpoint=endpoint,
            headers=headers,
            api_key=api_key,
            enable_console_export=enable_console_export,
            enable_otlp_export=enable_otlp_export,
            disable_batch=disable_batch,
            instruments=instruments,
            block_instruments=block_instruments,
        )
        tracer.initialize()
        _global_tracer = tracer

    return _global_tracer
1913
+
1914
+
1915
+ def _get_package_name(dist: Distribution) -> str | None:
1916
+ try:
1917
+ return dist.name.lower()
1918
+ except (KeyError, AttributeError):
1919
+ return None
1920
+
1921
+
1922
# Lowercased names of every distribution visible in the current environment,
# computed once at import time so membership checks are O(1).
installed_packages = {
    pkg_name
    for pkg_name in (_get_package_name(dist) for dist in distributions())
    if pkg_name is not None
}
1925
+
1926
+
1927
def is_package_installed(package_name: str) -> bool:
    """Return True when *package_name* (case-insensitive) is installed in this environment."""
    return package_name.lower() in installed_packages
1929
+
1930
+
1931
def get_tracer(
    caller: str = "get_tracer()", log_error_if_tracer_is_none: bool = True
) -> Optional[DocentTracer]:
    """
    Get the global Docent tracer if it has been initialized.

    Args:
        caller: Human-readable name of the API being invoked. Used for log output.
        log_error_if_tracer_is_none: Whether to log an error if the tracer is None.
            NOTE(mengk): when get_tracer is called in is_disabled, I don't want an error logged,
            since that's what I'm trying to check. In other contexts, it makes sense.

    Returns:
        The global Docent tracer, or None if tracing has not been initialized.
    """
    # Snapshot the global once so a concurrent close_tracing() cannot swap it
    # between the None check and the initialization check.
    tracer = _global_tracer

    if tracer is not None and tracer.is_initialized():
        return tracer

    if tracer is None:
        if log_error_if_tracer_is_none:
            logger.error(
                f"{caller} requires initialize_tracing() to be called before use. "
                "You can also disable tracing by calling set_disabled(True) or by setting "
                "the DOCENT_DISABLE_TRACING environment variable to 'true'."
            )
    else:
        logger.error(
            f"{caller} cannot proceed because initialize_tracing() did not complete successfully. "
            "You can also disable tracing by calling set_disabled(True) or by setting "
            "the DOCENT_DISABLE_TRACING environment variable to 'true'."
        )
    return None
1965
+
1966
+
1967
def close_tracing() -> None:
    """Close the global Docent tracer and clear the singleton."""
    global _global_tracer
    if not _global_tracer:
        return
    _global_tracer.close()
    _global_tracer = None
1973
+
1974
+
1975
def flush_tracing() -> None:
    """Force flush all spans to exporters."""
    tracer = _global_tracer
    if not tracer:
        logger.debug("No global tracer available to flush")
        return
    logger.debug("Flushing Docent tracer")
    tracer.flush()
1982
+
1983
+
1984
def is_initialized() -> bool:
    """Verify if the global Docent tracer is properly initialized."""
    tracer = _global_tracer
    return tracer is not None and tracer.is_initialized()
1989
+
1990
+
1991
def is_disabled(context_name: str = "Docent tracing") -> bool:
    """
    Check if global tracing is disabled for the given context.

    Args:
        context_name: Human-readable identifier for the caller used in error reporting.

    Returns:
        True when tracing is disabled globally, when no initialized tracer exists,
        or when the active tracer reports being disabled.
    """
    if _global_tracing_disabled:
        return True
    # Quiet probe: this function is how callers test availability, so a missing
    # tracer must not produce an error log here.
    active = get_tracer(context_name, log_error_if_tracer_is_none=False)
    return True if active is None else active.is_disabled()
2008
+
2009
+
2010
def set_disabled(disabled: bool) -> None:
    """Enable or disable global tracing."""
    global _global_tracing_disabled
    _global_tracing_disabled = disabled
    # Propagate to an existing tracer so in-flight instrumentation honors it too.
    tracer = _global_tracer
    if tracer:
        tracer.set_disabled(disabled)
2016
+
2017
+
2018
def agent_run_score(name: str, score: float, attributes: Optional[Dict[str, Any]] = None) -> None:
    """
    Send a score to the backend for the current agent run.

    Silently returns when tracing is disabled; logs (never raises) when no
    tracer or agent run context is available or the send fails.

    Args:
        name: Name of the score metric
        score: Numeric score value
        attributes: Optional additional attributes for the score event
    """
    if is_disabled("agent_run_score()"):
        return

    tracer = get_tracer("agent_run_score()")
    if tracer is None:
        logger.error("Docent tracer unavailable; score will not be sent.")
        return

    run_id = tracer.get_current_agent_run_id()
    if not run_id:
        logger.warning("No active agent run context. Score will not be sent.")
        return

    try:
        tracer.send_agent_run_score(run_id, name, score, attributes)
    except Exception as exc:
        logger.error(f"Failed to send score: {exc}")
2044
+
2045
+
2046
def agent_run_metadata(metadata: Dict[str, Any]) -> None:
    """
    Send metadata directly to the backend for the current agent run.

    Args:
        metadata: Dictionary of metadata to attach to the current span (can be nested)

    Example:
        agent_run_metadata({"user": "John", "id": 123, "flagged": True})
        agent_run_metadata({"user": {"id": "123", "name": "John"}, "config": {"model": "gpt-4"}})
    """
    if is_disabled("agent_run_metadata()"):
        return

    tracer = get_tracer("agent_run_metadata()")
    if tracer is None:
        logger.error("Docent tracer unavailable; agent run metadata will not be sent.")
        return

    run_id = tracer.get_current_agent_run_id()
    if not run_id:
        logger.warning("No active agent run context. Metadata will not be sent.")
        return

    try:
        tracer.send_agent_run_metadata(run_id, metadata)
    except Exception as exc:
        logger.error(f"Failed to send agent run metadata: {exc}")
2074
+
2075
+
2076
def transcript_metadata(
    metadata: Dict[str, Any],
    *,
    name: Optional[str] = None,
    description: Optional[str] = None,
    transcript_group_id: Optional[str] = None,
) -> None:
    """
    Send transcript metadata directly to the backend for the current transcript.

    Args:
        metadata: Dictionary of metadata to attach to the current transcript (required)
        name: Optional transcript name
        description: Optional transcript description
        transcript_group_id: Optional transcript group ID to associate with

    Example:
        transcript_metadata({"user": "John", "model": "gpt-4"})
        transcript_metadata({"env": "prod"}, name="data_processing")
        transcript_metadata(
            {"team": "search"},
            name="validation",
            transcript_group_id="group-123",
        )
    """
    if is_disabled("transcript_metadata()"):
        return

    tracer = get_tracer("transcript_metadata()")
    if tracer is None:
        logger.error("Docent tracer unavailable; transcript metadata will not be sent.")
        return

    current_transcript_id = tracer.get_current_transcript_id()
    if not current_transcript_id:
        logger.warning("No active transcript context. Metadata will not be sent.")
        return

    try:
        tracer.send_transcript_metadata(
            current_transcript_id, name, description, transcript_group_id, metadata
        )
    except Exception as exc:
        logger.error(f"Failed to send transcript metadata: {exc}")
2120
+
2121
+
2122
def transcript_group_metadata(
    metadata: Dict[str, Any],
    *,
    name: Optional[str] = None,
    description: Optional[str] = None,
    parent_transcript_group_id: Optional[str] = None,
) -> None:
    """
    Send transcript group metadata directly to the backend for the current transcript group.

    Args:
        metadata: Dictionary of metadata to attach to the current transcript group (required)
        name: Optional transcript group name
        description: Optional transcript group description
        parent_transcript_group_id: Optional parent transcript group ID

    Example:
        transcript_group_metadata({"team": "search", "env": "prod"})
        transcript_group_metadata({"env": "prod"}, name="pipeline")
        transcript_group_metadata(
            {"team": "search"},
            name="pipeline",
            parent_transcript_group_id="root-group",
        )
    """
    if is_disabled("transcript_group_metadata()"):
        return

    tracer = get_tracer("transcript_group_metadata()")
    if tracer is None:
        logger.error("Docent tracer unavailable; transcript group metadata will not be sent.")
        return

    current_group_id = tracer.get_current_transcript_group_id()
    if not current_group_id:
        logger.warning("No active transcript group context. Metadata will not be sent.")
        return

    try:
        tracer.send_transcript_group_metadata(
            current_group_id, name, description, parent_transcript_group_id, metadata
        )
    except Exception as exc:
        logger.error(f"Failed to send transcript group metadata: {exc}")
2166
+
2167
+
2168
class AgentRunContext:
    """Context manager that works in both sync and async contexts."""

    def __init__(
        self,
        agent_run_id: Optional[str] = None,
        transcript_id: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
        **attributes: Any,
    ):
        # Arguments are stored verbatim; the IDs may be rewritten on entry when
        # tracing is disabled or uninitialized.
        self.agent_run_id = agent_run_id
        self.transcript_id = transcript_id
        self.metadata = metadata
        self.attributes: dict[str, Any] = attributes
        self._sync_context: Optional[Any] = None
        self._async_context: Optional[Any] = None

    def _fallback_ids(self) -> tuple[str, str]:
        """Record and return placeholder IDs used when tracing cannot run."""
        self.agent_run_id = _get_disabled_agent_run_id(self.agent_run_id)
        self.transcript_id = _get_disabled_transcript_id(self.transcript_id)
        return self.agent_run_id, self.transcript_id

    def __enter__(self) -> tuple[str, str]:
        """Sync context manager entry."""
        if is_disabled("agent_run_context"):
            return self._fallback_ids()

        tracer = get_tracer("agent_run_context")
        if tracer is None:
            logger.error("Cannot enter agent_run_context because tracing is not initialized.")
            return self._fallback_ids()
        self._sync_context = tracer.agent_run_context(
            self.agent_run_id, self.transcript_id, metadata=self.metadata, **self.attributes
        )
        return self._sync_context.__enter__()

    def __exit__(self, exc_type: type[BaseException], exc_val: Any, exc_tb: Any) -> None:
        """Sync context manager exit."""
        if self._sync_context:
            self._sync_context.__exit__(exc_type, exc_val, exc_tb)

    async def __aenter__(self) -> tuple[str, str]:
        """Async context manager entry."""
        if is_disabled("agent_run_context"):
            return self._fallback_ids()

        tracer = get_tracer("agent_run_context")
        if tracer is None:
            logger.error("Cannot enter agent_run_context because tracing is not initialized.")
            return self._fallback_ids()
        self._async_context = tracer.async_agent_run_context(
            self.agent_run_id, self.transcript_id, metadata=self.metadata, **self.attributes
        )
        return await self._async_context.__aenter__()

    async def __aexit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        """Async context manager exit."""
        if self._async_context:
            await self._async_context.__aexit__(exc_type, exc_val, exc_tb)
2230
+
2231
+
2232
def agent_run(
    func: Optional[Callable[..., Any]] = None, *, metadata: Optional[Dict[str, Any]] = None
):
    """
    Decorator to wrap a function in an agent_run_context (sync or async).
    Injects agent_run_id and transcript_id as function attributes.
    Optionally accepts metadata to attach to the agent run context.

    Example:
        @agent_run
        def my_func(x, y):
            print(my_func.docent.agent_run_id, my_func.docent.transcript_id)

        @agent_run(metadata={"user": "John", "model": "gpt-4"})
        def my_func_with_metadata(x, y):
            print(my_func_with_metadata.docent.agent_run_id)

        @agent_run(metadata={"config": {"model": "gpt-4", "temperature": 0.7}})
        async def my_async_func(z):
            print(my_async_func.docent.agent_run_id)
    """
    import functools
    import inspect

    def _attach_ids(target: Callable[..., Any], run_id: str, tr_id: str) -> None:
        # Expose the active IDs on the wrapper itself as `target.docent`.
        setattr(
            target,
            "docent",
            type(
                "DocentData",
                (),
                {"agent_run_id": run_id, "transcript_id": tr_id},
            )(),
        )

    def decorator(f: Callable[..., Any]) -> Callable[..., Any]:
        if inspect.iscoroutinefunction(f):

            @functools.wraps(f)
            async def async_wrapper(*args: Any, **kwargs: Any) -> Any:
                async with AgentRunContext(metadata=metadata) as (run_id, tr_id):
                    _attach_ids(async_wrapper, run_id, tr_id)
                    return await f(*args, **kwargs)

            return async_wrapper

        @functools.wraps(f)
        def sync_wrapper(*args: Any, **kwargs: Any) -> Any:
            with AgentRunContext(metadata=metadata) as (run_id, tr_id):
                _attach_ids(sync_wrapper, run_id, tr_id)
                return f(*args, **kwargs)

        return sync_wrapper

    # Support both bare `@agent_run` and parameterized `@agent_run(...)` usage.
    return decorator if func is None else decorator(func)
2304
+
2305
+
2306
def agent_run_context(
    agent_run_id: Optional[str] = None,
    transcript_id: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
    **attributes: Any,
) -> AgentRunContext:
    """
    Create an agent run context for tracing.

    Thin factory over AgentRunContext; the returned object supports both
    'with' and 'async with'.

    Args:
        agent_run_id: Optional agent run ID (auto-generated if not provided)
        transcript_id: Optional transcript ID (auto-generated if not provided)
        metadata: Optional nested dictionary of metadata to attach as events
        **attributes: Additional attributes to add to the context

    Returns:
        A context manager that can be used with both 'with' and 'async with'

    Example:
        # Sync usage
        with agent_run_context() as (agent_run_id, transcript_id):
            pass

        # Async usage
        async with agent_run_context() as (agent_run_id, transcript_id):
            pass

        # With metadata
        with agent_run_context(metadata={"user": "John", "model": "gpt-4"}) as (agent_run_id, transcript_id):
            pass
    """
    return AgentRunContext(agent_run_id, transcript_id, metadata=metadata, **attributes)
2338
+
2339
+
2340
class TranscriptContext:
    """Context manager for creating and managing transcripts."""

    def __init__(
        self,
        name: Optional[str] = None,
        transcript_id: Optional[str] = None,
        description: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
        transcript_group_id: Optional[str] = None,
    ):
        # Arguments are stored verbatim; the transcript ID may be rewritten on
        # entry when tracing is disabled or uninitialized.
        self.name = name
        self.transcript_id = transcript_id
        self.description = description
        self.metadata = metadata
        self.transcript_group_id = transcript_group_id
        self._sync_context: Optional[Any] = None
        self._async_context: Optional[Any] = None

    def _fallback_id(self) -> str:
        """Record and return the placeholder transcript ID used when tracing cannot run."""
        self.transcript_id = _get_disabled_transcript_id(self.transcript_id)
        return self.transcript_id

    def __enter__(self) -> str:
        """Sync context manager entry."""
        if is_disabled("transcript_context"):
            return self._fallback_id()

        tracer = get_tracer("transcript_context")
        if tracer is None:
            logger.error("Cannot enter transcript_context because tracing is not initialized.")
            return self._fallback_id()
        self._sync_context = tracer.transcript_context(
            name=self.name,
            transcript_id=self.transcript_id,
            description=self.description,
            metadata=self.metadata,
            transcript_group_id=self.transcript_group_id,
        )
        return self._sync_context.__enter__()

    def __exit__(self, exc_type: type[BaseException], exc_val: Any, exc_tb: Any) -> None:
        """Sync context manager exit."""
        if self._sync_context:
            self._sync_context.__exit__(exc_type, exc_val, exc_tb)

    async def __aenter__(self) -> str:
        """Async context manager entry."""
        if is_disabled("transcript_context"):
            return self._fallback_id()

        tracer = get_tracer("transcript_context")
        if tracer is None:
            logger.error("Cannot enter transcript_context because tracing is not initialized.")
            return self._fallback_id()
        self._async_context = tracer.async_transcript_context(
            name=self.name,
            transcript_id=self.transcript_id,
            description=self.description,
            metadata=self.metadata,
            transcript_group_id=self.transcript_group_id,
        )
        return await self._async_context.__aenter__()

    async def __aexit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        """Async context manager exit."""
        if self._async_context:
            await self._async_context.__aexit__(exc_type, exc_val, exc_tb)
2408
+
2409
+
2410
+ def transcript(
2411
+ func: Optional[Callable[..., Any]] = None,
2412
+ *,
2413
+ name: Optional[str] = None,
2414
+ transcript_id: Optional[str] = None,
2415
+ description: Optional[str] = None,
2416
+ metadata: Optional[Dict[str, Any]] = None,
2417
+ transcript_group_id: Optional[str] = None,
2418
+ ):
2419
+ """
2420
+ Decorator to wrap a function in a transcript context.
2421
+ Injects transcript_id as a function attribute.
2422
+
2423
+ Example:
2424
+ @transcript
2425
+ def my_func(x, y):
2426
+ print(my_func.docent.transcript_id)
2427
+
2428
+ @transcript(name="data_processing", description="Process user data")
2429
+ def my_func_with_name(x, y):
2430
+ print(my_func_with_name.docent.transcript_id)
2431
+
2432
+ @transcript(metadata={"user": "John", "model": "gpt-4"})
2433
+ async def my_async_func(z):
2434
+ print(my_async_func.docent.transcript_id)
2435
+ """
2436
+ import functools
2437
+ import inspect
2438
+
2439
+ def decorator(f: Callable[..., Any]) -> Callable[..., Any]:
2440
+ if inspect.iscoroutinefunction(f):
2441
+
2442
+ @functools.wraps(f)
2443
+ async def async_wrapper(*args: Any, **kwargs: Any) -> Any:
2444
+ async with TranscriptContext(
2445
+ name=name,
2446
+ transcript_id=transcript_id,
2447
+ description=description,
2448
+ metadata=metadata,
2449
+ transcript_group_id=transcript_group_id,
2450
+ ) as transcript_id_result:
2451
+ # Store docent data as function attributes
2452
+ setattr(
2453
+ async_wrapper,
2454
+ "docent",
2455
+ type(
2456
+ "DocentData",
2457
+ (),
2458
+ {
2459
+ "transcript_id": transcript_id_result,
2460
+ },
2461
+ )(),
2462
+ )
2463
+ return await f(*args, **kwargs)
2464
+
2465
+ return async_wrapper
2466
+ else:
2467
+
2468
+ @functools.wraps(f)
2469
+ def sync_wrapper(*args: Any, **kwargs: Any) -> Any:
2470
+ with TranscriptContext(
2471
+ name=name,
2472
+ transcript_id=transcript_id,
2473
+ description=description,
2474
+ metadata=metadata,
2475
+ transcript_group_id=transcript_group_id,
2476
+ ) as transcript_id_result:
2477
+ # Store docent data as function attributes
2478
+ setattr(
2479
+ sync_wrapper,
2480
+ "docent",
2481
+ type(
2482
+ "DocentData",
2483
+ (),
2484
+ {
2485
+ "transcript_id": transcript_id_result,
2486
+ },
2487
+ )(),
2488
+ )
2489
+ return f(*args, **kwargs)
2490
+
2491
+ return sync_wrapper
2492
+
2493
+ if func is None:
2494
+ return decorator
2495
+ else:
2496
+ return decorator(func)
2497
+
2498
+
2499
def transcript_context(
    name: Optional[str] = None,
    transcript_id: Optional[str] = None,
    description: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
    transcript_group_id: Optional[str] = None,
) -> TranscriptContext:
    """
    Create a transcript context for tracing.

    Args:
        name: Optional transcript name
        transcript_id: Optional transcript ID (auto-generated if not provided)
        description: Optional transcript description
        metadata: Optional metadata to attach to the transcript
        transcript_group_id: Optional transcript group ID to associate the transcript with

    Returns:
        A context manager that can be used with both 'with' and 'async with'

    Example:
        # Sync usage
        with transcript_context(name="data_processing") as transcript_id:
            pass

        # Async usage
        async with transcript_context(description="Process user data") as transcript_id:
            pass

        # With metadata
        with transcript_context(metadata={"user": "John", "model": "gpt-4"}) as transcript_id:
            pass
    """
    return TranscriptContext(name, transcript_id, description, metadata, transcript_group_id)
2533
+
2534
+
2535
class TranscriptGroupContext:
    """Context manager for creating and managing transcript groups."""

    def __init__(
        self,
        name: Optional[str] = None,
        transcript_group_id: Optional[str] = None,
        description: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
        parent_transcript_group_id: Optional[str] = None,
    ):
        # Arguments are stored verbatim; the group ID may be rewritten on entry
        # when tracing is disabled or uninitialized.
        self.name = name
        self.transcript_group_id = transcript_group_id
        self.description = description
        self.metadata = metadata
        self.parent_transcript_group_id = parent_transcript_group_id
        self._sync_context: Optional[Any] = None
        self._async_context: Optional[Any] = None

    def _fallback_id(self) -> str:
        """Record and return the placeholder group ID used when tracing cannot run."""
        self.transcript_group_id = _get_disabled_transcript_group_id(self.transcript_group_id)
        return self.transcript_group_id

    def __enter__(self) -> str:
        """Sync context manager entry."""
        if is_disabled("transcript_group_context"):
            return self._fallback_id()

        tracer = get_tracer("transcript_group_context")
        if tracer is None:
            logger.error(
                "Cannot enter transcript_group_context because tracing is not initialized."
            )
            return self._fallback_id()
        self._sync_context = tracer.transcript_group_context(
            name=self.name,
            transcript_group_id=self.transcript_group_id,
            description=self.description,
            metadata=self.metadata,
            parent_transcript_group_id=self.parent_transcript_group_id,
        )
        return self._sync_context.__enter__()

    def __exit__(self, exc_type: type[BaseException], exc_val: Any, exc_tb: Any) -> None:
        """Sync context manager exit."""
        if self._sync_context:
            self._sync_context.__exit__(exc_type, exc_val, exc_tb)

    async def __aenter__(self) -> str:
        """Async context manager entry."""
        if is_disabled("transcript_group_context"):
            return self._fallback_id()

        tracer = get_tracer("transcript_group_context")
        if tracer is None:
            logger.error(
                "Cannot enter transcript_group_context because tracing is not initialized."
            )
            return self._fallback_id()
        self._async_context = tracer.async_transcript_group_context(
            name=self.name,
            transcript_group_id=self.transcript_group_id,
            description=self.description,
            metadata=self.metadata,
            parent_transcript_group_id=self.parent_transcript_group_id,
        )
        return await self._async_context.__aenter__()

    async def __aexit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        """Async context manager exit."""
        if self._async_context:
            await self._async_context.__aexit__(exc_type, exc_val, exc_tb)
2607
+
2608
+
2609
def transcript_group(
    func: Optional[Callable[..., Any]] = None,
    *,
    name: Optional[str] = None,
    transcript_group_id: Optional[str] = None,
    description: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
    parent_transcript_group_id: Optional[str] = None,
):
    """
    Decorator to wrap a function in a transcript group context.
    Injects transcript_group_id as a function attribute.

    Example:
        @transcript_group
        def my_func(x, y):
            print(my_func.docent.transcript_group_id)

        @transcript_group(name="data_processing", description="Process user data")
        def my_func_with_name(x, y):
            print(my_func_with_name.docent.transcript_group_id)

        @transcript_group(metadata={"user": "John", "model": "gpt-4"})
        async def my_async_func(z):
            print(my_async_func.docent.transcript_group_id)
    """
    import functools
    import inspect

    def _attach_docent_data(wrapper: Callable[..., Any], group_id: str) -> None:
        """Attach a ``.docent`` attribute carrying the active transcript group id.

        NOTE(review): this mutates a shared attribute on the wrapper function,
        so concurrent or reentrant calls of the same decorated function
        overwrite each other's transcript group id — confirm this is acceptable.
        """
        setattr(
            wrapper,
            "docent",
            type(
                "DocentData",
                (),
                {
                    "transcript_group_id": group_id,
                },
            )(),
        )

    def decorator(f: Callable[..., Any]) -> Callable[..., Any]:
        # All decorator options forwarded verbatim to the context manager.
        ctx_kwargs: Dict[str, Any] = dict(
            name=name,
            transcript_group_id=transcript_group_id,
            description=description,
            metadata=metadata,
            parent_transcript_group_id=parent_transcript_group_id,
        )

        if inspect.iscoroutinefunction(f):

            @functools.wraps(f)
            async def async_wrapper(*args: Any, **kwargs: Any) -> Any:
                async with TranscriptGroupContext(**ctx_kwargs) as group_id:
                    _attach_docent_data(async_wrapper, group_id)
                    return await f(*args, **kwargs)

            return async_wrapper

        @functools.wraps(f)
        def sync_wrapper(*args: Any, **kwargs: Any) -> Any:
            with TranscriptGroupContext(**ctx_kwargs) as group_id:
                _attach_docent_data(sync_wrapper, group_id)
                return f(*args, **kwargs)

        return sync_wrapper

    if func is None:
        return decorator
    return decorator(func)
2696
+
2697
+
2698
+ def transcript_group_context(
2699
+ name: Optional[str] = None,
2700
+ transcript_group_id: Optional[str] = None,
2701
+ description: Optional[str] = None,
2702
+ metadata: Optional[Dict[str, Any]] = None,
2703
+ parent_transcript_group_id: Optional[str] = None,
2704
+ ) -> TranscriptGroupContext:
2705
+ """
2706
+ Create a transcript group context for tracing.
2707
+
2708
+ Args:
2709
+ name: Optional transcript group name
2710
+ transcript_group_id: Optional transcript group ID (auto-generated if not provided)
2711
+ description: Optional transcript group description
2712
+ metadata: Optional metadata to attach to the transcript group
2713
+ parent_transcript_group_id: Optional parent transcript group ID
2714
+
2715
+ Returns:
2716
+ A context manager that can be used with both 'with' and 'async with'
2717
+
2718
+ Example:
2719
+ # Sync usage
2720
+ with transcript_group_context(name="data_processing") as transcript_group_id:
2721
+ pass
2722
+
2723
+ # Async usage
2724
+ async with transcript_group_context(description="Process user data") as transcript_group_id:
2725
+ pass
2726
+
2727
+ # With metadata
2728
+ with transcript_group_context(metadata={"user": "John", "model": "gpt-4"}) as transcript_group_id:
2729
+ pass
2730
+ """
2731
+ return TranscriptGroupContext(
2732
+ name, transcript_group_id, description, metadata, parent_transcript_group_id
2733
+ )
2734
+
2735
+
2736
+ def _is_notebook() -> bool:
2737
+ """Check if we're running in a Jupyter notebook."""
2738
+ try:
2739
+ return "ipykernel" in sys.modules
2740
+ except Exception:
2741
+ return False