docent-python 0.1.3a0__py3-none-any.whl → 0.1.4a0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of docent-python might be problematic. Click here for more details.

docent/trace_temp.py ADDED
@@ -0,0 +1,1086 @@
1
+ import asyncio
2
+ import atexit
3
+ import contextvars
4
+ import inspect
5
+ import itertools
6
+ import logging
7
+ import os
8
+ import signal
9
+ import sys
10
+ import threading
11
+ import uuid
12
+ from collections import defaultdict
13
+ from contextlib import asynccontextmanager, contextmanager
14
+ from contextvars import ContextVar, Token
15
+ from typing import Any, AsyncIterator, Callable, Dict, Iterator, List, Optional, Union
16
+
17
+ from opentelemetry import trace
18
+ from opentelemetry.context import Context
19
+ from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter as GRPCExporter
20
+ from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter as HTTPExporter
21
+ from opentelemetry.instrumentation.anthropic import AnthropicInstrumentor
22
+ from opentelemetry.instrumentation.bedrock import BedrockInstrumentor
23
+ from opentelemetry.instrumentation.langchain import LangchainInstrumentor
24
+ from opentelemetry.instrumentation.openai import OpenAIInstrumentor
25
+ from opentelemetry.instrumentation.threading import ThreadingInstrumentor
26
+ from opentelemetry.sdk.resources import Resource
27
+ from opentelemetry.sdk.trace import ReadableSpan, SpanProcessor, TracerProvider
28
+ from opentelemetry.sdk.trace.export import (
29
+ BatchSpanProcessor,
30
+ ConsoleSpanExporter,
31
+ SimpleSpanProcessor,
32
+ )
33
+ from opentelemetry.trace import Span
34
+
35
# Configure logging
# NOTE(review): basicConfig() runs at import time and configures the *root*
# logger, so importing this module affects the host application's logging.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# The module logger is switched off, so every logger.info/warning/error call
# in this file is silent unless a caller sets `logger.disabled = False`.
logger.disabled = True

# Default configuration
# Base URL used when no endpoint is passed to DocentTracer / initialize_tracing.
DEFAULT_ENDPOINT = "https://api.docent.transluce.org/rest/telemetry"
42
+
43
+
44
+ def _is_async_context() -> bool:
45
+ """Detect if we're in an async context."""
46
+ try:
47
+ # Check if we're in an async function
48
+ frame = inspect.currentframe()
49
+ while frame:
50
+ if frame.f_code.co_flags & inspect.CO_COROUTINE:
51
+ return True
52
+ frame = frame.f_back
53
+ return False
54
+ except:
55
+ return False
56
+
57
+
58
+ def _is_running_in_event_loop() -> bool:
59
+ """Check if we're running in an event loop."""
60
+ try:
61
+ asyncio.get_running_loop()
62
+ return True
63
+ except RuntimeError:
64
+ return False
65
+
66
+
67
+ def _is_notebook() -> bool:
68
+ """Check if we're running in a Jupyter notebook."""
69
+ try:
70
+ return "ipykernel" in sys.modules
71
+ except:
72
+ return False
73
+
74
+
75
class DocentTracer:
    """Manages Docent tracing setup and provides tracing utilities.

    Each instance builds its own ``TracerProvider`` (it is never installed as
    the global OpenTelemetry provider), keeps a long-lived root span named
    ``application_session``, and stamps every span started on that provider
    with ``collection_id``, ``agent_run_id`` and, when one is set,
    ``transcript_id`` plus a per-transcript ``span_order``.
    """

    class _ContextSpanProcessor(SpanProcessor):
        """Span processor that copies the tracer's context variables onto new spans."""

        def __init__(self, manager: "DocentTracer"):
            self.manager: "DocentTracer" = manager

        def on_start(self, span: Span, parent_context: Optional[Context] = None) -> None:
            manager = self.manager

            # collection_id is always known.
            span.set_attribute("collection_id", manager.collection_id)

            # agent_run_id: fall back to the tracer-wide default (and flag it)
            # when the context variable is unset or empty.
            agent_run_id = manager._agent_run_id_var.get(None)
            if not agent_run_id:
                span.set_attribute("agent_run_id_default", True)
                agent_run_id = manager.default_agent_run_id
            span.set_attribute("agent_run_id", agent_run_id)

            # transcript_id is optional; span_order is only assigned with it.
            transcript_id = manager._transcript_id_var.get(None)
            if transcript_id:
                span.set_attribute("transcript_id", transcript_id)
                span.set_attribute("span_order", manager._next_span_order(transcript_id))

            # Extra attributes supplied through an agent-run context.
            extra_attributes = manager._attributes_var.get(None)
            if extra_attributes:
                for key, value in extra_attributes.items():
                    span.set_attribute(key, value)

        def on_end(self, span: ReadableSpan) -> None:
            pass

        def shutdown(self) -> None:
            pass

        def force_flush(self, timeout_millis: Optional[float] = None) -> bool:
            return True

    def __init__(
        self,
        collection_name: str = "default-collection-name",
        collection_id: Optional[str] = None,
        agent_run_id: Optional[str] = None,
        endpoint: Union[str, List[str]] = DEFAULT_ENDPOINT,
        headers: Optional[Dict[str, str]] = None,
        api_key: Optional[str] = None,
        enable_console_export: bool = False,
        enable_otlp_export: bool = True,
        disable_batch: bool = False,
        span_postprocess_callback: Optional[Callable[[ReadableSpan], None]] = None,
    ):
        """
        Initialize Docent tracing manager.

        Args:
            collection_name: Name of the collection for resource attributes
            collection_id: Optional collection ID (auto-generated if not provided)
            agent_run_id: Optional agent_run_id to use for code outside of an agent run context (auto-generated if not provided)
            endpoint: OTLP endpoint URL(s) - can be a single string or list of strings for multiple endpoints
            headers: Optional headers for authentication (copied; the caller's dict is never modified)
            api_key: Optional API key for bearer token authentication (overrides any Authorization entry in headers)
            enable_console_export: Whether to export to console
            enable_otlp_export: Whether to export to OTLP endpoint
            disable_batch: Whether to disable batch processing (use SimpleSpanProcessor)
            span_postprocess_callback: Optional callback for post-processing spans (stored on the instance)
        """
        self.collection_name: str = collection_name
        self.collection_id: str = collection_id if collection_id else str(uuid.uuid4())
        self.default_agent_run_id: str = agent_run_id if agent_run_id else str(uuid.uuid4())

        # Normalise to a private list so later changes to the caller's list
        # cannot alter this tracer's configuration.
        self.endpoints: List[str] = [endpoint] if isinstance(endpoint, str) else list(endpoint)

        # Copy the headers: the Authorization entry below must not leak into
        # a dict the caller may reuse elsewhere.
        self.headers: Dict[str, str] = dict(headers) if headers else {}

        # Handle API key authentication (takes precedence over custom headers)
        if api_key:
            self.headers["Authorization"] = f"Bearer {api_key}"
            logger.info(f"Using API key authentication for {self.collection_name}")
        elif self.headers.get("Authorization"):
            logger.info(f"Using custom Authorization header for {self.collection_name}")
        else:
            logger.info(f"No authentication configured for {self.collection_name}")

        self.enable_console_export = enable_console_export
        self.enable_otlp_export = enable_otlp_export
        self.disable_batch = disable_batch
        self.span_postprocess_callback = span_postprocess_callback

        # Use separate tracer provider to avoid interfering with existing OTEL setup
        self._tracer_provider: Optional[TracerProvider] = None
        self._root_span: Optional[Span] = None
        self._root_context: Context = Context()
        self._tracer: Optional[trace.Tracer] = None
        self._initialized: bool = False
        self._cleanup_registered: bool = False
        self._disabled: bool = False
        self._spans_processors: List[Union[BatchSpanProcessor, SimpleSpanProcessor]] = []

        # Context variables for agent_run_id and transcript_id (thread/async safe)
        self._collection_id_var: ContextVar[str] = contextvars.ContextVar("docent_collection_id")
        self._agent_run_id_var: ContextVar[str] = contextvars.ContextVar("docent_agent_run_id")
        self._transcript_id_var: ContextVar[str] = contextvars.ContextVar("docent_transcript_id")
        self._attributes_var: ContextVar[dict[str, Any]] = contextvars.ContextVar(
            "docent_attributes"
        )
        # One monotonically increasing counter per transcript_id, shared across
        # execution contexts and guarded by a lock.
        self._transcript_counters: defaultdict[str, itertools.count[int]] = defaultdict(
            lambda: itertools.count(0)
        )
        self._transcript_counter_lock = threading.Lock()

    def get_current_docent_span(self) -> Optional[Span]:
        """
        Get the span stored in this tracer's own root context.

        The lookup is done against ``self._root_context`` rather than the
        ambient OpenTelemetry context.

        Returns:
            The span found in the root context, or None if the root context has
            been cleared or the lookup raises.
        """
        if self._root_context is None:
            return None

        try:
            return trace.get_current_span(context=self._root_context)
        except Exception:
            return None

    def _register_cleanup(self):
        """Register atexit and SIGINT/SIGTERM handlers that run cleanup() (once)."""
        if self._cleanup_registered:
            return

        atexit.register(self.cleanup)

        # signal.signal() replaces whatever handler was installed before and
        # raises ValueError outside the main thread, hence the guard.
        try:
            signal.signal(signal.SIGINT, self._signal_handler)
            signal.signal(signal.SIGTERM, self._signal_handler)
        except (ValueError, OSError):
            # Signal handlers might not work in all environments
            pass

        self._cleanup_registered = True

    def _next_span_order(self, transcript_id: str) -> int:
        """
        Return the next sequence number for ``transcript_id``.

        Numbers start at 0 and increase by one per call; the counter lookup
        and increment happen under a lock.
        """
        with self._transcript_counter_lock:
            return next(self._transcript_counters[transcript_id])

    def _signal_handler(self, signum: int, frame: Optional[object]):
        """Handle shutdown signals: run cleanup(), then exit with status 0."""
        self.cleanup()
        sys.exit(0)

    def _init_spans_exporter(self, endpoint: str) -> Optional[Union[HTTPExporter, GRPCExporter]]:
        """
        Build the span exporter matching ``endpoint``.

        Endpoints that start with ``http://`` or ``https://`` (case-insensitive)
        get an HTTP exporter targeting ``<endpoint>/v1/traces``; anything else is
        treated as a gRPC target.

        Returns:
            The exporter, or None when OTLP export is disabled or construction fails.
        """
        if not self.enable_otlp_export:
            return None

        try:
            target = endpoint.strip()
            # Match on the URL scheme, not on a substring: a gRPC host such as
            # "my-http-collector:4317" must not be routed to the HTTP exporter.
            if target.lower().startswith(("http://", "https://")):
                # Avoid "//v1/traces" when the base URL ends with a slash.
                return HTTPExporter(
                    endpoint=f"{target.rstrip('/')}/v1/traces", headers=self.headers
                )
            return GRPCExporter(endpoint=target, headers=self.headers)
        except Exception as e:
            logger.error(f"Failed to initialize span exporter for {endpoint}: {e}")
            return None

    def _init_spans_exporters(self) -> List[Union[HTTPExporter, GRPCExporter]]:
        """Build one exporter per configured endpoint, skipping those that fail."""
        exporters: List[Union[HTTPExporter, GRPCExporter]] = []

        for endpoint in self.endpoints:
            exporter = self._init_spans_exporter(endpoint)
            if exporter:
                exporters.append(exporter)
                logger.info(f"Initialized exporter for endpoint: {endpoint}")
            else:
                logger.warning(f"Failed to initialize exporter for endpoint: {endpoint}")

        return exporters

    def _create_span_processor(
        self, exporter: Union[HTTPExporter, GRPCExporter, ConsoleSpanExporter]
    ) -> Union[SimpleSpanProcessor, BatchSpanProcessor]:
        """Wrap ``exporter`` in a SimpleSpanProcessor (batching disabled or notebook) or a BatchSpanProcessor."""
        if self.disable_batch or _is_notebook():
            return SimpleSpanProcessor(exporter)
        return BatchSpanProcessor(exporter)

    def _install_export_processors(self, provider: TracerProvider) -> None:
        """Attach OTLP and console export processors to ``provider`` per configuration."""
        if self.enable_otlp_export:
            otlp_exporters = self._init_spans_exporters()

            if otlp_exporters:
                # One processor per exporter
                for exporter in otlp_exporters:
                    otlp_processor = self._create_span_processor(exporter)
                    provider.add_span_processor(otlp_processor)
                    self._spans_processors.append(otlp_processor)

                logger.info(
                    f"Added {len(otlp_exporters)} OTLP exporters for {len(self.endpoints)} endpoints"
                )
            else:
                logger.warning("Failed to initialize OTLP exporter")

        if self.enable_console_export:
            console_processor = self._create_span_processor(ConsoleSpanExporter())
            provider.add_span_processor(console_processor)
            self._spans_processors.append(console_processor)

    def _instrument_libraries(self, provider: TracerProvider) -> None:
        """Enable threading and LLM-client instrumentation; failures are logged, not raised."""
        # Instrument threading for better context propagation
        try:
            ThreadingInstrumentor().instrument()
        except Exception as e:
            logger.warning(f"Failed to instrument threading: {e}")

        # Each client library is pointed at our isolated provider.
        instrumentors = (
            ("OpenAI", OpenAIInstrumentor),
            ("Anthropic", AnthropicInstrumentor),
            ("Bedrock", BedrockInstrumentor),
            ("LangChain", LangchainInstrumentor),
        )
        for label, instrumentor_cls in instrumentors:
            try:
                instrumentor_cls().instrument(tracer_provider=provider)
                logger.info(f"Instrumented {label}")
            except Exception as e:
                logger.warning(f"Failed to instrument {label}: {e}")

    def initialize(self):
        """
        Initialize Docent tracing setup.

        No-op when already initialized or disabled. On failure the tracer marks
        itself disabled and the exception is re-raised.
        """
        if self._initialized or self._disabled:
            return

        try:
            # Create our own isolated tracer provider
            provider = TracerProvider(
                resource=Resource.create({"service.name": self.collection_name})
            )
            self._tracer_provider = provider

            self._install_export_processors(provider)

            # Add our custom context span processor
            provider.add_span_processor(self._ContextSpanProcessor(self))

            # Get tracer from our isolated provider (don't set global provider)
            self._tracer = provider.get_tracer(__name__)

            # Start root span
            self._root_span = self._tracer.start_span(
                "application_session",
                attributes={
                    "service.name": self.collection_name,
                    "session.type": "application_root",
                },
            )
            self._root_context = trace.set_span_in_context(
                self._root_span, context=self._root_context
            )

            self._instrument_libraries(provider)

            # Register cleanup handlers
            self._register_cleanup()

            self._initialized = True
            logger.info(f"Docent tracing initialized for {self.collection_name}")

        except Exception as e:
            logger.error(f"Failed to initialize Docent tracing: {e}")
            self._disabled = True
            raise

    def cleanup(self):
        """
        Clean up Docent tracing resources.

        Emits a ``trace_end`` span, ends the root span, clears the root span and
        context, and shuts down the tracer provider. Errors are logged, not raised.
        """
        try:
            # Create an explicit end-of-trace span before ending the root span
            if self._tracer and self._root_span:
                end_span = self._tracer.start_span(
                    "trace_end",
                    context=self._root_context,
                    attributes={
                        "event.type": "trace_end",
                    },
                )
                end_span.end()

            # The root span is ended whenever it exists (the former
            # is_recording() check selected between two identical branches).
            if self._root_span:
                self._root_span.end()

            self._root_span = None
            self._root_context = None  # type: ignore

            # Shutdown our isolated tracer provider
            if self._tracer_provider:
                self._tracer_provider.shutdown()
                self._tracer_provider = None
        except Exception as e:
            logger.error(f"Error during cleanup: {e}")

    def close(self):
        """Explicitly close the tracer: run cleanup() and drop the atexit hook."""
        try:
            self.cleanup()
            if self._cleanup_registered:
                atexit.unregister(self.cleanup)
                self._cleanup_registered = False
        except Exception as e:
            logger.error(f"Error during close: {e}")

    def flush(self) -> None:
        """Force flush all export span processors; errors are logged, not raised."""
        try:
            for processor in self._spans_processors:
                if hasattr(processor, "force_flush"):
                    processor.force_flush()
        except Exception as e:
            logger.error(f"Error during flush: {e}")

    def set_disabled(self, disabled: bool) -> None:
        """Enable or disable tracing; disabling an initialized tracer runs cleanup()."""
        self._disabled = disabled
        if disabled and self._initialized:
            self.cleanup()

    def verify_initialized(self) -> bool:
        """Return True only when the tracer is initialized and not disabled."""
        if self._disabled:
            return False
        return self._initialized

    def __enter__(self) -> "DocentTracer":
        """Context manager entry: initialize and return the tracer."""
        self.initialize()
        return self

    def __exit__(
        self, exc_type: Optional[type[BaseException]], exc_val: Any, exc_tb: Any
    ) -> None:
        """Context manager exit: close the tracer (exceptions are not suppressed)."""
        self.close()

    @property
    def tracer(self) -> Optional[trace.Tracer]:
        """Get the tracer instance, initializing on first access."""
        if not self._initialized:
            self.initialize()
        return self._tracer

    @property
    def root_context(self) -> Optional[Context]:
        """Get the root context, initializing on first access."""
        if not self._initialized:
            self.initialize()
        return self._root_context

    def _require_tracer(self) -> trace.Tracer:
        """Initialize on demand and return the tracer, raising RuntimeError if there is none."""
        if not self._initialized:
            self.initialize()

        if self._tracer is None:
            raise RuntimeError("Tracer not initialized")
        return self._tracer

    @staticmethod
    def _new_ids(agent_run_id: Optional[str], transcript_id: Optional[str]) -> tuple[str, str]:
        """Fill in a random UUID4 string for whichever ID is None."""
        if agent_run_id is None:
            agent_run_id = str(uuid.uuid4())
        if transcript_id is None:
            transcript_id = str(uuid.uuid4())
        return agent_run_id, transcript_id

    @contextmanager
    def span(self, name: str, attributes: Optional[Dict[str, Any]] = None) -> Iterator[Span]:
        """
        Context manager for creating spans with attributes.

        Args:
            name: Name of the span
            attributes: Dictionary of attributes to add to the span

        Raises:
            RuntimeError: If no tracer is available after initialization.
        """
        tracer = self._require_tracer()

        with tracer.start_as_current_span(
            name, context=self._root_context, attributes=attributes or {}
        ) as active_span:
            yield active_span

    @asynccontextmanager
    async def async_span(
        self, name: str, attributes: Optional[Dict[str, Any]] = None
    ) -> AsyncIterator[Span]:
        """
        Async context manager for creating spans with attributes.

        Args:
            name: Name of the span
            attributes: Dictionary of attributes to add to the span

        Raises:
            RuntimeError: If no tracer is available after initialization.
        """
        # Span creation itself is synchronous, so reuse the sync implementation.
        with self.span(name, attributes) as active_span:
            yield active_span

    @contextmanager
    def agent_run_context(
        self,
        agent_run_id: Optional[str] = None,
        transcript_id: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
        **attributes: Any,
    ) -> Iterator[tuple[str, str]]:
        """
        Context manager for setting up an agent run context.

        Sets the agent-run, transcript and attribute context variables, opens an
        ``agent_run_context`` span, and restores the variables on exit.

        Args:
            agent_run_id: Optional agent run ID (auto-generated if not provided)
            transcript_id: Optional transcript ID (auto-generated if not provided)
            metadata: Optional nested dictionary of metadata to attach as events
            **attributes: Additional attributes to add to the context

        Yields:
            Tuple of (agent_run_id, transcript_id)

        Raises:
            RuntimeError: If no tracer is available after initialization.
        """
        tracer = self._require_tracer()
        agent_run_id, transcript_id = self._new_ids(agent_run_id, transcript_id)

        # Set context variables for this execution context
        agent_run_id_token = self._agent_run_id_var.set(agent_run_id)
        transcript_id_token = self._transcript_id_var.set(transcript_id)
        attributes_token = self._attributes_var.set(attributes)

        try:
            span_attributes: dict[str, Any] = {
                "agent_run_id": agent_run_id,
                "transcript_id": transcript_id,
                **attributes,
            }
            with tracer.start_as_current_span(
                "agent_run_context", context=self._root_context, attributes=span_attributes
            ) as run_span:
                # Attach metadata as events if provided
                if metadata:
                    _add_metadata_event_to_span(run_span, metadata)

                yield agent_run_id, transcript_id
        finally:
            self._agent_run_id_var.reset(agent_run_id_token)
            self._transcript_id_var.reset(transcript_id_token)
            self._attributes_var.reset(attributes_token)

    @asynccontextmanager
    async def async_agent_run_context(
        self,
        agent_run_id: Optional[str] = None,
        transcript_id: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
        **attributes: Any,
    ) -> AsyncIterator[tuple[str, str]]:
        """
        Async context manager for setting up an agent run context.

        Behaves like agent_run_context(): spans started inside the block receive
        the agent_run_id and transcript_id set here.

        Args:
            agent_run_id: Optional agent run ID (auto-generated if not provided)
            transcript_id: Optional transcript ID (auto-generated if not provided)
            metadata: Optional nested dictionary of metadata to attach as events
            **attributes: Additional attributes to add to the context

        Yields:
            Tuple of (agent_run_id, transcript_id)

        Raises:
            RuntimeError: If no tracer is available after initialization.
        """
        # Nothing here awaits, so delegate to the sync implementation.
        with self.agent_run_context(
            agent_run_id=agent_run_id,
            transcript_id=transcript_id,
            metadata=metadata,
            **attributes,
        ) as run_ids:
            yield run_ids

    def start_transcript(
        self,
        agent_run_id: Optional[str] = None,
        transcript_id: Optional[str] = None,
        **attributes: Any,
    ) -> tuple[Any, str, str]:
        """
        Manually start a transcript span.

        The caller is responsible for ending the span (see stop_transcript()).

        Args:
            agent_run_id: Optional agent run ID (auto-generated if not provided)
            transcript_id: Optional transcript ID (auto-generated if not provided)
            **attributes: Additional attributes to add to the span

        Returns:
            Tuple of (span, agent_run_id, transcript_id)

        Raises:
            RuntimeError: If no tracer is available after initialization.
        """
        tracer = self._require_tracer()
        agent_run_id, transcript_id = self._new_ids(agent_run_id, transcript_id)

        span_attributes: dict[str, Any] = {
            "agent_run_id": agent_run_id,
            "transcript_id": transcript_id,
            **attributes,
        }

        transcript_span: Any = tracer.start_span(
            "transcript_span", context=self._root_context, attributes=span_attributes
        )

        return transcript_span, agent_run_id, transcript_id

    def stop_transcript(self, span: Span) -> None:
        """
        Manually stop a transcript span.

        Args:
            span: The span to stop (ignored if falsy or lacking ``end``)
        """
        if span and hasattr(span, "end"):
            span.end()

    def start_span(self, name: str, attributes: Optional[Dict[str, Any]] = None) -> Span:
        """
        Manually start a span.

        Args:
            name: Name of the span
            attributes: Dictionary of attributes to add to the span

        Returns:
            The created span

        Raises:
            RuntimeError: If no tracer is available after initialization.
        """
        tracer = self._require_tracer()
        return tracer.start_span(name, context=self._root_context, attributes=attributes or {})

    def stop_span(self, span: Span) -> None:
        """
        Manually stop a span.

        Args:
            span: The span to stop (ignored if falsy or lacking ``end``)
        """
        if span and hasattr(span, "end"):
            span.end()
727
+
728
+
729
# Global instance for easy access
# Starts as None; assigned by initialize_tracing() and reset to None by
# close_tracing().
_global_tracer: Optional[DocentTracer] = None
731
+
732
+
733
def initialize_tracing(
    collection_name: str = "default-service",
    collection_id: Optional[str] = None,
    endpoint: Union[str, List[str]] = DEFAULT_ENDPOINT,
    headers: Optional[Dict[str, str]] = None,
    api_key: Optional[str] = None,
    enable_console_export: bool = False,
    enable_otlp_export: bool = True,
    disable_batch: bool = False,
    span_postprocess_callback: Optional[Callable[[ReadableSpan], None]] = None,
) -> DocentTracer:
    """
    Create (if needed) and initialize the module-wide Docent tracer.

    The first call builds the singleton from the given arguments. Later calls
    reuse the existing instance, only re-running its ``initialize()``; their
    arguments are not applied.

    Args:
        collection_name: Name of the collection
        collection_id: Optional collection ID (auto-generated if not provided)
        endpoint: OTLP endpoint URL(s) for span export - a single string or a list of strings
        headers: Optional headers for authentication
        api_key: Optional API key for bearer token authentication; when None,
            the DOCENT_API_KEY environment variable is used instead
        enable_console_export: Whether to export spans to console
        enable_otlp_export: Whether to export spans to OTLP endpoint
        disable_batch: Whether to disable batch processing (use SimpleSpanProcessor)
        span_postprocess_callback: Optional callback for post-processing spans

    Returns:
        The global Docent tracer

    Example:
        # Basic setup
        initialize_tracing("my-collection")
    """
    global _global_tracer

    # An explicit api_key wins; otherwise fall back to the environment.
    resolved_api_key: Optional[str] = (
        api_key if api_key is not None else os.environ.get("DOCENT_API_KEY")
    )

    if _global_tracer is None:
        _global_tracer = DocentTracer(
            collection_name=collection_name,
            collection_id=collection_id,
            endpoint=endpoint,
            headers=headers,
            api_key=resolved_api_key,
            enable_console_export=enable_console_export,
            enable_otlp_export=enable_otlp_export,
            disable_batch=disable_batch,
            span_postprocess_callback=span_postprocess_callback,
        )

    # Runs for both a fresh and a pre-existing tracer.
    _global_tracer.initialize()
    return _global_tracer
793
+
794
+
795
def get_tracer() -> DocentTracer:
    """Return the global Docent tracer, creating it with default settings if absent."""
    if _global_tracer is not None:
        return _global_tracer
    # Nothing set up yet: auto-initialize with defaults.
    return initialize_tracing()
801
+
802
+
803
def close_tracing() -> None:
    """Close the global Docent tracer, if any, and forget it."""
    global _global_tracer
    if not _global_tracer:
        return
    _global_tracer.close()
    _global_tracer = None
809
+
810
+
811
def flush_tracing() -> None:
    """Flush pending spans on the global tracer; does nothing if none exists."""
    if not _global_tracer:
        return
    _global_tracer.flush()
815
+
816
+
817
def verify_initialized() -> bool:
    """Return whether a global tracer exists and reports itself as initialized."""
    tracer = _global_tracer
    return False if tracer is None else tracer.verify_initialized()
822
+
823
+
824
def set_disabled(disabled: bool) -> None:
    """Forward the enable/disable switch to the global tracer, if one exists."""
    if not _global_tracer:
        return
    _global_tracer.set_disabled(disabled)
828
+
829
+
830
def get_api_key() -> Optional[str]:
    """
    Read the Docent API key from the process environment.

    Returns:
        The value of the DOCENT_API_KEY environment variable, or None when it
        is not set
    """
    api_key: Optional[str] = os.environ.get("DOCENT_API_KEY")
    return api_key
838
+
839
+
840
def agent_run_score(name: str, score: float, attributes: Optional[Dict[str, Any]] = None) -> None:
    """
    Attach an ``agent_run_score`` event to the global tracer's current Docent span.

    Failures are logged and never raised to the caller.

    Args:
        name: Name of the score metric
        score: Numeric score value
        attributes: Optional extra event attributes; they are merged last, so
            they can override the built-in keys
    """
    try:
        # Look the span up in our isolated context rather than the global one.
        active_span: Optional[Span] = get_tracer().get_current_docent_span()
        if not active_span or not hasattr(active_span, "add_event"):
            logger.warning("No current span available for recording score")
            return

        payload: dict[str, Any] = {
            "score.name": name,
            "score.value": score,
            "event.type": "score",
        }
        if attributes:
            payload.update(attributes)

        active_span.add_event(name="agent_run_score", attributes=payload)
    except Exception as e:
        logger.error(f"Failed to record score event: {e}")
867
+
868
+
869
+ def _flatten_dict(d: Dict[str, Any], prefix: str = "") -> Dict[str, Any]:
870
+ """Flatten nested dictionary with dot notation."""
871
+ flattened: Dict[str, Any] = {}
872
+ for key, value in d.items():
873
+ new_key = f"{prefix}.{key}" if prefix else key
874
+ if isinstance(value, dict):
875
+ flattened.update(_flatten_dict(dict(value), new_key)) # type: ignore
876
+ else:
877
+ flattened[new_key] = value
878
+ return flattened
879
+
880
+
881
def _add_metadata_event_to_span(span: Span, metadata: Dict[str, Any]) -> None:
    """
    Record ``metadata`` on ``span`` as a single ``agent_run_metadata`` event.

    Nested metadata is flattened to dotted keys, and every key is prefixed with
    ``metadata.``. Nothing happens when ``span`` is falsy or has no ``add_event``.

    Args:
        span: The span to add the event to
        metadata: Dictionary of metadata (can be nested)
    """
    if not span or not hasattr(span, "add_event"):
        return

    payload: dict[str, Any] = {"event.type": "metadata"}
    for flat_key, flat_value in _flatten_dict(metadata).items():
        payload[f"metadata.{flat_key}"] = flat_value

    span.add_event(name="agent_run_metadata", attributes=payload)
899
+
900
+
901
def agent_run_metadata(metadata: Dict[str, Any]) -> None:
    """
    Attach metadata as an event to the tracer's current docent span.

    The work is delegated to ``_add_metadata_event_to_span``, which flattens
    nested dictionaries into dot-separated keys. Ordinary errors are logged
    and swallowed; if the tracer reports no current span, only a warning is
    logged.

    Args:
        metadata: Dictionary of metadata to attach to the current span (can be nested)

    Example:
        agent_run_metadata({"user": "John", "id": 123, "flagged": True})
        agent_run_metadata({"user": {"id": "123", "name": "John"}, "config": {"model": "gpt-4"}})
    """
    try:
        active_span: Optional[Span] = get_tracer().get_current_docent_span()
        if not active_span:
            logger.warning("No current span available for recording metadata")
            return
        _add_metadata_event_to_span(active_span, metadata)
    except Exception as e:
        logger.error(f"Failed to record metadata event: {e}")
922
+
923
+
924
+ # Unified functions that automatically detect context
925
@asynccontextmanager
async def span(name: str, attributes: Optional[Dict[str, Any]] = None) -> AsyncIterator[Span]:
    """
    Open a tracing span, delegating to the tracer's async or sync span helper.

    This function is wrapped in ``asynccontextmanager``, so the object it
    returns only implements the asynchronous context-manager protocol: use it
    with ``async with``. A plain ``with span(...)`` is NOT supported.

    On entry, the tracer's ``async_span`` is used when either
    ``_is_async_context()`` or ``_is_running_in_event_loop()`` reports true;
    otherwise the tracer's synchronous ``span`` is used.

    Args:
        name: Span name, forwarded to the tracer
        attributes: Optional span attributes, forwarded to the tracer

    Yields:
        Whatever the selected tracer context manager yields (annotated as a Span).
    """
    # The "as" target is deliberately not called "span": that would shadow
    # this function's own name inside its body.
    if _is_async_context() or _is_running_in_event_loop():
        async with get_tracer().async_span(name, attributes) as active_span:
            yield active_span
    else:
        with get_tracer().span(name, attributes) as active_span:
            yield active_span
937
+
938
+
939
class AgentRunContext:
    """
    Context manager usable with both ``with`` and ``async with``.

    The real work is done by the tracer: ``__enter__`` wraps the manager
    returned by ``get_tracer().agent_run_context(...)`` and ``__aenter__``
    wraps the one returned by ``get_tracer().async_agent_run_context(...)``.
    The inner manager is created lazily on entry, so constructing this object
    does not touch the tracer.

    Args:
        agent_run_id: Optional agent run ID forwarded to the tracer
        transcript_id: Optional transcript ID forwarded to the tracer
        metadata: Optional metadata dictionary forwarded to the tracer
        **attributes: Additional keyword attributes forwarded to the tracer
    """

    def __init__(
        self,
        agent_run_id: Optional[str] = None,
        transcript_id: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
        **attributes: Any,
    ):
        self.agent_run_id = agent_run_id
        self.transcript_id = transcript_id
        self.metadata = metadata
        self.attributes: dict[str, Any] = attributes
        # Inner tracer-provided managers; set on entry, cleared on exit.
        self._sync_context: Optional[Any] = None
        self._async_context: Optional[Any] = None

    def __enter__(self) -> tuple[str, str]:
        """Enter the tracer's sync agent-run context and return what it yields."""
        self._sync_context = get_tracer().agent_run_context(
            self.agent_run_id, self.transcript_id, metadata=self.metadata, **self.attributes
        )
        return self._sync_context.__enter__()

    def __exit__(
        self, exc_type: Optional[type[BaseException]], exc_val: Any, exc_tb: Any
    ) -> Optional[bool]:
        """
        Exit the inner sync context, forwarding its result.

        Returns:
            The inner manager's ``__exit__`` result, so its decision about
            suppressing an exception is honoured; ``None`` if never entered.
        """
        # Detach first so a repeated exit cannot close the same inner manager
        # twice, even if the inner __exit__ raises.
        inner, self._sync_context = self._sync_context, None
        if inner is None:
            return None
        return inner.__exit__(exc_type, exc_val, exc_tb)

    async def __aenter__(self) -> tuple[str, str]:
        """Enter the tracer's async agent-run context and return what it yields."""
        self._async_context = get_tracer().async_agent_run_context(
            self.agent_run_id, self.transcript_id, metadata=self.metadata, **self.attributes
        )
        return await self._async_context.__aenter__()

    async def __aexit__(
        self, exc_type: Optional[type[BaseException]], exc_val: Any, exc_tb: Any
    ) -> Optional[bool]:
        """
        Exit the inner async context, forwarding its result.

        Returns:
            The inner manager's ``__aexit__`` result, so its decision about
            suppressing an exception is honoured; ``None`` if never entered.
        """
        inner, self._async_context = self._async_context, None
        if inner is None:
            return None
        return await inner.__aexit__(exc_type, exc_val, exc_tb)
979
+
980
+
981
def agent_run(
    func: Optional[Callable[..., Any]] = None, *, metadata: Optional[Dict[str, Any]] = None
):
    """
    Decorator that runs the wrapped function inside an ``AgentRunContext``.

    Works on both regular and coroutine functions and may be applied bare
    (``@agent_run``) or with arguments (``@agent_run(metadata=...)``).

    On every call, the IDs produced by the context are published as a
    ``docent`` attribute on the wrapper function (``.docent.agent_run_id`` and
    ``.docent.transcript_id``). The attribute lives on the function object
    itself and is overwritten by each new call.

    Args:
        func: The function to decorate when used as a bare decorator
        metadata: Optional metadata passed to the agent run context

    Returns:
        The wrapped function, or a decorator when ``func`` is omitted.

    Example:
        @agent_run
        def my_func(x, y):
            print(my_func.docent.agent_run_id, my_func.docent.transcript_id)

        @agent_run(metadata={"user": "John", "model": "gpt-4"})
        def my_func_with_metadata(x, y):
            print(my_func_with_metadata.docent.agent_run_id)

        @agent_run(metadata={"config": {"model": "gpt-4", "temperature": 0.7}})
        async def my_async_func(z):
            print(my_async_func.docent.agent_run_id)
    """
    import functools
    import inspect

    def _publish_ids(wrapper: Callable[..., Any], run_id: Any, tx_id: Any) -> None:
        # Store docent data as function attributes, on a throwaway
        # "DocentData" class instance built fresh for each call.
        holder_cls = type(
            "DocentData",
            (),
            {
                "agent_run_id": run_id,
                "transcript_id": tx_id,
            },
        )
        setattr(wrapper, "docent", holder_cls())

    def decorator(f: Callable[..., Any]) -> Callable[..., Any]:
        if inspect.iscoroutinefunction(f):

            @functools.wraps(f)
            async def async_wrapper(*args: Any, **kwargs: Any) -> Any:
                async with AgentRunContext(metadata=metadata) as (agent_run_id, transcript_id):
                    _publish_ids(async_wrapper, agent_run_id, transcript_id)
                    return await f(*args, **kwargs)

            return async_wrapper

        @functools.wraps(f)
        def sync_wrapper(*args: Any, **kwargs: Any) -> Any:
            with AgentRunContext(metadata=metadata) as (agent_run_id, transcript_id):
                _publish_ids(sync_wrapper, agent_run_id, transcript_id)
                return f(*args, **kwargs)

        return sync_wrapper

    # Bare use passes the function directly; parameterised use returns the decorator.
    return decorator if func is None else decorator(func)
1053
+
1054
+
1055
def agent_run_context(
    agent_run_id: Optional[str] = None,
    transcript_id: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
    **attributes: Any,
) -> AgentRunContext:
    """
    Build an ``AgentRunContext`` for tracing an agent run.

    This is a thin factory: all arguments are handed to ``AgentRunContext``
    unchanged.

    Args:
        agent_run_id: Optional agent run ID (auto-generated if not provided)
        transcript_id: Optional transcript ID (auto-generated if not provided)
        metadata: Optional nested dictionary of metadata to attach as events
        **attributes: Additional attributes to add to the context

    Returns:
        A context manager that can be used with both 'with' and 'async with'

    Example:
        # Sync usage
        with agent_run_context() as (agent_run_id, transcript_id):
            pass

        # Async usage
        async with agent_run_context() as (agent_run_id, transcript_id):
            pass

        # With metadata
        with agent_run_context(metadata={"user": "John", "model": "gpt-4"}) as (agent_run_id, transcript_id):
            pass
    """
    context = AgentRunContext(
        agent_run_id=agent_run_id,
        transcript_id=transcript_id,
        metadata=metadata,
        **attributes,
    )
    return context