docent-python 0.1.22a0__py3-none-any.whl → 0.1.24a0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of docent-python might be problematic. Click here for more details.

docent/trace_2.py DELETED
@@ -1,1842 +0,0 @@
1
- import atexit
2
- import contextvars
3
- import itertools
4
- import logging
5
- import os
6
- import sys
7
- import threading
8
- import uuid
9
- from collections import defaultdict
10
- from contextlib import asynccontextmanager, contextmanager
11
- from contextvars import ContextVar, Token
12
- from datetime import datetime, timezone
13
- from enum import Enum
14
- from importlib.metadata import Distribution, distributions
15
- from typing import (
16
- Any,
17
- AsyncIterator,
18
- Callable,
19
- Dict,
20
- Iterator,
21
- List,
22
- Mapping,
23
- Optional,
24
- Sequence,
25
- Set,
26
- Tuple,
27
- Union,
28
- cast,
29
- )
30
-
31
- import requests
32
- from opentelemetry import trace
33
- from opentelemetry.context import Context
34
- from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter as GRPCExporter
35
- from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter as HTTPExporter
36
- from opentelemetry.instrumentation.threading import ThreadingInstrumentor
37
- from opentelemetry.sdk.resources import Resource
38
- from opentelemetry.sdk.trace import ReadableSpan, SpanProcessor, TracerProvider, SpanLimits
39
- from opentelemetry.sdk.trace.export import (
40
- BatchSpanProcessor,
41
- ConsoleSpanExporter,
42
- SimpleSpanProcessor,
43
- SpanExporter,
44
- )
45
- from opentelemetry.trace import Span, SpanContext
46
-
47
# Module-level logger, named after this module per stdlib convention.
logger = logging.getLogger(__name__)

# Default configuration
# Base URL for the Docent telemetry backend (OTLP traces + REST endpoints).
DEFAULT_ENDPOINT = "https://api.docent.transluce.org/rest/telemetry"
# Collection name used when the caller does not supply one.
DEFAULT_COLLECTION_NAME = "default-collection-name"
52
-
53
-
54
class Instruments(Enum):
    """Enumeration of available instrument types.

    Each member names an auto-instrumentation target; membership in
    DocentTracer's ``instruments`` / ``block_instruments`` sets decides
    whether the corresponding OpenTelemetry instrumentor is activated
    during ``initialize()``.
    """

    OPENAI = "openai"
    ANTHROPIC = "anthropic"
    BEDROCK = "bedrock"
    LANGCHAIN = "langchain"
61
-
62
-
63
class DocentSpanProcessor(SpanProcessor):
    """Span processor that enriches spans on_end and forwards to exporters.

    On span start it snapshots the manager's context attributes
    (collection/agent-run/transcript ids, span_order, extras) keyed by
    ``(trace_id, span_id)``. On span end it overlays that snapshot onto the
    span's attributes through a read-only proxy and forwards the result to
    one delegate processor per exporter (Simple or Batch, chosen at
    construction time).
    """

    class _MergedReadableSpan:
        # Proxy over a ReadableSpan that overlays extra attributes without
        # mutating the original (ReadableSpan is effectively immutable).
        def __init__(self, original: ReadableSpan, extra_attributes: Dict[str, Any]):
            self._original = original
            self._extra_attributes = extra_attributes
            # Lazily-built merged view; None until first access.
            self._merged_attributes: Optional[Dict[str, Any]] = None

        def __getattr__(self, item: str) -> Any:
            # Delegate everything except `attributes` to the wrapped span.
            # (__getattr__ is only consulted for names not found normally,
            # so the `attributes` property below takes precedence.)
            return getattr(self._original, item)

        @property
        def attributes(self) -> Mapping[str, Any]:
            # Merge on first access and cache; extra attributes win over the
            # original span's on key collisions.
            if self._merged_attributes is None:
                original_attrs = cast(Mapping[str, Any], self._original.attributes)
                merged = dict(original_attrs.items())
                merged.update(self._extra_attributes)
                self._merged_attributes = merged
            return cast(Mapping[str, Any], self._merged_attributes)

    def __init__(
        self,
        manager: "DocentTracer",
        exporters: Sequence[SpanExporter],
        use_simple_processor: bool,
    ) -> None:
        """Wrap each exporter in a Simple- or BatchSpanProcessor delegate.

        Args:
            manager: Owning DocentTracer; consulted for disabled state and
                per-span context attributes.
            exporters: Exporters to forward finished spans to.
            use_simple_processor: If True, export synchronously via
                SimpleSpanProcessor (notebooks / disable_batch); otherwise
                batch in the background.
        """
        self._manager = manager
        self._delegates: List[SpanProcessor] = [
            SimpleSpanProcessor(exporter) if use_simple_processor else BatchSpanProcessor(exporter)
            for exporter in exporters
        ]
        # Attributes captured at on_start, consumed (popped) at on_end.
        self._span_attributes: Dict[Tuple[int, int], Dict[str, Any]] = {}
        # Guards _span_attributes; on_start/on_end may run on different threads.
        self._lock = threading.Lock()

    @staticmethod
    def _span_key(span: Union[Span, ReadableSpan]) -> Tuple[int, int]:
        # (trace_id, span_id) uniquely identifies a span across the process.
        context = cast(SpanContext, span.get_span_context())
        return (context.trace_id, context.span_id)

    def on_start(
        self,
        span: Span,
        parent_context: Optional[Context] = None,
    ) -> None:
        """Capture the current Docent context attributes for this span."""
        if self._manager.is_disabled() or not self._delegates:
            return

        # Snapshot at start time so span_order reflects creation order.
        attributes = self._manager.build_span_context_attributes()
        key = self._span_key(span)
        with self._lock:
            self._span_attributes[key] = attributes

    def on_end(self, span: ReadableSpan) -> None:
        """Merge captured attributes into the span and forward to delegates."""
        # Pop before the disabled check so the cache cannot leak entries.
        key = self._span_key(span)
        with self._lock:
            context_attributes = self._span_attributes.pop(key, {})

        if self._manager.is_disabled() or not self._delegates:
            return

        wrapped_span = cast(ReadableSpan, self._MergedReadableSpan(span, context_attributes))
        span_attrs_mapping = cast(Mapping[str, Any], wrapped_span.attributes)
        span_attrs = dict(span_attrs_mapping.items())
        logger.debug(
            "Exporting span copy: name='%s', collection_id=%s, agent_run_id=%s, transcript_id=%s",
            wrapped_span.name,
            span_attrs.get("collection_id"),
            span_attrs.get("agent_run_id"),
            span_attrs.get("transcript_id"),
        )

        for delegate in self._delegates:
            delegate.on_end(wrapped_span)

    def shutdown(self) -> None:
        """Shut down every delegate processor."""
        for delegate in self._delegates:
            delegate.shutdown()

    def force_flush(self, timeout_millis: Optional[float] = None) -> bool:
        """Flush all delegates; True only if every delegate flushed in time.

        ``timeout_millis`` defaults to 30000 ms (the SDK default) when None.
        """
        result = True
        delegate_timeout = int(timeout_millis) if timeout_millis is not None else 30000
        for delegate in self._delegates:
            result = delegate.force_flush(delegate_timeout) and result
        return result
148
-
149
-
150
- class DocentTracer:
151
- """
152
- Manages Docent tracing setup and provides tracing utilities.
153
- """
154
-
155
- def __init__(
156
- self,
157
- collection_name: str = DEFAULT_COLLECTION_NAME,
158
- collection_id: Optional[str] = None,
159
- agent_run_id: Optional[str] = None,
160
- endpoint: Union[str, List[str]] = DEFAULT_ENDPOINT,
161
- headers: Optional[Dict[str, str]] = None,
162
- api_key: Optional[str] = None,
163
- enable_console_export: bool = False,
164
- enable_otlp_export: bool = True,
165
- disable_batch: bool = False,
166
- instruments: Optional[Set[Instruments]] = None,
167
- block_instruments: Optional[Set[Instruments]] = None,
168
- ):
169
- self._initialized: bool = False
170
- # Check if tracing is disabled via environment variable
171
- if _is_tracing_disabled():
172
- self._disabled = True
173
- logger.info("Docent tracing disabled via DOCENT_DISABLE_TRACING environment variable")
174
- return
175
-
176
- self.collection_name: str = collection_name
177
- self.collection_id: str = collection_id if collection_id else str(uuid.uuid4())
178
- self.default_agent_run_id: str = agent_run_id if agent_run_id else str(uuid.uuid4())
179
- self.endpoints: List[str]
180
-
181
- # Handle endpoint parameter - convert to list if it's a string
182
- if isinstance(endpoint, str):
183
- self.endpoints = [endpoint]
184
- else:
185
- self.endpoints = endpoint
186
-
187
- # Build headers with authentication if provided
188
- self.headers = headers or {}
189
-
190
- # Handle API key authentication (takes precedence over custom headers)
191
- if api_key:
192
- self.headers["Authorization"] = f"Bearer {api_key}"
193
- logger.info(f"Using API key authentication for {self.collection_name}")
194
- elif self.headers.get("Authorization"):
195
- logger.info(f"Using custom Authorization header for {self.collection_name}")
196
- else:
197
- logger.info(f"No authentication configured for {self.collection_name}")
198
-
199
- self.enable_console_export = enable_console_export
200
- self.enable_otlp_export = enable_otlp_export
201
- self.disable_batch = disable_batch
202
- self.disabled_instruments: Set[Instruments] = {Instruments.LANGCHAIN}
203
- self.instruments = instruments or (set(Instruments) - self.disabled_instruments)
204
- self.block_instruments = block_instruments or set()
205
-
206
- # Use separate tracer provider to avoid interfering with existing OTEL setup
207
- self._tracer_provider: Optional[TracerProvider] = None
208
- self._root_context: Optional[Context] = Context()
209
- self._tracer: Optional[trace.Tracer] = None
210
- self._cleanup_registered: bool = False
211
- self._disabled: bool = False
212
- self._span_processor: Optional[SpanProcessor] = None
213
-
214
- # Base HTTP endpoint for direct API calls (scores, metadata, trace-done)
215
- if len(self.endpoints) > 0:
216
- self._api_endpoint_base: Optional[str] = self.endpoints[0]
217
-
218
- # Context variables for agent_run_id and transcript_id
219
- self._collection_id_var: ContextVar[str] = contextvars.ContextVar("docent_collection_id")
220
- self._agent_run_id_var: ContextVar[str] = contextvars.ContextVar("docent_agent_run_id")
221
- self._transcript_id_var: ContextVar[str] = contextvars.ContextVar("docent_transcript_id")
222
- self._transcript_group_id_var: ContextVar[str] = contextvars.ContextVar(
223
- "docent_transcript_group_id"
224
- )
225
- self._attributes_var: ContextVar[dict[str, Any]] = contextvars.ContextVar(
226
- "docent_attributes"
227
- )
228
- # Store atomic span order counters per transcript_id to persist across context switches
229
- self._transcript_counters: defaultdict[str, itertools.count[int]] = defaultdict(
230
- lambda: itertools.count(0)
231
- )
232
- self._transcript_counter_lock = threading.Lock()
233
- self._flush_lock = threading.Lock()
234
-
235
- def get_current_agent_run_id(self) -> Optional[str]:
236
- """
237
- Get the current agent run ID from context.
238
-
239
- Retrieves the agent run ID that was set in the current execution context.
240
- If no agent run context is active, returns the default agent run ID.
241
-
242
- Returns:
243
- The current agent run ID if available, or the default agent run ID
244
- if no context is active.
245
- """
246
- try:
247
- return self._agent_run_id_var.get()
248
- except LookupError:
249
- return self.default_agent_run_id
250
-
251
- def _register_cleanup(self):
252
- """Register cleanup handlers."""
253
- if self._cleanup_registered:
254
- return
255
-
256
- # Register atexit handler
257
- atexit.register(self.cleanup)
258
-
259
- self._cleanup_registered = True
260
-
261
- def _next_span_order(self, transcript_id: str) -> int:
262
- """
263
- Get the next span order for a given transcript_id.
264
- Thread-safe and guaranteed to be unique and monotonic.
265
- """
266
- with self._transcript_counter_lock:
267
- return next(self._transcript_counters[transcript_id])
268
-
269
    def build_span_context_attributes(self) -> Dict[str, Any]:
        """Collect context-driven attributes to attach to exported spans.

        Always includes ``collection_id`` and an ``agent_run_id`` (marking
        ``agent_run_id_default`` when the fallback id is used). Adds
        ``transcript_group_id`` / ``transcript_id`` when those contexts are
        active, a per-transcript ``span_order``, and any extra attributes
        stored in the context.
        """

        attributes: Dict[str, Any] = {"collection_id": self.collection_id}

        try:
            agent_run_id = self._agent_run_id_var.get()
            if agent_run_id:
                attributes["agent_run_id"] = agent_run_id
            else:
                # Context var set to a falsy value: treat as "no run" and
                # flag that the default id is being used.
                attributes["agent_run_id_default"] = True
                attributes["agent_run_id"] = self.default_agent_run_id
        except LookupError:
            # No agent-run context at all: same fallback as the falsy case.
            attributes["agent_run_id_default"] = True
            attributes["agent_run_id"] = self.default_agent_run_id

        try:
            transcript_group_id = self._transcript_group_id_var.get()
            if transcript_group_id:
                attributes["transcript_group_id"] = transcript_group_id
        except LookupError:
            pass

        transcript_id: Optional[str] = None
        try:
            transcript_id = self._transcript_id_var.get()
        except LookupError:
            transcript_id = None

        if transcript_id:
            attributes["transcript_id"] = transcript_id
            # span_order increments per call, so each exported span gets a
            # unique position within its transcript.
            attributes["span_order"] = self._next_span_order(transcript_id)

        try:
            # Extra attributes from the agent-run context; may overwrite the
            # keys set above on collision.
            additional_attributes = self._attributes_var.get()
            for key, value in additional_attributes.items():
                attributes[key] = value
        except LookupError:
            pass

        return attributes
310
-
311
- def _init_spans_exporter(self, endpoint: str) -> Optional[Union[HTTPExporter, GRPCExporter]]:
312
- """Initialize the appropriate span exporter based on endpoint."""
313
- if not self.enable_otlp_export:
314
- return None
315
-
316
- try:
317
- if "http" in endpoint.lower() or "https" in endpoint.lower():
318
- http_exporter: HTTPExporter = HTTPExporter(
319
- endpoint=f"{endpoint}/v1/traces", headers=self.headers, timeout=30
320
- )
321
- logger.debug(f"Initialized HTTP exporter for endpoint: {endpoint}/v1/traces")
322
- return http_exporter
323
- else:
324
- grpc_exporter: GRPCExporter = GRPCExporter(
325
- endpoint=endpoint, headers=self.headers, timeout=30
326
- )
327
- logger.debug(f"Initialized gRPC exporter for endpoint: {endpoint}")
328
- return grpc_exporter
329
- except Exception as e:
330
- logger.error(f"Failed to initialize span exporter for {endpoint}: {e}")
331
- return None
332
-
333
- def _init_spans_exporters(self) -> List[Union[HTTPExporter, GRPCExporter]]:
334
- """Initialize span exporters for all endpoints."""
335
- exporters: List[Union[HTTPExporter, GRPCExporter]] = []
336
-
337
- for endpoint in self.endpoints:
338
- exporter = self._init_spans_exporter(endpoint)
339
- if exporter:
340
- exporters.append(exporter)
341
- logger.info(f"Initialized exporter for endpoint: {endpoint}")
342
- else:
343
- logger.critical(f"Failed to initialize exporter for endpoint: {endpoint}")
344
-
345
- return exporters
346
-
347
    def initialize(self):
        """Initialize Docent tracing setup.

        Idempotent: returns immediately if already initialized. Builds an
        isolated TracerProvider (never installed globally), attaches the
        Docent span processor over the configured exporters, and activates
        the enabled instrumentors. On failure, tracing is disabled and the
        exception is re-raised.
        """
        if self._initialized:
            return

        # If tracing is disabled, mark as initialized but don't set up anything
        if self._disabled:
            self._initialized = True
            return

        try:

            # Check for OTEL_SPAN_ATTRIBUTE_COUNT_LIMIT environment variable
            # and never go below our own default of 1024 attributes.
            default_attribute_limit = 1024
            env_value = os.environ.get("OTEL_SPAN_ATTRIBUTE_COUNT_LIMIT", "0")
            env_limit = int(env_value) if env_value.isdigit() else 0
            attribute_limit = max(env_limit, default_attribute_limit)

            span_limits = SpanLimits(
                max_attributes=attribute_limit,
            )

            # Create our own isolated tracer provider
            self._tracer_provider = TracerProvider(
                resource=Resource.create({"service.name": self.collection_name}),
                span_limits=span_limits,
            )

            exporters: List[SpanExporter] = []

            if self.enable_otlp_export:
                otlp_exporters: List[Union[HTTPExporter, GRPCExporter]] = (
                    self._init_spans_exporters()
                )
                if otlp_exporters:
                    exporters.extend(otlp_exporters)
                    logger.info(
                        "Registered %s OTLP exporter(s) for %s endpoint(s)",
                        len(otlp_exporters),
                        len(self.endpoints),
                    )

            if self.enable_console_export:
                exporters.append(ConsoleSpanExporter())
                logger.info("Registered console span exporter")

            if exporters:
                # Simple (synchronous) processing in notebooks or when batching
                # is explicitly disabled; batched background export otherwise.
                use_simple_processor = self.disable_batch or _is_notebook()
                if use_simple_processor:
                    logger.debug("Using SimpleSpanProcessor delegation for Docent spans")
                else:
                    logger.debug("Using BatchSpanProcessor delegation for Docent spans")

                self._span_processor = DocentSpanProcessor(
                    manager=self,
                    exporters=exporters,
                    use_simple_processor=use_simple_processor,
                )
                self._tracer_provider.add_span_processor(self._span_processor)
            else:
                logger.warning("No span exporters configured; spans will not be exported")

            # Get tracer from our isolated provider (don't set global provider)
            self._tracer = self._tracer_provider.get_tracer(__name__)

            # Instrument threading for better context propagation
            try:
                ThreadingInstrumentor().instrument()
            except Exception as e:
                logger.warning(f"Failed to instrument threading: {e}")

            enabled_instruments = self.instruments - self.block_instruments

            # Instrument OpenAI with our isolated tracer provider
            if Instruments.OPENAI in enabled_instruments:
                try:
                    if is_package_installed("openai"):
                        from opentelemetry.instrumentation.openai import OpenAIInstrumentor

                        OpenAIInstrumentor().instrument(tracer_provider=self._tracer_provider)
                        logger.info("Instrumented OpenAI")
                except Exception as e:
                    logger.warning(f"Failed to instrument OpenAI: {e}")

            # Instrument Anthropic with our isolated tracer provider
            if Instruments.ANTHROPIC in enabled_instruments:
                try:
                    if is_package_installed("anthropic"):
                        from opentelemetry.instrumentation.anthropic import AnthropicInstrumentor

                        AnthropicInstrumentor().instrument(tracer_provider=self._tracer_provider)
                        logger.info("Instrumented Anthropic")
                except Exception as e:
                    logger.warning(f"Failed to instrument Anthropic: {e}")

            # Instrument Bedrock with our isolated tracer provider
            if Instruments.BEDROCK in enabled_instruments:
                try:
                    if is_package_installed("boto3"):
                        from opentelemetry.instrumentation.bedrock import BedrockInstrumentor

                        BedrockInstrumentor().instrument(tracer_provider=self._tracer_provider)
                        logger.info("Instrumented Bedrock")
                except Exception as e:
                    logger.warning(f"Failed to instrument Bedrock: {e}")

            # Instrument LangChain with our isolated tracer provider
            if Instruments.LANGCHAIN in enabled_instruments:
                try:
                    if is_package_installed("langchain") or is_package_installed("langgraph"):
                        from opentelemetry.instrumentation.langchain import LangchainInstrumentor

                        LangchainInstrumentor().instrument(tracer_provider=self._tracer_provider)
                        logger.info("Instrumented LangChain")
                except Exception as e:
                    logger.warning(f"Failed to instrument LangChain: {e}")

            # Register cleanup handlers
            self._register_cleanup()

            self._initialized = True
            logger.info(f"Docent tracing initialized for {self.collection_name}")

        except Exception as e:
            logger.error(f"Failed to initialize Docent tracing: {e}")
            # Disable tracing so later calls no-op, but still surface the error.
            self._disabled = True
            raise
474
-
475
- def cleanup(self):
476
- """
477
- Clean up Docent tracing resources.
478
-
479
- Flushes all pending spans to exporters and shuts down the tracer provider.
480
- This method is automatically called during application shutdown via atexit
481
- handlers, but can also be called manually for explicit cleanup.
482
-
483
- The cleanup process:
484
- 1. Flushes all span processors to ensure data is exported
485
- 2. Shuts down the tracer provider and releases resources
486
- """
487
- if self._disabled:
488
- return
489
-
490
- try:
491
- self.flush()
492
-
493
- if self._tracer_provider:
494
- self._tracer_provider.shutdown()
495
- self._tracer_provider = None
496
- self._span_processor = None
497
- except Exception as e:
498
- logger.error(f"Error during cleanup: {e}")
499
-
500
- def close(self):
501
- """Explicitly close the Docent tracing manager."""
502
- if self._disabled:
503
- return
504
-
505
- try:
506
- self.cleanup()
507
- if self._cleanup_registered:
508
- atexit.unregister(self.cleanup)
509
- self._cleanup_registered = False
510
- except Exception as e:
511
- logger.error(f"Error during close: {e}")
512
-
513
- def flush(self) -> None:
514
- """Force flush all spans to exporters."""
515
- if self._disabled:
516
- return
517
-
518
- try:
519
- if not self._span_processor:
520
- logger.debug("No Docent span processor registered to flush")
521
- return
522
-
523
- logger.debug("Flushing Docent span processor")
524
- self._span_processor.force_flush(timeout_millis=50)
525
- logger.debug("Span flush completed")
526
- except Exception as e:
527
- logger.error(f"Error during flush: {e}")
528
-
529
- def is_disabled(self) -> bool:
530
- """Check if tracing is disabled."""
531
- return self._disabled
532
-
533
- def set_disabled(self, disabled: bool) -> None:
534
- """Enable or disable tracing."""
535
- self._disabled = disabled
536
- if disabled and self._initialized:
537
- self.cleanup()
538
-
539
- def is_initialized(self) -> bool:
540
- """Verify if the manager is properly initialized."""
541
- return self._initialized
542
-
543
    @contextmanager
    def agent_run_context(
        self,
        agent_run_id: Optional[str] = None,
        transcript_id: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
        **attributes: Any,
    ) -> Iterator[tuple[str, str]]:
        """
        Context manager for setting up an agent run context.

        Sets the agent-run, transcript, and extra-attribute context variables
        for the duration of the block and restores their previous values on
        exit. Lazily initializes the tracer on first use.

        Args:
            agent_run_id: Optional agent run ID (auto-generated if not provided)
            transcript_id: Optional transcript ID (auto-generated if not provided)
            metadata: Optional nested dictionary of metadata to send to backend
            **attributes: Additional attributes to add to the context

        Yields:
            Tuple of (agent_run_id, transcript_id)
        """
        if self._disabled:
            # Return dummy IDs when tracing is disabled
            if agent_run_id is None:
                agent_run_id = str(uuid.uuid4())
            if transcript_id is None:
                transcript_id = str(uuid.uuid4())
            yield agent_run_id, transcript_id
            return

        if not self._initialized:
            self.initialize()

        if agent_run_id is None:
            agent_run_id = str(uuid.uuid4())
        if transcript_id is None:
            transcript_id = str(uuid.uuid4())

        # Set context variables for this execution context; the tokens let us
        # restore exactly the previous values in the finally block.
        agent_run_id_token: Token[str] = self._agent_run_id_var.set(agent_run_id)
        transcript_id_token: Token[str] = self._transcript_id_var.set(transcript_id)
        attributes_token: Token[dict[str, Any]] = self._attributes_var.set(attributes)

        try:
            # Send metadata directly to backend if provided; failures are
            # logged but do not abort the run.
            if metadata:
                try:
                    self.send_agent_run_metadata(agent_run_id, metadata)
                except Exception as e:
                    logger.warning(f"Failed sending agent run metadata: {e}")

            yield agent_run_id, transcript_id
        finally:
            # Restore the pre-context values even if the body raised.
            self._agent_run_id_var.reset(agent_run_id_token)
            self._transcript_id_var.reset(transcript_id_token)
            self._attributes_var.reset(attributes_token)
598
-
599
    @asynccontextmanager
    async def async_agent_run_context(
        self,
        agent_run_id: Optional[str] = None,
        transcript_id: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
        **attributes: Any,
    ) -> AsyncIterator[tuple[str, str]]:
        """
        Async context manager for setting up an agent run context.
        Modifies the OpenTelemetry context so all spans inherit agent_run_id and transcript_id.

        Async counterpart of agent_run_context with identical semantics.

        Args:
            agent_run_id: Optional agent run ID (auto-generated if not provided)
            transcript_id: Optional transcript ID (auto-generated if not provided)
            metadata: Optional nested dictionary of metadata to send to backend
            **attributes: Additional attributes to add to the context

        Yields:
            Tuple of (agent_run_id, transcript_id)
        """
        if self._disabled:
            # Return dummy IDs when tracing is disabled
            if agent_run_id is None:
                agent_run_id = str(uuid.uuid4())
            if transcript_id is None:
                transcript_id = str(uuid.uuid4())
            yield agent_run_id, transcript_id
            return

        if not self._initialized:
            self.initialize()

        if agent_run_id is None:
            agent_run_id = str(uuid.uuid4())
        if transcript_id is None:
            transcript_id = str(uuid.uuid4())

        # Set context variables for this execution context; tokens restore
        # the previous values on exit.
        agent_run_id_token: Token[str] = self._agent_run_id_var.set(agent_run_id)
        transcript_id_token: Token[str] = self._transcript_id_var.set(transcript_id)
        attributes_token: Token[dict[str, Any]] = self._attributes_var.set(attributes)

        try:
            # Send metadata directly to backend if provided; failures are
            # logged but do not abort the run.
            if metadata:
                try:
                    self.send_agent_run_metadata(agent_run_id, metadata)
                except Exception as e:
                    logger.warning(f"Failed sending agent run metadata: {e}")

            yield agent_run_id, transcript_id
        finally:
            # Restore the pre-context values even if the body raised.
            self._agent_run_id_var.reset(agent_run_id_token)
            self._transcript_id_var.reset(transcript_id_token)
            self._attributes_var.reset(attributes_token)
655
-
656
- def _api_headers(self) -> Dict[str, str]:
657
- """
658
- Get the API headers for HTTP requests.
659
-
660
- Returns:
661
- Dictionary of headers including Authorization if set
662
- """
663
- headers = {"Content-Type": "application/json"}
664
-
665
- authorization = self.headers.get("Authorization")
666
- if authorization:
667
- headers["Authorization"] = authorization
668
-
669
- return headers
670
-
671
- def _post_json(self, path: str, data: Dict[str, Any]) -> None:
672
- if not self._api_endpoint_base:
673
- raise RuntimeError("API endpoint base is not configured")
674
- url = f"{self._api_endpoint_base}{path}"
675
- try:
676
- resp = requests.post(url, json=data, headers=self._api_headers(), timeout=(10, 60))
677
- resp.raise_for_status()
678
- except requests.exceptions.RequestException as e:
679
- logger.error(f"Failed POST {url}: {e}")
680
-
681
- def send_agent_run_score(
682
- self,
683
- agent_run_id: str,
684
- name: str,
685
- score: float,
686
- attributes: Optional[Dict[str, Any]] = None,
687
- ) -> None:
688
- """
689
- Send a score to the backend for a specific agent run.
690
-
691
- Args:
692
- agent_run_id: The agent run ID
693
- name: Name of the score metric
694
- score: Numeric score value
695
- attributes: Optional additional attributes
696
- """
697
- if self._disabled:
698
- return
699
-
700
- collection_id = self.collection_id
701
- payload: Dict[str, Any] = {
702
- "collection_id": collection_id,
703
- "agent_run_id": agent_run_id,
704
- "score_name": name,
705
- "score_value": score,
706
- "timestamp": datetime.now(timezone.utc).isoformat(),
707
- }
708
- if attributes:
709
- payload.update(attributes)
710
- self._post_json("/v1/scores", payload)
711
-
712
- def send_agent_run_metadata(self, agent_run_id: str, metadata: Dict[str, Any]) -> None:
713
- if self._disabled:
714
- return
715
-
716
- collection_id = self.collection_id
717
- payload: Dict[str, Any] = {
718
- "collection_id": collection_id,
719
- "agent_run_id": agent_run_id,
720
- "metadata": metadata,
721
- "timestamp": datetime.now(timezone.utc).isoformat(),
722
- }
723
- self._post_json("/v1/agent-run-metadata", payload)
724
-
725
- def send_transcript_metadata(
726
- self,
727
- transcript_id: str,
728
- name: Optional[str] = None,
729
- description: Optional[str] = None,
730
- transcript_group_id: Optional[str] = None,
731
- metadata: Optional[Dict[str, Any]] = None,
732
- ) -> None:
733
- """
734
- Send transcript data to the backend.
735
-
736
- Args:
737
- transcript_id: The transcript ID
738
- name: Optional transcript name
739
- description: Optional transcript description
740
- transcript_group_id: Optional transcript group ID
741
- metadata: Optional metadata to send
742
- """
743
- if self._disabled:
744
- return
745
-
746
- collection_id = self.collection_id
747
- payload: Dict[str, Any] = {
748
- "collection_id": collection_id,
749
- "transcript_id": transcript_id,
750
- "timestamp": datetime.now(timezone.utc).isoformat(),
751
- }
752
-
753
- # Only add fields that are provided
754
- if name is not None:
755
- payload["name"] = name
756
- if description is not None:
757
- payload["description"] = description
758
- if transcript_group_id is not None:
759
- payload["transcript_group_id"] = transcript_group_id
760
- if metadata is not None:
761
- payload["metadata"] = metadata
762
-
763
- self._post_json("/v1/transcript-metadata", payload)
764
-
765
- def get_current_transcript_id(self) -> Optional[str]:
766
- """
767
- Get the current transcript ID from context.
768
-
769
- Returns:
770
- The current transcript ID if available, None otherwise
771
- """
772
- try:
773
- return self._transcript_id_var.get()
774
- except LookupError:
775
- return None
776
-
777
- def get_current_transcript_group_id(self) -> Optional[str]:
778
- """
779
- Get the current transcript group ID from context.
780
-
781
- Returns:
782
- The current transcript group ID if available, None otherwise
783
- """
784
- try:
785
- return self._transcript_group_id_var.get()
786
- except LookupError:
787
- return None
788
-
789
    @contextmanager
    def transcript_context(
        self,
        name: Optional[str] = None,
        transcript_id: Optional[str] = None,
        description: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
        transcript_group_id: Optional[str] = None,
    ) -> Iterator[str]:
        """
        Context manager for setting up a transcript context.

        Unlike agent_run_context, this requires the tracer to already be
        initialized and raises otherwise.

        Args:
            name: Optional transcript name
            transcript_id: Optional transcript ID (auto-generated if not provided)
            description: Optional transcript description
            metadata: Optional metadata to send to backend
            transcript_group_id: Optional transcript group ID

        Yields:
            The transcript ID

        Raises:
            RuntimeError: if the tracer has not been initialized.
        """
        if self._disabled:
            # Return dummy ID when tracing is disabled
            if transcript_id is None:
                transcript_id = str(uuid.uuid4())
            yield transcript_id
            return

        if not self._initialized:
            raise RuntimeError(
                "Tracer is not initialized. Call initialize_tracing() before using transcript context."
            )

        if transcript_id is None:
            transcript_id = str(uuid.uuid4())

        # Determine transcript group ID before setting new context
        if transcript_group_id is None:
            try:
                transcript_group_id = self._transcript_group_id_var.get()
            except LookupError:
                # No current transcript group context, this transcript has no group
                transcript_group_id = None

        # Set context variable for this execution context; the token lets the
        # finally block restore the previous transcript id exactly.
        transcript_id_token: Token[str] = self._transcript_id_var.set(transcript_id)

        try:
            # Send transcript data and metadata to backend; failures are
            # logged but do not abort the context.
            try:
                self.send_transcript_metadata(
                    transcript_id, name, description, transcript_group_id, metadata
                )
            except Exception as e:
                logger.warning(f"Failed sending transcript data: {e}")

            yield transcript_id
        finally:
            # Reset context variable to previous state
            self._transcript_id_var.reset(transcript_id_token)
850
-
851
    @asynccontextmanager
    async def async_transcript_context(
        self,
        name: Optional[str] = None,
        transcript_id: Optional[str] = None,
        description: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
        transcript_group_id: Optional[str] = None,
    ) -> AsyncIterator[str]:
        """
        Async context manager for setting up a transcript context.

        Async counterpart of transcript_context with identical semantics;
        requires the tracer to already be initialized.

        Args:
            name: Optional transcript name
            transcript_id: Optional transcript ID (auto-generated if not provided)
            description: Optional transcript description
            metadata: Optional metadata to send to backend
            transcript_group_id: Optional transcript group ID

        Yields:
            The transcript ID

        Raises:
            RuntimeError: if the tracer has not been initialized.
        """
        if self._disabled:
            # Return dummy ID when tracing is disabled
            if transcript_id is None:
                transcript_id = str(uuid.uuid4())
            yield transcript_id
            return

        if not self._initialized:
            raise RuntimeError(
                "Tracer is not initialized. Call initialize_tracing() before using transcript context."
            )

        if transcript_id is None:
            transcript_id = str(uuid.uuid4())

        # Determine transcript group ID before setting new context
        if transcript_group_id is None:
            try:
                transcript_group_id = self._transcript_group_id_var.get()
            except LookupError:
                # No current transcript group context, this transcript has no group
                transcript_group_id = None

        # Set context variable for this execution context; the token lets the
        # finally block restore the previous transcript id exactly.
        transcript_id_token: Token[str] = self._transcript_id_var.set(transcript_id)

        try:
            # Send transcript data and metadata to backend; failures are
            # logged but do not abort the context.
            try:
                self.send_transcript_metadata(
                    transcript_id, name, description, transcript_group_id, metadata
                )
            except Exception as e:
                logger.warning(f"Failed sending transcript data: {e}")

            yield transcript_id
        finally:
            # Reset context variable to previous state
            self._transcript_id_var.reset(transcript_id_token)
912
-
913
- def send_transcript_group_metadata(
914
- self,
915
- transcript_group_id: str,
916
- name: Optional[str] = None,
917
- description: Optional[str] = None,
918
- parent_transcript_group_id: Optional[str] = None,
919
- metadata: Optional[Dict[str, Any]] = None,
920
- ) -> None:
921
- """
922
- Send transcript group data to the backend.
923
-
924
- Args:
925
- transcript_group_id: The transcript group ID
926
- name: Optional transcript group name
927
- description: Optional transcript group description
928
- parent_transcript_group_id: Optional parent transcript group ID
929
- metadata: Optional metadata to send
930
- """
931
- if self._disabled:
932
- return
933
-
934
- collection_id = self.collection_id
935
-
936
- # Get agent_run_id from current context
937
- agent_run_id = self.get_current_agent_run_id()
938
- if not agent_run_id:
939
- logger.error(
940
- f"Cannot send transcript group metadata for {transcript_group_id} - no agent_run_id in context"
941
- )
942
- return
943
-
944
- payload: Dict[str, Any] = {
945
- "collection_id": collection_id,
946
- "transcript_group_id": transcript_group_id,
947
- "agent_run_id": agent_run_id,
948
- "timestamp": datetime.now(timezone.utc).isoformat(),
949
- }
950
-
951
- if name is not None:
952
- payload["name"] = name
953
- if description is not None:
954
- payload["description"] = description
955
- if parent_transcript_group_id is not None:
956
- payload["parent_transcript_group_id"] = parent_transcript_group_id
957
- if metadata is not None:
958
- payload["metadata"] = metadata
959
-
960
- self._post_json("/v1/transcript-group-metadata", payload)
961
-
962
- @contextmanager
963
- def transcript_group_context(
964
- self,
965
- name: Optional[str] = None,
966
- transcript_group_id: Optional[str] = None,
967
- description: Optional[str] = None,
968
- metadata: Optional[Dict[str, Any]] = None,
969
- parent_transcript_group_id: Optional[str] = None,
970
- ) -> Iterator[str]:
971
- """
972
- Context manager for setting up a transcript group context.
973
-
974
- Args:
975
- name: Optional transcript group name
976
- transcript_group_id: Optional transcript group ID (auto-generated if not provided)
977
- description: Optional transcript group description
978
- metadata: Optional metadata to send to backend
979
- parent_transcript_group_id: Optional parent transcript group ID
980
-
981
- Yields:
982
- The transcript group ID
983
- """
984
- if self._disabled:
985
- # Return dummy ID when tracing is disabled
986
- if transcript_group_id is None:
987
- transcript_group_id = str(uuid.uuid4())
988
- yield transcript_group_id
989
- return
990
-
991
- if not self._initialized:
992
- raise RuntimeError(
993
- "Tracer is not initialized. Call initialize_tracing() before using transcript group context."
994
- )
995
-
996
- if transcript_group_id is None:
997
- transcript_group_id = str(uuid.uuid4())
998
-
999
- # Determine parent transcript group ID before setting new context
1000
- if parent_transcript_group_id is None:
1001
- try:
1002
- parent_transcript_group_id = self._transcript_group_id_var.get()
1003
- except LookupError:
1004
- # No current transcript group context, this becomes a root group
1005
- parent_transcript_group_id = None
1006
-
1007
- # Set context variable for this execution context
1008
- transcript_group_id_token: Token[str] = self._transcript_group_id_var.set(
1009
- transcript_group_id
1010
- )
1011
-
1012
- try:
1013
- # Send transcript group data and metadata to backend
1014
- try:
1015
- self.send_transcript_group_metadata(
1016
- transcript_group_id, name, description, parent_transcript_group_id, metadata
1017
- )
1018
- except Exception as e:
1019
- logger.warning(f"Failed sending transcript group data: {e}")
1020
-
1021
- yield transcript_group_id
1022
- finally:
1023
- # Reset context variable to previous state
1024
- self._transcript_group_id_var.reset(transcript_group_id_token)
1025
-
1026
- @asynccontextmanager
1027
- async def async_transcript_group_context(
1028
- self,
1029
- name: Optional[str] = None,
1030
- transcript_group_id: Optional[str] = None,
1031
- description: Optional[str] = None,
1032
- metadata: Optional[Dict[str, Any]] = None,
1033
- parent_transcript_group_id: Optional[str] = None,
1034
- ) -> AsyncIterator[str]:
1035
- """
1036
- Async context manager for setting up a transcript group context.
1037
-
1038
- Args:
1039
- name: Optional transcript group name
1040
- transcript_group_id: Optional transcript group ID (auto-generated if not provided)
1041
- description: Optional transcript group description
1042
- metadata: Optional metadata to send to backend
1043
- parent_transcript_group_id: Optional parent transcript group ID
1044
-
1045
- Yields:
1046
- The transcript group ID
1047
- """
1048
- if self._disabled:
1049
- # Return dummy ID when tracing is disabled
1050
- if transcript_group_id is None:
1051
- transcript_group_id = str(uuid.uuid4())
1052
- yield transcript_group_id
1053
- return
1054
-
1055
- if not self._initialized:
1056
- raise RuntimeError(
1057
- "Tracer is not initialized. Call initialize_tracing() before using transcript group context."
1058
- )
1059
-
1060
- if transcript_group_id is None:
1061
- transcript_group_id = str(uuid.uuid4())
1062
-
1063
- # Determine parent transcript group ID before setting new context
1064
- if parent_transcript_group_id is None:
1065
- try:
1066
- parent_transcript_group_id = self._transcript_group_id_var.get()
1067
- except LookupError:
1068
- # No current transcript group context, this becomes a root group
1069
- parent_transcript_group_id = None
1070
-
1071
- # Set context variable for this execution context
1072
- transcript_group_id_token: Token[str] = self._transcript_group_id_var.set(
1073
- transcript_group_id
1074
- )
1075
-
1076
- try:
1077
- # Send transcript group data and metadata to backend
1078
- try:
1079
- self.send_transcript_group_metadata(
1080
- transcript_group_id, name, description, parent_transcript_group_id, metadata
1081
- )
1082
- except Exception as e:
1083
- logger.warning(f"Failed sending transcript group data: {e}")
1084
-
1085
- yield transcript_group_id
1086
- finally:
1087
- # Reset context variable to previous state
1088
- self._transcript_group_id_var.reset(transcript_group_id_token)
1089
-
1090
- def _send_trace_done(self) -> None:
1091
- if self._disabled:
1092
- return
1093
-
1094
- collection_id = self.collection_id
1095
- payload: Dict[str, Any] = {
1096
- "collection_id": collection_id,
1097
- "status": "completed",
1098
- "timestamp": datetime.now(timezone.utc).isoformat(),
1099
- }
1100
- self._post_json("/v1/trace-done", payload)
1101
-
1102
-
1103
- _global_tracer: Optional[DocentTracer] = None
1104
-
1105
-
1106
- def initialize_tracing(
1107
- collection_name: str = DEFAULT_COLLECTION_NAME,
1108
- collection_id: Optional[str] = None,
1109
- endpoint: Union[str, List[str]] = DEFAULT_ENDPOINT,
1110
- headers: Optional[Dict[str, str]] = None,
1111
- api_key: Optional[str] = None,
1112
- enable_console_export: bool = False,
1113
- enable_otlp_export: bool = True,
1114
- disable_batch: bool = False,
1115
- instruments: Optional[Set[Instruments]] = None,
1116
- block_instruments: Optional[Set[Instruments]] = None,
1117
- ) -> DocentTracer:
1118
- """
1119
- Initialize the global Docent tracer.
1120
-
1121
- This is the primary entry point for setting up Docent tracing.
1122
- It creates a global singleton instance that can be accessed via get_tracer().
1123
-
1124
- Args:
1125
- collection_name: Name of the collection
1126
- collection_id: Optional collection ID (auto-generated if not provided)
1127
- endpoint: OTLP endpoint URL(s) for span export - can be a single string or list of strings for multiple endpoints
1128
- headers: Optional headers for authentication
1129
- api_key: Optional API key for bearer token authentication (takes precedence
1130
- over DOCENT_API_KEY environment variable)
1131
- enable_console_export: Whether to export spans to console for debugging
1132
- enable_otlp_export: Whether to export spans to OTLP endpoint
1133
- disable_batch: Whether to disable batch processing (use SimpleSpanProcessor)
1134
- instruments: Set of instruments to enable (None = all instruments).
1135
- block_instruments: Set of instruments to explicitly disable.
1136
-
1137
- Returns:
1138
- The initialized Docent tracer
1139
-
1140
- Example:
1141
- initialize_tracing("my-collection")
1142
- """
1143
-
1144
- global _global_tracer
1145
-
1146
- # Check for API key in environment variable if not provided as parameter
1147
- if api_key is None:
1148
- env_api_key: Optional[str] = os.environ.get("DOCENT_API_KEY")
1149
- api_key = env_api_key
1150
-
1151
- if _global_tracer is None:
1152
- _global_tracer = DocentTracer(
1153
- collection_name=collection_name,
1154
- collection_id=collection_id,
1155
- endpoint=endpoint,
1156
- headers=headers,
1157
- api_key=api_key,
1158
- enable_console_export=enable_console_export,
1159
- enable_otlp_export=enable_otlp_export,
1160
- disable_batch=disable_batch,
1161
- instruments=instruments,
1162
- block_instruments=block_instruments,
1163
- )
1164
- _global_tracer.initialize()
1165
-
1166
- return _global_tracer
1167
-
1168
-
1169
- def _get_package_name(dist: Distribution) -> str | None:
1170
- try:
1171
- return dist.name.lower()
1172
- except (KeyError, AttributeError):
1173
- return None
1174
-
1175
-
1176
- installed_packages = {
1177
- name for dist in distributions() if (name := _get_package_name(dist)) is not None
1178
- }
1179
-
1180
-
1181
- def is_package_installed(package_name: str) -> bool:
1182
- return package_name.lower() in installed_packages
1183
-
1184
-
1185
- def get_tracer() -> DocentTracer:
1186
- """Get the global Docent tracer."""
1187
- if _global_tracer is None:
1188
- raise RuntimeError("Docent tracer not initialized")
1189
- return _global_tracer
1190
-
1191
-
1192
- def close_tracing() -> None:
1193
- """Close the global Docent tracer."""
1194
- global _global_tracer
1195
- if _global_tracer:
1196
- _global_tracer.close()
1197
- _global_tracer = None
1198
-
1199
-
1200
- def flush_tracing() -> None:
1201
- """Force flush all spans to exporters."""
1202
- if _global_tracer:
1203
- logger.debug("Flushing Docent tracer")
1204
- _global_tracer.flush()
1205
- else:
1206
- logger.debug("No global tracer available to flush")
1207
-
1208
-
1209
- def is_initialized() -> bool:
1210
- """Verify if the global Docent tracer is properly initialized."""
1211
- if _global_tracer is None:
1212
- return False
1213
- return _global_tracer.is_initialized()
1214
-
1215
-
1216
- def is_disabled() -> bool:
1217
- """Check if global tracing is disabled."""
1218
- if _global_tracer:
1219
- return _global_tracer.is_disabled()
1220
- return True
1221
-
1222
-
1223
- def set_disabled(disabled: bool) -> None:
1224
- """Enable or disable global tracing."""
1225
- if _global_tracer:
1226
- _global_tracer.set_disabled(disabled)
1227
-
1228
-
1229
- def agent_run_score(name: str, score: float, attributes: Optional[Dict[str, Any]] = None) -> None:
1230
- """
1231
- Send a score to the backend for the current agent run.
1232
-
1233
- Args:
1234
- name: Name of the score metric
1235
- score: Numeric score value
1236
- attributes: Optional additional attributes for the score event
1237
- """
1238
- try:
1239
- tracer: DocentTracer = get_tracer()
1240
- if tracer.is_disabled():
1241
- return
1242
- agent_run_id = tracer.get_current_agent_run_id()
1243
-
1244
- if not agent_run_id:
1245
- logger.warning("No active agent run context. Score will not be sent.")
1246
- return
1247
-
1248
- tracer.send_agent_run_score(agent_run_id, name, score, attributes)
1249
- except Exception as e:
1250
- logger.error(f"Failed to send score: {e}")
1251
-
1252
-
1253
- def _flatten_dict(d: Dict[str, Any], prefix: str = "") -> Dict[str, Any]:
1254
- """Flatten nested dictionary with dot notation."""
1255
- flattened: Dict[str, Any] = {}
1256
- for key, value in d.items():
1257
- new_key = f"{prefix}.{key}" if prefix else key
1258
- if isinstance(value, dict):
1259
- flattened.update(_flatten_dict(dict(value), new_key)) # type: ignore
1260
- else:
1261
- flattened[new_key] = value
1262
- return flattened
1263
-
1264
-
1265
- def agent_run_metadata(metadata: Dict[str, Any]) -> None:
1266
- """
1267
- Send metadata directly to the backend for the current agent run.
1268
-
1269
- Args:
1270
- metadata: Dictionary of metadata to attach to the current span (can be nested)
1271
-
1272
- Example:
1273
- agent_run_metadata({"user": "John", "id": 123, "flagged": True})
1274
- agent_run_metadata({"user": {"id": "123", "name": "John"}, "config": {"model": "gpt-4"}})
1275
- """
1276
- try:
1277
- tracer = get_tracer()
1278
- if tracer.is_disabled():
1279
- return
1280
- agent_run_id = tracer.get_current_agent_run_id()
1281
- if not agent_run_id:
1282
- logger.warning("No active agent run context. Metadata will not be sent.")
1283
- return
1284
-
1285
- tracer.send_agent_run_metadata(agent_run_id, metadata)
1286
- except Exception as e:
1287
- logger.error(f"Failed to send metadata: {e}")
1288
-
1289
-
1290
- def transcript_metadata(
1291
- name: Optional[str] = None,
1292
- description: Optional[str] = None,
1293
- transcript_group_id: Optional[str] = None,
1294
- metadata: Optional[Dict[str, Any]] = None,
1295
- ) -> None:
1296
- """
1297
- Send transcript metadata directly to the backend for the current transcript.
1298
-
1299
- Args:
1300
- name: Optional transcript name
1301
- description: Optional transcript description
1302
- parent_transcript_id: Optional parent transcript ID
1303
- metadata: Optional metadata to send
1304
-
1305
- Example:
1306
- transcript_metadata(name="data_processing", description="Process user data")
1307
- transcript_metadata(metadata={"user": "John", "model": "gpt-4"})
1308
- transcript_metadata(name="validation", parent_transcript_id="parent-123")
1309
- """
1310
- try:
1311
- tracer = get_tracer()
1312
- if tracer.is_disabled():
1313
- return
1314
- transcript_id = tracer.get_current_transcript_id()
1315
- if not transcript_id:
1316
- logger.warning("No active transcript context. Metadata will not be sent.")
1317
- return
1318
-
1319
- tracer.send_transcript_metadata(
1320
- transcript_id, name, description, transcript_group_id, metadata
1321
- )
1322
- except Exception as e:
1323
- logger.error(f"Failed to send transcript metadata: {e}")
1324
-
1325
-
1326
- class AgentRunContext:
1327
- """Context manager that works in both sync and async contexts."""
1328
-
1329
- def __init__(
1330
- self,
1331
- agent_run_id: Optional[str] = None,
1332
- transcript_id: Optional[str] = None,
1333
- metadata: Optional[Dict[str, Any]] = None,
1334
- **attributes: Any,
1335
- ):
1336
- self.agent_run_id = agent_run_id
1337
- self.transcript_id = transcript_id
1338
- self.metadata = metadata
1339
- self.attributes: dict[str, Any] = attributes
1340
- self._sync_context: Optional[Any] = None
1341
- self._async_context: Optional[Any] = None
1342
-
1343
- def __enter__(self) -> tuple[str, str]:
1344
- """Sync context manager entry."""
1345
- self._sync_context = get_tracer().agent_run_context(
1346
- self.agent_run_id, self.transcript_id, metadata=self.metadata, **self.attributes
1347
- )
1348
- return self._sync_context.__enter__()
1349
-
1350
- def __exit__(self, exc_type: type[BaseException], exc_val: Any, exc_tb: Any) -> None:
1351
- """Sync context manager exit."""
1352
- if self._sync_context:
1353
- self._sync_context.__exit__(exc_type, exc_val, exc_tb)
1354
-
1355
- async def __aenter__(self) -> tuple[str, str]:
1356
- """Async context manager entry."""
1357
- self._async_context = get_tracer().async_agent_run_context(
1358
- self.agent_run_id, self.transcript_id, metadata=self.metadata, **self.attributes
1359
- )
1360
- return await self._async_context.__aenter__()
1361
-
1362
- async def __aexit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
1363
- """Async context manager exit."""
1364
- if self._async_context:
1365
- await self._async_context.__aexit__(exc_type, exc_val, exc_tb)
1366
-
1367
-
1368
- def agent_run(
1369
- func: Optional[Callable[..., Any]] = None, *, metadata: Optional[Dict[str, Any]] = None
1370
- ):
1371
- """
1372
- Decorator to wrap a function in an agent_run_context (sync or async).
1373
- Injects agent_run_id and transcript_id as function attributes.
1374
- Optionally accepts metadata to attach to the agent run context.
1375
-
1376
- Example:
1377
- @agent_run
1378
- def my_func(x, y):
1379
- print(my_func.docent.agent_run_id, my_func.docent.transcript_id)
1380
-
1381
- @agent_run(metadata={"user": "John", "model": "gpt-4"})
1382
- def my_func_with_metadata(x, y):
1383
- print(my_func_with_metadata.docent.agent_run_id)
1384
-
1385
- @agent_run(metadata={"config": {"model": "gpt-4", "temperature": 0.7}})
1386
- async def my_async_func(z):
1387
- print(my_async_func.docent.agent_run_id)
1388
- """
1389
- import functools
1390
- import inspect
1391
-
1392
- def decorator(f: Callable[..., Any]) -> Callable[..., Any]:
1393
- if inspect.iscoroutinefunction(f):
1394
-
1395
- @functools.wraps(f)
1396
- async def async_wrapper(*args: Any, **kwargs: Any) -> Any:
1397
- async with AgentRunContext(metadata=metadata) as (agent_run_id, transcript_id):
1398
- # Store docent data as function attributes
1399
- setattr(
1400
- async_wrapper,
1401
- "docent",
1402
- type(
1403
- "DocentData",
1404
- (),
1405
- {
1406
- "agent_run_id": agent_run_id,
1407
- "transcript_id": transcript_id,
1408
- },
1409
- )(),
1410
- )
1411
- return await f(*args, **kwargs)
1412
-
1413
- return async_wrapper
1414
- else:
1415
-
1416
- @functools.wraps(f)
1417
- def sync_wrapper(*args: Any, **kwargs: Any) -> Any:
1418
- with AgentRunContext(metadata=metadata) as (agent_run_id, transcript_id):
1419
- # Store docent data as function attributes
1420
- setattr(
1421
- sync_wrapper,
1422
- "docent",
1423
- type(
1424
- "DocentData",
1425
- (),
1426
- {
1427
- "agent_run_id": agent_run_id,
1428
- "transcript_id": transcript_id,
1429
- },
1430
- )(),
1431
- )
1432
- return f(*args, **kwargs)
1433
-
1434
- return sync_wrapper
1435
-
1436
- if func is None:
1437
- return decorator
1438
- else:
1439
- return decorator(func)
1440
-
1441
-
1442
- def agent_run_context(
1443
- agent_run_id: Optional[str] = None,
1444
- transcript_id: Optional[str] = None,
1445
- metadata: Optional[Dict[str, Any]] = None,
1446
- **attributes: Any,
1447
- ) -> AgentRunContext:
1448
- """
1449
- Create an agent run context for tracing.
1450
-
1451
- Args:
1452
- agent_run_id: Optional agent run ID (auto-generated if not provided)
1453
- transcript_id: Optional transcript ID (auto-generated if not provided)
1454
- metadata: Optional nested dictionary of metadata to attach as events
1455
- **attributes: Additional attributes to add to the context
1456
-
1457
- Returns:
1458
- A context manager that can be used with both 'with' and 'async with'
1459
-
1460
- Example:
1461
- # Sync usage
1462
- with agent_run_context() as (agent_run_id, transcript_id):
1463
- pass
1464
-
1465
- # Async usage
1466
- async with agent_run_context() as (agent_run_id, transcript_id):
1467
- pass
1468
-
1469
- # With metadata
1470
- with agent_run_context(metadata={"user": "John", "model": "gpt-4"}) as (agent_run_id, transcript_id):
1471
- pass
1472
- """
1473
- return AgentRunContext(agent_run_id, transcript_id, metadata=metadata, **attributes)
1474
-
1475
-
1476
- class TranscriptContext:
1477
- """Context manager for creating and managing transcripts."""
1478
-
1479
- def __init__(
1480
- self,
1481
- name: Optional[str] = None,
1482
- transcript_id: Optional[str] = None,
1483
- description: Optional[str] = None,
1484
- metadata: Optional[Dict[str, Any]] = None,
1485
- transcript_group_id: Optional[str] = None,
1486
- ):
1487
- self.name = name
1488
- self.transcript_id = transcript_id
1489
- self.description = description
1490
- self.metadata = metadata
1491
- self.transcript_group_id = transcript_group_id
1492
- self._sync_context: Optional[Any] = None
1493
- self._async_context: Optional[Any] = None
1494
-
1495
- def __enter__(self) -> str:
1496
- """Sync context manager entry."""
1497
- self._sync_context = get_tracer().transcript_context(
1498
- name=self.name,
1499
- transcript_id=self.transcript_id,
1500
- description=self.description,
1501
- metadata=self.metadata,
1502
- transcript_group_id=self.transcript_group_id,
1503
- )
1504
- return self._sync_context.__enter__()
1505
-
1506
- def __exit__(self, exc_type: type[BaseException], exc_val: Any, exc_tb: Any) -> None:
1507
- """Sync context manager exit."""
1508
- if self._sync_context:
1509
- self._sync_context.__exit__(exc_type, exc_val, exc_tb)
1510
-
1511
- async def __aenter__(self) -> str:
1512
- """Async context manager entry."""
1513
- self._async_context = get_tracer().async_transcript_context(
1514
- name=self.name,
1515
- transcript_id=self.transcript_id,
1516
- description=self.description,
1517
- metadata=self.metadata,
1518
- transcript_group_id=self.transcript_group_id,
1519
- )
1520
- return await self._async_context.__aenter__()
1521
-
1522
- async def __aexit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
1523
- """Async context manager exit."""
1524
- if self._async_context:
1525
- await self._async_context.__aexit__(exc_type, exc_val, exc_tb)
1526
-
1527
-
1528
- def transcript(
1529
- func: Optional[Callable[..., Any]] = None,
1530
- *,
1531
- name: Optional[str] = None,
1532
- transcript_id: Optional[str] = None,
1533
- description: Optional[str] = None,
1534
- metadata: Optional[Dict[str, Any]] = None,
1535
- transcript_group_id: Optional[str] = None,
1536
- ):
1537
- """
1538
- Decorator to wrap a function in a transcript context.
1539
- Injects transcript_id as a function attribute.
1540
-
1541
- Example:
1542
- @transcript
1543
- def my_func(x, y):
1544
- print(my_func.docent.transcript_id)
1545
-
1546
- @transcript(name="data_processing", description="Process user data")
1547
- def my_func_with_name(x, y):
1548
- print(my_func_with_name.docent.transcript_id)
1549
-
1550
- @transcript(metadata={"user": "John", "model": "gpt-4"})
1551
- async def my_async_func(z):
1552
- print(my_async_func.docent.transcript_id)
1553
- """
1554
- import functools
1555
- import inspect
1556
-
1557
- def decorator(f: Callable[..., Any]) -> Callable[..., Any]:
1558
- if inspect.iscoroutinefunction(f):
1559
-
1560
- @functools.wraps(f)
1561
- async def async_wrapper(*args: Any, **kwargs: Any) -> Any:
1562
- async with TranscriptContext(
1563
- name=name,
1564
- transcript_id=transcript_id,
1565
- description=description,
1566
- metadata=metadata,
1567
- transcript_group_id=transcript_group_id,
1568
- ) as transcript_id_result:
1569
- # Store docent data as function attributes
1570
- setattr(
1571
- async_wrapper,
1572
- "docent",
1573
- type(
1574
- "DocentData",
1575
- (),
1576
- {
1577
- "transcript_id": transcript_id_result,
1578
- },
1579
- )(),
1580
- )
1581
- return await f(*args, **kwargs)
1582
-
1583
- return async_wrapper
1584
- else:
1585
-
1586
- @functools.wraps(f)
1587
- def sync_wrapper(*args: Any, **kwargs: Any) -> Any:
1588
- with TranscriptContext(
1589
- name=name,
1590
- transcript_id=transcript_id,
1591
- description=description,
1592
- metadata=metadata,
1593
- transcript_group_id=transcript_group_id,
1594
- ) as transcript_id_result:
1595
- # Store docent data as function attributes
1596
- setattr(
1597
- sync_wrapper,
1598
- "docent",
1599
- type(
1600
- "DocentData",
1601
- (),
1602
- {
1603
- "transcript_id": transcript_id_result,
1604
- },
1605
- )(),
1606
- )
1607
- return f(*args, **kwargs)
1608
-
1609
- return sync_wrapper
1610
-
1611
- if func is None:
1612
- return decorator
1613
- else:
1614
- return decorator(func)
1615
-
1616
-
1617
- def transcript_context(
1618
- name: Optional[str] = None,
1619
- transcript_id: Optional[str] = None,
1620
- description: Optional[str] = None,
1621
- metadata: Optional[Dict[str, Any]] = None,
1622
- transcript_group_id: Optional[str] = None,
1623
- ) -> TranscriptContext:
1624
- """
1625
- Create a transcript context for tracing.
1626
-
1627
- Args:
1628
- name: Optional transcript name
1629
- transcript_id: Optional transcript ID (auto-generated if not provided)
1630
- description: Optional transcript description
1631
- metadata: Optional metadata to attach to the transcript
1632
- parent_transcript_id: Optional parent transcript ID
1633
-
1634
- Returns:
1635
- A context manager that can be used with both 'with' and 'async with'
1636
-
1637
- Example:
1638
- # Sync usage
1639
- with transcript_context(name="data_processing") as transcript_id:
1640
- pass
1641
-
1642
- # Async usage
1643
- async with transcript_context(description="Process user data") as transcript_id:
1644
- pass
1645
-
1646
- # With metadata
1647
- with transcript_context(metadata={"user": "John", "model": "gpt-4"}) as transcript_id:
1648
- pass
1649
- """
1650
- return TranscriptContext(name, transcript_id, description, metadata, transcript_group_id)
1651
-
1652
-
1653
- class TranscriptGroupContext:
1654
- """Context manager for creating and managing transcript groups."""
1655
-
1656
- def __init__(
1657
- self,
1658
- name: Optional[str] = None,
1659
- transcript_group_id: Optional[str] = None,
1660
- description: Optional[str] = None,
1661
- metadata: Optional[Dict[str, Any]] = None,
1662
- parent_transcript_group_id: Optional[str] = None,
1663
- ):
1664
- self.name = name
1665
- self.transcript_group_id = transcript_group_id
1666
- self.description = description
1667
- self.metadata = metadata
1668
- self.parent_transcript_group_id = parent_transcript_group_id
1669
- self._sync_context: Optional[Any] = None
1670
- self._async_context: Optional[Any] = None
1671
-
1672
- def __enter__(self) -> str:
1673
- """Sync context manager entry."""
1674
- self._sync_context = get_tracer().transcript_group_context(
1675
- name=self.name,
1676
- transcript_group_id=self.transcript_group_id,
1677
- description=self.description,
1678
- metadata=self.metadata,
1679
- parent_transcript_group_id=self.parent_transcript_group_id,
1680
- )
1681
- return self._sync_context.__enter__()
1682
-
1683
- def __exit__(self, exc_type: type[BaseException], exc_val: Any, exc_tb: Any) -> None:
1684
- """Sync context manager exit."""
1685
- if self._sync_context:
1686
- self._sync_context.__exit__(exc_type, exc_val, exc_tb)
1687
-
1688
- async def __aenter__(self) -> str:
1689
- """Async context manager entry."""
1690
- self._async_context = get_tracer().async_transcript_group_context(
1691
- name=self.name,
1692
- transcript_group_id=self.transcript_group_id,
1693
- description=self.description,
1694
- metadata=self.metadata,
1695
- parent_transcript_group_id=self.parent_transcript_group_id,
1696
- )
1697
- return await self._async_context.__aenter__()
1698
-
1699
- async def __aexit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
1700
- """Async context manager exit."""
1701
- if self._async_context:
1702
- await self._async_context.__aexit__(exc_type, exc_val, exc_tb)
1703
-
1704
-
1705
- def transcript_group(
1706
- func: Optional[Callable[..., Any]] = None,
1707
- *,
1708
- name: Optional[str] = None,
1709
- transcript_group_id: Optional[str] = None,
1710
- description: Optional[str] = None,
1711
- metadata: Optional[Dict[str, Any]] = None,
1712
- parent_transcript_group_id: Optional[str] = None,
1713
- ):
1714
- """
1715
- Decorator to wrap a function in a transcript group context.
1716
- Injects transcript_group_id as a function attribute.
1717
-
1718
- Example:
1719
- @transcript_group
1720
- def my_func(x, y):
1721
- print(my_func.docent.transcript_group_id)
1722
-
1723
- @transcript_group(name="data_processing", description="Process user data")
1724
- def my_func_with_name(x, y):
1725
- print(my_func_with_name.docent.transcript_group_id)
1726
-
1727
- @transcript_group(metadata={"user": "John", "model": "gpt-4"})
1728
- async def my_async_func(z):
1729
- print(my_async_func.docent.transcript_group_id)
1730
- """
1731
- import functools
1732
- import inspect
1733
-
1734
- def decorator(f: Callable[..., Any]) -> Callable[..., Any]:
1735
- if inspect.iscoroutinefunction(f):
1736
-
1737
- @functools.wraps(f)
1738
- async def async_wrapper(*args: Any, **kwargs: Any) -> Any:
1739
- async with TranscriptGroupContext(
1740
- name=name,
1741
- transcript_group_id=transcript_group_id,
1742
- description=description,
1743
- metadata=metadata,
1744
- parent_transcript_group_id=parent_transcript_group_id,
1745
- ) as transcript_group_id_result:
1746
- # Store docent data as function attributes
1747
- setattr(
1748
- async_wrapper,
1749
- "docent",
1750
- type(
1751
- "DocentData",
1752
- (),
1753
- {
1754
- "transcript_group_id": transcript_group_id_result,
1755
- },
1756
- )(),
1757
- )
1758
- return await f(*args, **kwargs)
1759
-
1760
- return async_wrapper
1761
- else:
1762
-
1763
- @functools.wraps(f)
1764
- def sync_wrapper(*args: Any, **kwargs: Any) -> Any:
1765
- with TranscriptGroupContext(
1766
- name=name,
1767
- transcript_group_id=transcript_group_id,
1768
- description=description,
1769
- metadata=metadata,
1770
- parent_transcript_group_id=parent_transcript_group_id,
1771
- ) as transcript_group_id_result:
1772
- # Store docent data as function attributes
1773
- setattr(
1774
- sync_wrapper,
1775
- "docent",
1776
- type(
1777
- "DocentData",
1778
- (),
1779
- {
1780
- "transcript_group_id": transcript_group_id_result,
1781
- },
1782
- )(),
1783
- )
1784
- return f(*args, **kwargs)
1785
-
1786
- return sync_wrapper
1787
-
1788
- if func is None:
1789
- return decorator
1790
- else:
1791
- return decorator(func)
1792
-
1793
-
1794
- def transcript_group_context(
1795
- name: Optional[str] = None,
1796
- transcript_group_id: Optional[str] = None,
1797
- description: Optional[str] = None,
1798
- metadata: Optional[Dict[str, Any]] = None,
1799
- parent_transcript_group_id: Optional[str] = None,
1800
- ) -> TranscriptGroupContext:
1801
- """
1802
- Create a transcript group context for tracing.
1803
-
1804
- Args:
1805
- name: Optional transcript group name
1806
- transcript_group_id: Optional transcript group ID (auto-generated if not provided)
1807
- description: Optional transcript group description
1808
- metadata: Optional metadata to attach to the transcript group
1809
- parent_transcript_group_id: Optional parent transcript group ID
1810
-
1811
- Returns:
1812
- A context manager that can be used with both 'with' and 'async with'
1813
-
1814
- Example:
1815
- # Sync usage
1816
- with transcript_group_context(name="data_processing") as transcript_group_id:
1817
- pass
1818
-
1819
- # Async usage
1820
- async with transcript_group_context(description="Process user data") as transcript_group_id:
1821
- pass
1822
-
1823
- # With metadata
1824
- with transcript_group_context(metadata={"user": "John", "model": "gpt-4"}) as transcript_group_id:
1825
- pass
1826
- """
1827
- return TranscriptGroupContext(
1828
- name, transcript_group_id, description, metadata, parent_transcript_group_id
1829
- )
1830
-
1831
-
1832
- def _is_tracing_disabled() -> bool:
1833
- """Check if tracing is disabled via environment variable."""
1834
- return os.environ.get("DOCENT_DISABLE_TRACING", "").lower() == "true"
1835
-
1836
-
1837
- def _is_notebook() -> bool:
1838
- """Check if we're running in a Jupyter notebook."""
1839
- try:
1840
- return "ipykernel" in sys.modules
1841
- except Exception:
1842
- return False