docent-python 0.1.41a0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of docent-python may be problematic; see the registry's advisory page for details.
- docent/__init__.py +4 -0
- docent/_llm_util/__init__.py +0 -0
- docent/_llm_util/data_models/__init__.py +0 -0
- docent/_llm_util/data_models/exceptions.py +48 -0
- docent/_llm_util/data_models/llm_output.py +331 -0
- docent/_llm_util/llm_cache.py +193 -0
- docent/_llm_util/llm_svc.py +472 -0
- docent/_llm_util/model_registry.py +134 -0
- docent/_llm_util/providers/__init__.py +0 -0
- docent/_llm_util/providers/anthropic.py +537 -0
- docent/_llm_util/providers/common.py +41 -0
- docent/_llm_util/providers/google.py +530 -0
- docent/_llm_util/providers/openai.py +745 -0
- docent/_llm_util/providers/openrouter.py +375 -0
- docent/_llm_util/providers/preference_types.py +104 -0
- docent/_llm_util/providers/provider_registry.py +164 -0
- docent/_log_util/__init__.py +3 -0
- docent/_log_util/logger.py +141 -0
- docent/data_models/__init__.py +14 -0
- docent/data_models/_tiktoken_util.py +91 -0
- docent/data_models/agent_run.py +473 -0
- docent/data_models/chat/__init__.py +37 -0
- docent/data_models/chat/content.py +56 -0
- docent/data_models/chat/message.py +191 -0
- docent/data_models/chat/tool.py +109 -0
- docent/data_models/citation.py +187 -0
- docent/data_models/formatted_objects.py +84 -0
- docent/data_models/judge.py +17 -0
- docent/data_models/metadata_util.py +16 -0
- docent/data_models/regex.py +56 -0
- docent/data_models/transcript.py +305 -0
- docent/data_models/util.py +170 -0
- docent/judges/__init__.py +23 -0
- docent/judges/analysis.py +77 -0
- docent/judges/impl.py +587 -0
- docent/judges/runner.py +129 -0
- docent/judges/stats.py +205 -0
- docent/judges/types.py +320 -0
- docent/judges/util/forgiving_json.py +108 -0
- docent/judges/util/meta_schema.json +86 -0
- docent/judges/util/meta_schema.py +29 -0
- docent/judges/util/parse_output.py +68 -0
- docent/judges/util/voting.py +139 -0
- docent/loaders/load_inspect.py +215 -0
- docent/py.typed +0 -0
- docent/samples/__init__.py +3 -0
- docent/samples/load.py +9 -0
- docent/samples/log.eval +0 -0
- docent/samples/tb_airline.json +1 -0
- docent/sdk/__init__.py +0 -0
- docent/sdk/agent_run_writer.py +317 -0
- docent/sdk/client.py +1186 -0
- docent/sdk/llm_context.py +432 -0
- docent/trace.py +2741 -0
- docent/trace_temp.py +1086 -0
- docent_python-0.1.41a0.dist-info/METADATA +33 -0
- docent_python-0.1.41a0.dist-info/RECORD +59 -0
- docent_python-0.1.41a0.dist-info/WHEEL +4 -0
- docent_python-0.1.41a0.dist-info/licenses/LICENSE.md +13 -0
docent/trace.py
ADDED
|
@@ -0,0 +1,2741 @@
|
|
|
1
|
+
# pyright: reportUnnecessaryIsInstance=false
|
|
2
|
+
|
|
3
|
+
import atexit
|
|
4
|
+
import contextvars
|
|
5
|
+
import itertools
|
|
6
|
+
import json
|
|
7
|
+
import os
|
|
8
|
+
import sys
|
|
9
|
+
import threading
|
|
10
|
+
import time
|
|
11
|
+
import uuid
|
|
12
|
+
from collections import defaultdict
|
|
13
|
+
from contextlib import asynccontextmanager, contextmanager
|
|
14
|
+
from contextvars import ContextVar, Token
|
|
15
|
+
from datetime import datetime, timezone
|
|
16
|
+
from enum import Enum
|
|
17
|
+
from importlib.metadata import Distribution, distributions
|
|
18
|
+
from typing import (
|
|
19
|
+
Any,
|
|
20
|
+
AsyncIterator,
|
|
21
|
+
Callable,
|
|
22
|
+
Dict,
|
|
23
|
+
Iterator,
|
|
24
|
+
List,
|
|
25
|
+
Mapping,
|
|
26
|
+
Optional,
|
|
27
|
+
Sequence,
|
|
28
|
+
Set,
|
|
29
|
+
Union,
|
|
30
|
+
cast,
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
import requests
|
|
34
|
+
from opentelemetry import trace
|
|
35
|
+
from opentelemetry.context import Context
|
|
36
|
+
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter as GRPCExporter
|
|
37
|
+
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter as HTTPExporter
|
|
38
|
+
from opentelemetry.instrumentation.threading import ThreadingInstrumentor
|
|
39
|
+
from opentelemetry.sdk.resources import Resource
|
|
40
|
+
from opentelemetry.sdk.trace import ReadableSpan, SpanLimits, SpanProcessor, TracerProvider
|
|
41
|
+
from opentelemetry.sdk.trace.export import (
|
|
42
|
+
BatchSpanProcessor,
|
|
43
|
+
ConsoleSpanExporter,
|
|
44
|
+
SimpleSpanProcessor,
|
|
45
|
+
)
|
|
46
|
+
from opentelemetry.trace import Span
|
|
47
|
+
from requests import Response
|
|
48
|
+
|
|
49
|
+
from docent._log_util import get_logger
|
|
50
|
+
|
|
51
|
+
logger = get_logger(__name__)
|
|
52
|
+
|
|
53
|
+
# Default configuration
# Default backend endpoint for Docent telemetry (OTLP + REST helpers).
DEFAULT_ENDPOINT = "https://api.docent.transluce.org/rest/telemetry"
# Collection name used when the caller supplies none (or an invalid one).
DEFAULT_COLLECTION_NAME = "default-collection-name"
# Cap on backend error-detail text; presumably used when surfacing
# backend responses in exceptions — TODO confirm against the rest of this module.
ERROR_DETAIL_MAX_CHARS = 500

# Sentinel values for when tracing is disabled
# These stand in for real IDs so callers always receive a string even when
# tracing is globally turned off.
DISABLED_AGENT_RUN_ID = "disabled"
DISABLED_TRANSCRIPT_ID = "disabled"
DISABLED_TRANSCRIPT_GROUP_ID = "disabled"
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def _get_disabled_agent_run_id(agent_run_id: Optional[str]) -> str:
|
|
65
|
+
"""Return sentinel value for agent run ID when tracing is disabled."""
|
|
66
|
+
if agent_run_id is None:
|
|
67
|
+
return DISABLED_AGENT_RUN_ID
|
|
68
|
+
return agent_run_id
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def _get_disabled_transcript_id(transcript_id: Optional[str]) -> str:
|
|
72
|
+
"""Return sentinel value for transcript ID when tracing is disabled."""
|
|
73
|
+
if transcript_id is None:
|
|
74
|
+
return DISABLED_TRANSCRIPT_ID
|
|
75
|
+
return transcript_id
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def _get_disabled_transcript_group_id(transcript_group_id: Optional[str]) -> str:
|
|
79
|
+
"""Return sentinel value for transcript group ID when tracing is disabled."""
|
|
80
|
+
if transcript_group_id is None:
|
|
81
|
+
return DISABLED_TRANSCRIPT_GROUP_ID
|
|
82
|
+
return transcript_group_id
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
class DocentTelemetryRequestError(RuntimeError):
    """Signals that the Docent telemetry backend rejected a client request."""
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
class Instruments(Enum):
    """Closed set of instrumentations Docent tracing knows how to enable.

    Member values are the wire/config names used to select each provider
    integration; names and definition order are part of the public contract.
    """

    OPENAI = "openai"  # OpenAI SDK instrumentation
    ANTHROPIC = "anthropic"  # Anthropic SDK instrumentation
    BEDROCK = "bedrock"  # AWS Bedrock via boto3
    LANGCHAIN = "langchain"  # LangChain / LangGraph (disabled by default elsewhere)
    GOOGLE_GENERATIVEAI = "google_generativeai"  # Google Generative AI
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
class DocentTracer:
|
|
100
|
+
"""
|
|
101
|
+
Manages Docent tracing setup and provides tracing utilities.
|
|
102
|
+
"""
|
|
103
|
+
|
|
104
|
+
def __init__(
    self,
    collection_name: str = DEFAULT_COLLECTION_NAME,
    collection_id: Optional[str] = None,
    agent_run_id: Optional[str] = None,
    endpoint: Union[str, List[str]] = DEFAULT_ENDPOINT,
    headers: Optional[Dict[str, str]] = None,
    api_key: Optional[str] = None,
    enable_console_export: bool = False,
    enable_otlp_export: bool = True,
    disable_batch: bool = False,
    instruments: Optional[Set[Instruments]] = None,
    block_instruments: Optional[Set[Instruments]] = None,
):
    """Validate configuration and set up tracer state (no exporters yet).

    All argument validation is tolerant: invalid values are logged and
    replaced with safe defaults rather than raising, so tracing setup never
    crashes the host application.

    Args:
        collection_name: Human-readable collection name; falls back to
            DEFAULT_COLLECTION_NAME if not a non-empty string.
        collection_id: Stable collection identifier; a UUID4 is generated
            when missing or invalid.
        agent_run_id: Default agent run ID used when no run context is
            active; a UUID4 is generated when missing or invalid.
        endpoint: One endpoint or a list of endpoints; normalized by
            `_prepare_endpoints`.
        headers: Extra HTTP headers; non-string keys/values are dropped.
        api_key: Bearer token; overwrites any caller-supplied Authorization
            header when valid.
        enable_console_export: Also export spans to the console.
        enable_otlp_export: Export spans over OTLP (default on).
        disable_batch: Use SimpleSpanProcessor instead of batching.
        instruments: Instrumentations to enable; defaults to all except the
            ones in `disabled_instruments` (LangChain).
        block_instruments: Instrumentations to forcibly exclude.
    """
    self._initialized: bool = False
    # Check if tracing is disabled via environment variable
    # NOTE: `_global_tracing_disabled` is a module-level flag set elsewhere
    # in this file. When set, we return early and most attributes below are
    # never created — other methods must rely on `_disabled` first.
    if _global_tracing_disabled:
        self._disabled = True
        logger.info("Docent tracing disabled.")
        return

    # --- collection_name: coerce invalid input to the default name ---
    if not isinstance(collection_name, str) or not collection_name:
        logger.error(
            "collection_name must be provided as a non-empty string (got %r); defaulting to %s.",
            collection_name,
            DEFAULT_COLLECTION_NAME,
        )
        self.collection_name = DEFAULT_COLLECTION_NAME
    else:
        self.collection_name = collection_name

    # --- collection_id: accept a valid string, otherwise generate one ---
    if collection_id is not None:
        if isinstance(collection_id, str) and collection_id:
            self.collection_id = collection_id
        else:
            logger.error(
                "collection_id must be provided as a non-empty string (got %r); generating a new ID.",
                collection_id,
            )
            self.collection_id = str(uuid.uuid4())
    else:
        self.collection_id = str(uuid.uuid4())

    # --- default agent run ID: accept a valid string, otherwise generate ---
    if agent_run_id is not None:
        if isinstance(agent_run_id, str) and agent_run_id:
            self.default_agent_run_id = agent_run_id
        else:
            logger.error(
                "default agent_run_id must be a non-empty string (got %r); generating a new ID.",
                agent_run_id,
            )
            self.default_agent_run_id = str(uuid.uuid4())
    else:
        self.default_agent_run_id = str(uuid.uuid4())
    # Normalized, never-empty list of endpoint strings.
    self.endpoints: List[str] = self._prepare_endpoints(endpoint)

    # Build headers with authentication if provided
    if headers is None:
        self.headers: Dict[str, str] = {}
    elif not isinstance(headers, dict):
        logger.error(
            "HTTP headers for Docent tracing must be provided as a dict (got %r).",
            headers,
        )
        self.headers = {}
    else:
        # Keep only string->string pairs; anything else is logged and dropped.
        sanitized_headers: Dict[str, str] = {}
        for header_key, header_value in headers.items():
            if not isinstance(header_key, str):
                logger.error(
                    "HTTP header keys must be strings; skipping key %r of type %s.",
                    header_key,
                    type(header_key).__name__,
                )
                continue
            if not isinstance(header_value, str):
                logger.error(
                    "HTTP header values must be strings; skipping '%s' value of type %s.",
                    header_key,
                    type(header_value).__name__,
                )
                continue
            sanitized_headers[header_key] = header_value
        self.headers = sanitized_headers

    # Handle API key authentication (takes precedence over custom headers)
    if api_key is not None:
        if isinstance(api_key, str) and api_key:
            self.headers["Authorization"] = f"Bearer {api_key}"
        else:
            logger.error(
                "api_key must be a non-empty string (got %r); ignoring value.", api_key
            )

    if self.headers.get("Authorization"):
        logger.info(f"Using API key authentication for {self.collection_name}")
    else:
        logger.info(f"No authentication configured for {self.collection_name}")

    self.enable_console_export = enable_console_export
    self.enable_otlp_export = enable_otlp_export
    self.disable_batch = disable_batch
    # LangChain is off by default; callers opt in via `instruments`.
    self.disabled_instruments: Set[Instruments] = {Instruments.LANGCHAIN}
    self.instruments = instruments or (set(Instruments) - self.disabled_instruments)
    self.block_instruments = block_instruments or set()

    # Use separate tracer provider to avoid interfering with existing OTEL setup
    self._tracer_provider: Optional[TracerProvider] = None
    self._root_context: Optional[Context] = Context()
    self._tracer: Optional[trace.Tracer] = None
    self._cleanup_registered: bool = False
    self._disabled: bool = False
    self._spans_processors: List[Union[BatchSpanProcessor, SimpleSpanProcessor]] = []

    # Base HTTP endpoint for direct API calls (scores, metadata, trace-done)
    # NOTE(review): `_prepare_endpoints` always returns a non-empty list, so
    # this attribute is always set; the guard is defensive only.
    if len(self.endpoints) > 0:
        self._api_endpoint_base: Optional[str] = self.endpoints[0]

    # Context variables for agent_run_id and transcript_id
    self._collection_id_var: ContextVar[str] = contextvars.ContextVar("docent_collection_id")
    self._agent_run_id_var: ContextVar[str] = contextvars.ContextVar("docent_agent_run_id")
    self._transcript_id_var: ContextVar[str] = contextvars.ContextVar("docent_transcript_id")
    self._transcript_group_id_var: ContextVar[str] = contextvars.ContextVar(
        "docent_transcript_group_id"
    )
    self._attributes_var: ContextVar[dict[str, Any]] = contextvars.ContextVar(
        "docent_attributes"
    )
    # Store atomic span order counters per transcript_id to persist across context switches
    self._transcript_counters: defaultdict[str, itertools.count[int]] = defaultdict(
        lambda: itertools.count(0)
    )
    self._transcript_counter_lock = threading.Lock()
    self._transcript_group_states: dict[str, dict[str, Optional[str]]] = {}
    self._transcript_group_state_lock = threading.Lock()
    self._flush_lock = threading.Lock()
    # Pending metadata events queued until a span is available to carry them.
    self._pending_agent_run_metadata_events: defaultdict[str, List[Dict[str, Any]]] = (
        defaultdict(list)
    )
    self._pending_transcript_metadata_events: defaultdict[str, List[Dict[str, Any]]] = (
        defaultdict(list)
    )
    # Transcript-group events are keyed by agent_run_id so they flush even if no span carries the group attribute.
    self._pending_transcript_group_metadata_events: defaultdict[str, List[Dict[str, Any]]] = (
        defaultdict(list)
    )
    self._pending_metadata_lock = threading.Lock()
|
|
251
|
+
|
|
252
|
+
def _prepare_endpoints(self, endpoint: Union[str, Sequence[str]]) -> List[str]:
|
|
253
|
+
"""
|
|
254
|
+
Normalize endpoint input with simple type checks; fall back to DEFAULT_ENDPOINT as needed.
|
|
255
|
+
"""
|
|
256
|
+
endpoints: List[str] = []
|
|
257
|
+
|
|
258
|
+
if isinstance(endpoint, str):
|
|
259
|
+
candidate = endpoint.strip()
|
|
260
|
+
if not candidate:
|
|
261
|
+
logger.error(
|
|
262
|
+
"Docent telemetry endpoint cannot be empty; defaulting to %s.", DEFAULT_ENDPOINT
|
|
263
|
+
)
|
|
264
|
+
else:
|
|
265
|
+
endpoints.append(candidate)
|
|
266
|
+
elif isinstance(endpoint, (list, tuple)):
|
|
267
|
+
for index, value in enumerate(endpoint):
|
|
268
|
+
if not isinstance(value, str):
|
|
269
|
+
logger.error(
|
|
270
|
+
"Endpoint entries must be strings; entry at index %s is %s (%r). Skipping it.",
|
|
271
|
+
index,
|
|
272
|
+
type(value).__name__,
|
|
273
|
+
value,
|
|
274
|
+
)
|
|
275
|
+
continue
|
|
276
|
+
candidate = value.strip()
|
|
277
|
+
if not candidate:
|
|
278
|
+
logger.error(
|
|
279
|
+
"Endpoint entries cannot be empty strings (index %s). Skipping it.",
|
|
280
|
+
index,
|
|
281
|
+
)
|
|
282
|
+
continue
|
|
283
|
+
endpoints.append(candidate)
|
|
284
|
+
else:
|
|
285
|
+
logger.error(
|
|
286
|
+
"Endpoint must be a string or list/tuple of strings (got %r). Defaulting to %s.",
|
|
287
|
+
endpoint,
|
|
288
|
+
DEFAULT_ENDPOINT,
|
|
289
|
+
)
|
|
290
|
+
|
|
291
|
+
if not endpoints:
|
|
292
|
+
endpoints = [DEFAULT_ENDPOINT]
|
|
293
|
+
|
|
294
|
+
return endpoints
|
|
295
|
+
|
|
296
|
+
def get_current_agent_run_id(self) -> Optional[str]:
|
|
297
|
+
"""
|
|
298
|
+
Get the current agent run ID from context.
|
|
299
|
+
|
|
300
|
+
Retrieves the agent run ID that was set in the current execution context.
|
|
301
|
+
If no agent run context is active, returns the default agent run ID.
|
|
302
|
+
|
|
303
|
+
Returns:
|
|
304
|
+
The current agent run ID if available, or the default agent run ID
|
|
305
|
+
if no context is active.
|
|
306
|
+
"""
|
|
307
|
+
try:
|
|
308
|
+
return self._agent_run_id_var.get()
|
|
309
|
+
except LookupError:
|
|
310
|
+
return self.default_agent_run_id
|
|
311
|
+
|
|
312
|
+
def _register_cleanup(self):
|
|
313
|
+
"""Register cleanup handlers."""
|
|
314
|
+
if self._cleanup_registered:
|
|
315
|
+
return
|
|
316
|
+
|
|
317
|
+
# Register atexit handler
|
|
318
|
+
atexit.register(self.cleanup)
|
|
319
|
+
|
|
320
|
+
self._cleanup_registered = True
|
|
321
|
+
|
|
322
|
+
def _next_span_order(self, transcript_id: str) -> int:
|
|
323
|
+
"""
|
|
324
|
+
Get the next span order for a given transcript_id.
|
|
325
|
+
Thread-safe and guaranteed to be unique and monotonic.
|
|
326
|
+
"""
|
|
327
|
+
with self._transcript_counter_lock:
|
|
328
|
+
return next(self._transcript_counters[transcript_id])
|
|
329
|
+
|
|
330
|
+
def _get_current_span(self) -> Optional[Span]:
|
|
331
|
+
"""Return the active span, ignoring non-recording placeholders."""
|
|
332
|
+
try:
|
|
333
|
+
span = trace.get_current_span()
|
|
334
|
+
except Exception:
|
|
335
|
+
return None
|
|
336
|
+
|
|
337
|
+
try:
|
|
338
|
+
span_context = span.get_span_context()
|
|
339
|
+
except AttributeError:
|
|
340
|
+
return None
|
|
341
|
+
|
|
342
|
+
if span_context is None or not span_context.is_valid:
|
|
343
|
+
return None
|
|
344
|
+
return span
|
|
345
|
+
|
|
346
|
+
def _create_metadata_event(
|
|
347
|
+
self,
|
|
348
|
+
*,
|
|
349
|
+
name: str,
|
|
350
|
+
metadata: Optional[Dict[str, Any]],
|
|
351
|
+
attributes: Dict[str, Any],
|
|
352
|
+
timestamp_ns: Optional[int] = None,
|
|
353
|
+
) -> Dict[str, Any]:
|
|
354
|
+
return {
|
|
355
|
+
"name": name,
|
|
356
|
+
"metadata": metadata or {},
|
|
357
|
+
"attributes": attributes,
|
|
358
|
+
"timestamp_ns": timestamp_ns or time.time_ns(),
|
|
359
|
+
}
|
|
360
|
+
|
|
361
|
+
def _add_metadata_event_to_span(self, span: Span, event: Dict[str, Any]) -> None:
|
|
362
|
+
if not hasattr(span, "add_event"):
|
|
363
|
+
return
|
|
364
|
+
|
|
365
|
+
event_attributes: Dict[str, Any] = dict(event.get("attributes", {}))
|
|
366
|
+
metadata_payload = cast(Optional[Dict[str, Any]], event.get("metadata"))
|
|
367
|
+
if metadata_payload is not None:
|
|
368
|
+
try:
|
|
369
|
+
event_attributes["metadata_json"] = json.dumps(metadata_payload)
|
|
370
|
+
except (TypeError, ValueError) as exc:
|
|
371
|
+
logger.warning("Failed to serialize metadata payload for span event: %s", exc)
|
|
372
|
+
|
|
373
|
+
timestamp_ns = event.get("timestamp_ns")
|
|
374
|
+
span.add_event(
|
|
375
|
+
event.get("name", "metadata"), attributes=event_attributes, timestamp=timestamp_ns
|
|
376
|
+
)
|
|
377
|
+
|
|
378
|
+
def _pop_pending_events(
|
|
379
|
+
self, store: defaultdict[str, List[Dict[str, Any]]], key: Optional[str]
|
|
380
|
+
) -> List[Dict[str, Any]]:
|
|
381
|
+
if key is None:
|
|
382
|
+
return []
|
|
383
|
+
with self._pending_metadata_lock:
|
|
384
|
+
if key not in store:
|
|
385
|
+
return []
|
|
386
|
+
events = list(store[key])
|
|
387
|
+
del store[key]
|
|
388
|
+
return events
|
|
389
|
+
|
|
390
|
+
def _emit_pending_metadata_events(
|
|
391
|
+
self,
|
|
392
|
+
span: Span,
|
|
393
|
+
*,
|
|
394
|
+
agent_run_id: Optional[str],
|
|
395
|
+
transcript_id: Optional[str],
|
|
396
|
+
transcript_group_id: Optional[str],
|
|
397
|
+
) -> None:
|
|
398
|
+
for event in self._pop_pending_events(
|
|
399
|
+
self._pending_agent_run_metadata_events, agent_run_id
|
|
400
|
+
):
|
|
401
|
+
self._add_metadata_event_to_span(span, event)
|
|
402
|
+
for event in self._pop_pending_events(
|
|
403
|
+
self._pending_transcript_metadata_events, transcript_id
|
|
404
|
+
):
|
|
405
|
+
self._add_metadata_event_to_span(span, event)
|
|
406
|
+
for event in self._pop_pending_events(
|
|
407
|
+
self._pending_transcript_group_metadata_events, agent_run_id
|
|
408
|
+
):
|
|
409
|
+
self._add_metadata_event_to_span(span, event)
|
|
410
|
+
|
|
411
|
+
def _queue_metadata_event(
|
|
412
|
+
self,
|
|
413
|
+
store: defaultdict[str, List[Dict[str, Any]]],
|
|
414
|
+
key: Optional[str],
|
|
415
|
+
event: Dict[str, Any],
|
|
416
|
+
) -> None:
|
|
417
|
+
if not key:
|
|
418
|
+
logger.warning("Metadata event discarded because no identifier was provided: %s", event)
|
|
419
|
+
return
|
|
420
|
+
with self._pending_metadata_lock:
|
|
421
|
+
store[key].append(event)
|
|
422
|
+
|
|
423
|
+
def _emit_or_queue_metadata_event(
|
|
424
|
+
self,
|
|
425
|
+
*,
|
|
426
|
+
store: defaultdict[str, List[Dict[str, Any]]],
|
|
427
|
+
key: Optional[str],
|
|
428
|
+
event: Dict[str, Any],
|
|
429
|
+
) -> None:
|
|
430
|
+
span = self._get_current_span()
|
|
431
|
+
if span is not None:
|
|
432
|
+
try:
|
|
433
|
+
self._add_metadata_event_to_span(span, event)
|
|
434
|
+
return
|
|
435
|
+
except Exception as exc:
|
|
436
|
+
logger.warning("Failed to attach metadata event to active span: %s", exc)
|
|
437
|
+
self._queue_metadata_event(store, key, event)
|
|
438
|
+
|
|
439
|
+
def _get_optional_context_value(self, var: ContextVar[str]) -> Optional[str]:
|
|
440
|
+
"""Fetch a context var without creating a default when unset."""
|
|
441
|
+
try:
|
|
442
|
+
return var.get()
|
|
443
|
+
except LookupError:
|
|
444
|
+
return None
|
|
445
|
+
|
|
446
|
+
def _has_pending_metadata(
|
|
447
|
+
self,
|
|
448
|
+
*,
|
|
449
|
+
agent_run_id: Optional[str],
|
|
450
|
+
transcript_id: Optional[str],
|
|
451
|
+
transcript_group_id: Optional[str],
|
|
452
|
+
) -> bool:
|
|
453
|
+
with self._pending_metadata_lock:
|
|
454
|
+
if agent_run_id and self._pending_agent_run_metadata_events.get(agent_run_id):
|
|
455
|
+
return True
|
|
456
|
+
if transcript_id and self._pending_transcript_metadata_events.get(transcript_id):
|
|
457
|
+
return True
|
|
458
|
+
if agent_run_id and self._pending_transcript_group_metadata_events.get(agent_run_id):
|
|
459
|
+
return True
|
|
460
|
+
return False
|
|
461
|
+
|
|
462
|
+
def _flush_pending_metadata_events(
    self,
    *,
    agent_run_id: Optional[str],
    transcript_id: Optional[str],
    transcript_group_id: Optional[str],
) -> None:
    """Attach any queued metadata events to a synthetic span.

    Ensures queued data is not dropped when no further spans will start.
    No-op when tracing is disabled, the tracer is missing, or nothing is
    queued for the given identifiers.
    """
    if self.is_disabled() or self._tracer is None:
        return

    if not self._has_pending_metadata(
        agent_run_id=agent_run_id,
        transcript_id=transcript_id,
        transcript_group_id=transcript_group_id,
    ):
        return

    flush_span = self._tracer.start_span("docent.metadata.flush", context=self._root_context)
    try:
        flush_span.set_attribute("collection_id", self.collection_id)
        # Only attach the identifiers that are actually present.
        for attr_name, attr_value in (
            ("agent_run_id", agent_run_id),
            ("transcript_id", transcript_id),
            ("transcript_group_id", transcript_group_id),
        ):
            if attr_value:
                flush_span.set_attribute(attr_name, attr_value)

        self._emit_pending_metadata_events(
            flush_span,
            agent_run_id=agent_run_id,
            transcript_id=transcript_id,
            transcript_group_id=transcript_group_id,
        )
    finally:
        flush_span.end()
|
|
500
|
+
|
|
501
|
+
def _init_spans_exporter(self, endpoint: str) -> Optional[Union[HTTPExporter, GRPCExporter]]:
|
|
502
|
+
"""Initialize the appropriate span exporter based on endpoint."""
|
|
503
|
+
if not self.enable_otlp_export:
|
|
504
|
+
return None
|
|
505
|
+
|
|
506
|
+
try:
|
|
507
|
+
if "http" in endpoint.lower() or "https" in endpoint.lower():
|
|
508
|
+
http_exporter: HTTPExporter = HTTPExporter(
|
|
509
|
+
endpoint=f"{endpoint}/v1/traces", headers=self.headers, timeout=30
|
|
510
|
+
)
|
|
511
|
+
logger.debug(f"Initialized HTTP exporter for endpoint: {endpoint}/v1/traces")
|
|
512
|
+
return http_exporter
|
|
513
|
+
else:
|
|
514
|
+
grpc_exporter: GRPCExporter = GRPCExporter(
|
|
515
|
+
endpoint=endpoint, headers=self.headers, timeout=30
|
|
516
|
+
)
|
|
517
|
+
logger.debug(f"Initialized gRPC exporter for endpoint: {endpoint}")
|
|
518
|
+
return grpc_exporter
|
|
519
|
+
except Exception as e:
|
|
520
|
+
logger.error(f"Failed to initialize span exporter for {endpoint}: {e}")
|
|
521
|
+
return None
|
|
522
|
+
|
|
523
|
+
def _init_spans_exporters(self) -> List[Union[HTTPExporter, GRPCExporter]]:
|
|
524
|
+
"""Initialize span exporters for all endpoints."""
|
|
525
|
+
exporters: List[Union[HTTPExporter, GRPCExporter]] = []
|
|
526
|
+
|
|
527
|
+
for endpoint in self.endpoints:
|
|
528
|
+
exporter = self._init_spans_exporter(endpoint)
|
|
529
|
+
if exporter:
|
|
530
|
+
exporters.append(exporter)
|
|
531
|
+
logger.info(f"Initialized exporter for endpoint: {endpoint}")
|
|
532
|
+
else:
|
|
533
|
+
logger.critical(f"Failed to initialize exporter for endpoint: {endpoint}")
|
|
534
|
+
|
|
535
|
+
return exporters
|
|
536
|
+
|
|
537
|
+
def _create_span_processor(
    self, exporter: Union[HTTPExporter, GRPCExporter, ConsoleSpanExporter]
) -> Union[SimpleSpanProcessor, BatchSpanProcessor]:
    """Wrap *exporter* in the processor matching this tracer's config.

    Immediate (simple) export is used when batching is disabled or when
    running inside a notebook; batched export otherwise.
    """
    if self.disable_batch or _is_notebook():
        processor: Union[SimpleSpanProcessor, BatchSpanProcessor] = SimpleSpanProcessor(exporter)
        logger.debug("Created SimpleSpanProcessor for immediate export")
    else:
        processor = BatchSpanProcessor(exporter)
        logger.debug("Created BatchSpanProcessor for batched export")
    return processor
|
|
549
|
+
|
|
550
|
+
def initialize(self):
|
|
551
|
+
"""Initialize Docent tracing setup."""
|
|
552
|
+
if self._initialized:
|
|
553
|
+
return
|
|
554
|
+
|
|
555
|
+
# If tracing is disabled, mark as initialized but don't set up anything
|
|
556
|
+
if self.is_disabled():
|
|
557
|
+
self._initialized = True
|
|
558
|
+
return
|
|
559
|
+
|
|
560
|
+
try:
|
|
561
|
+
|
|
562
|
+
# Check for OTEL_SPAN_ATTRIBUTE_COUNT_LIMIT environment variable
|
|
563
|
+
default_attribute_limit = 1024 * 16
|
|
564
|
+
env_value = os.environ.get("OTEL_SPAN_ATTRIBUTE_COUNT_LIMIT", "0")
|
|
565
|
+
env_limit = int(env_value) if env_value.isdigit() else 0
|
|
566
|
+
attribute_limit = max(env_limit, default_attribute_limit)
|
|
567
|
+
|
|
568
|
+
span_limits = SpanLimits(
|
|
569
|
+
max_attributes=attribute_limit,
|
|
570
|
+
)
|
|
571
|
+
|
|
572
|
+
# Create our own isolated tracer provider
|
|
573
|
+
self._tracer_provider = TracerProvider(
|
|
574
|
+
resource=Resource.create({"service.name": self.collection_name}),
|
|
575
|
+
span_limits=span_limits,
|
|
576
|
+
)
|
|
577
|
+
|
|
578
|
+
class ContextSpanProcessor(SpanProcessor):
|
|
579
|
+
def __init__(self, manager: "DocentTracer"):
|
|
580
|
+
self.manager: "DocentTracer" = manager
|
|
581
|
+
|
|
582
|
+
def on_start(self, span: Span, parent_context: Optional[Context] = None) -> None:
|
|
583
|
+
# Add collection_id, agent_run_id, transcript_id, transcript_group_id, and any other current attributes
|
|
584
|
+
span.set_attribute("collection_id", self.manager.collection_id)
|
|
585
|
+
|
|
586
|
+
# Set agent_run_id from context
|
|
587
|
+
try:
|
|
588
|
+
agent_run_id: str = self.manager._agent_run_id_var.get()
|
|
589
|
+
if agent_run_id:
|
|
590
|
+
span.set_attribute("agent_run_id", agent_run_id)
|
|
591
|
+
else:
|
|
592
|
+
span.set_attribute("agent_run_id_default", True)
|
|
593
|
+
span.set_attribute("agent_run_id", self.manager.default_agent_run_id)
|
|
594
|
+
except LookupError:
|
|
595
|
+
span.set_attribute("agent_run_id_default", True)
|
|
596
|
+
span.set_attribute("agent_run_id", self.manager.default_agent_run_id)
|
|
597
|
+
|
|
598
|
+
# Set transcript_group_id from context
|
|
599
|
+
try:
|
|
600
|
+
transcript_group_id: str = self.manager._transcript_group_id_var.get()
|
|
601
|
+
if transcript_group_id:
|
|
602
|
+
span.set_attribute("transcript_group_id", transcript_group_id)
|
|
603
|
+
except LookupError:
|
|
604
|
+
pass
|
|
605
|
+
|
|
606
|
+
# Set transcript_id from context
|
|
607
|
+
try:
|
|
608
|
+
transcript_id: str = self.manager._transcript_id_var.get()
|
|
609
|
+
if transcript_id:
|
|
610
|
+
span.set_attribute("transcript_id", transcript_id)
|
|
611
|
+
# Add atomic span order number
|
|
612
|
+
span_order: int = self.manager._next_span_order(transcript_id)
|
|
613
|
+
span.set_attribute("span_order", span_order)
|
|
614
|
+
except LookupError:
|
|
615
|
+
# transcript_id not available, skip it
|
|
616
|
+
pass
|
|
617
|
+
|
|
618
|
+
# Set custom attributes from context
|
|
619
|
+
try:
|
|
620
|
+
attributes: dict[str, Any] = self.manager._attributes_var.get()
|
|
621
|
+
for key, value in attributes.items():
|
|
622
|
+
span.set_attribute(key, value)
|
|
623
|
+
except LookupError:
|
|
624
|
+
# attributes not available, skip them
|
|
625
|
+
pass
|
|
626
|
+
|
|
627
|
+
# Debug logging for span creation
|
|
628
|
+
span_name = getattr(span, "name", "unknown")
|
|
629
|
+
span_attrs = getattr(span, "attributes", {})
|
|
630
|
+
logger.debug(
|
|
631
|
+
f"Created span: name='{span_name}', collection_id={self.manager.collection_id}, agent_run_id={span_attrs.get('agent_run_id')}, transcript_id={span_attrs.get('transcript_id')}"
|
|
632
|
+
)
|
|
633
|
+
|
|
634
|
+
self.manager._emit_pending_metadata_events(
|
|
635
|
+
span,
|
|
636
|
+
agent_run_id=span_attrs.get("agent_run_id"),
|
|
637
|
+
transcript_id=span_attrs.get("transcript_id"),
|
|
638
|
+
transcript_group_id=span_attrs.get("transcript_group_id"),
|
|
639
|
+
)
|
|
640
|
+
|
|
641
|
+
def on_end(self, span: ReadableSpan) -> None:
|
|
642
|
+
pass
|
|
643
|
+
|
|
644
|
+
def shutdown(self) -> None:
|
|
645
|
+
pass
|
|
646
|
+
|
|
647
|
+
def force_flush(self, timeout_millis: Optional[float] = None) -> bool:
|
|
648
|
+
return True
|
|
649
|
+
|
|
650
|
+
# Configure span exporters for our isolated provider
|
|
651
|
+
if self.enable_otlp_export:
|
|
652
|
+
otlp_exporters: List[Union[HTTPExporter, GRPCExporter]] = (
|
|
653
|
+
self._init_spans_exporters()
|
|
654
|
+
)
|
|
655
|
+
|
|
656
|
+
if otlp_exporters:
|
|
657
|
+
# Create a processor for each exporter
|
|
658
|
+
for exporter in otlp_exporters:
|
|
659
|
+
otlp_processor: Union[SimpleSpanProcessor, BatchSpanProcessor] = (
|
|
660
|
+
self._create_span_processor(exporter)
|
|
661
|
+
)
|
|
662
|
+
self._tracer_provider.add_span_processor(otlp_processor)
|
|
663
|
+
self._spans_processors.append(otlp_processor)
|
|
664
|
+
|
|
665
|
+
logger.info(
|
|
666
|
+
f"Added {len(otlp_exporters)} OTLP exporters for {len(self.endpoints)} endpoints"
|
|
667
|
+
)
|
|
668
|
+
|
|
669
|
+
if self.enable_console_export:
|
|
670
|
+
console_exporter: ConsoleSpanExporter = ConsoleSpanExporter()
|
|
671
|
+
console_processor: Union[SimpleSpanProcessor, BatchSpanProcessor] = (
|
|
672
|
+
self._create_span_processor(console_exporter)
|
|
673
|
+
)
|
|
674
|
+
self._tracer_provider.add_span_processor(console_processor)
|
|
675
|
+
self._spans_processors.append(console_processor)
|
|
676
|
+
|
|
677
|
+
# Add our custom context span processor
|
|
678
|
+
context_processor = ContextSpanProcessor(self)
|
|
679
|
+
self._tracer_provider.add_span_processor(context_processor)
|
|
680
|
+
|
|
681
|
+
# Get tracer from our isolated provider (don't set global provider)
|
|
682
|
+
self._tracer = self._tracer_provider.get_tracer(__name__)
|
|
683
|
+
|
|
684
|
+
# Instrument threading for better context propagation
|
|
685
|
+
try:
|
|
686
|
+
ThreadingInstrumentor().instrument()
|
|
687
|
+
except Exception as e:
|
|
688
|
+
logger.warning(f"Failed to instrument threading: {e}")
|
|
689
|
+
|
|
690
|
+
enabled_instruments = self.instruments - self.block_instruments
|
|
691
|
+
|
|
692
|
+
# Instrument OpenAI with our isolated tracer provider
|
|
693
|
+
if Instruments.OPENAI in enabled_instruments:
|
|
694
|
+
try:
|
|
695
|
+
if is_package_installed("openai"):
|
|
696
|
+
from opentelemetry.instrumentation.openai import OpenAIInstrumentor
|
|
697
|
+
|
|
698
|
+
OpenAIInstrumentor().instrument(tracer_provider=self._tracer_provider)
|
|
699
|
+
logger.info("Instrumented OpenAI")
|
|
700
|
+
except Exception as e:
|
|
701
|
+
logger.warning(f"Failed to instrument OpenAI: {e}")
|
|
702
|
+
|
|
703
|
+
# Instrument Anthropic with our isolated tracer provider
|
|
704
|
+
if Instruments.ANTHROPIC in enabled_instruments:
|
|
705
|
+
try:
|
|
706
|
+
if is_package_installed("anthropic"):
|
|
707
|
+
from opentelemetry.instrumentation.anthropic import AnthropicInstrumentor
|
|
708
|
+
|
|
709
|
+
AnthropicInstrumentor().instrument(tracer_provider=self._tracer_provider)
|
|
710
|
+
logger.info("Instrumented Anthropic")
|
|
711
|
+
except Exception as e:
|
|
712
|
+
logger.warning(f"Failed to instrument Anthropic: {e}")
|
|
713
|
+
|
|
714
|
+
# Instrument Bedrock with our isolated tracer provider
|
|
715
|
+
if Instruments.BEDROCK in enabled_instruments:
|
|
716
|
+
try:
|
|
717
|
+
if is_package_installed("boto3"):
|
|
718
|
+
from opentelemetry.instrumentation.bedrock import BedrockInstrumentor
|
|
719
|
+
|
|
720
|
+
BedrockInstrumentor().instrument(tracer_provider=self._tracer_provider)
|
|
721
|
+
logger.info("Instrumented Bedrock")
|
|
722
|
+
except Exception as e:
|
|
723
|
+
logger.warning(f"Failed to instrument Bedrock: {e}")
|
|
724
|
+
|
|
725
|
+
# Instrument LangChain with our isolated tracer provider
|
|
726
|
+
if Instruments.LANGCHAIN in enabled_instruments:
|
|
727
|
+
try:
|
|
728
|
+
if is_package_installed("langchain") or is_package_installed("langgraph"):
|
|
729
|
+
from opentelemetry.instrumentation.langchain import LangchainInstrumentor
|
|
730
|
+
|
|
731
|
+
LangchainInstrumentor().instrument(tracer_provider=self._tracer_provider)
|
|
732
|
+
logger.info("Instrumented LangChain")
|
|
733
|
+
except Exception as e:
|
|
734
|
+
logger.warning(f"Failed to instrument LangChain: {e}")
|
|
735
|
+
|
|
736
|
+
# Instrument Google Generative AI with our isolated tracer provider
|
|
737
|
+
if Instruments.GOOGLE_GENERATIVEAI in enabled_instruments:
|
|
738
|
+
try:
|
|
739
|
+
if is_package_installed("google-generativeai") or is_package_installed(
|
|
740
|
+
"google-genai"
|
|
741
|
+
):
|
|
742
|
+
from opentelemetry.instrumentation.google_generativeai import (
|
|
743
|
+
GoogleGenerativeAiInstrumentor,
|
|
744
|
+
)
|
|
745
|
+
|
|
746
|
+
GoogleGenerativeAiInstrumentor().instrument(
|
|
747
|
+
tracer_provider=self._tracer_provider
|
|
748
|
+
)
|
|
749
|
+
logger.info("Instrumented Google Generative AI")
|
|
750
|
+
except Exception as e:
|
|
751
|
+
logger.warning(f"Failed to instrument Google Generative AI: {e}")
|
|
752
|
+
|
|
753
|
+
# Register cleanup handlers
|
|
754
|
+
self._register_cleanup()
|
|
755
|
+
|
|
756
|
+
self._initialized = True
|
|
757
|
+
logger.info(f"Docent tracing initialized for {self.collection_name}")
|
|
758
|
+
|
|
759
|
+
except Exception as e:
|
|
760
|
+
logger.error(f"Failed to initialize Docent tracing: {e}")
|
|
761
|
+
self._disabled = True
|
|
762
|
+
raise
|
|
763
|
+
|
|
764
|
+
def cleanup(self):
    """
    Release Docent tracing resources.

    Forces a flush of every pending span and then shuts down the tracer
    provider. Normally invoked automatically at interpreter exit through the
    registered atexit hook, but it is safe to call manually as well.

    Steps:
    1. Flush all span processors so buffered data reaches the exporters.
    2. Shut down the tracer provider and drop the reference to it.
    """
    if self.is_disabled():
        return

    try:
        self.flush()

        provider = self._tracer_provider
        if provider:
            provider.shutdown()
            self._tracer_provider = None
    except Exception as e:
        logger.error(f"Error during cleanup: {e}")
|
|
787
|
+
|
|
788
|
+
def close(self):
    """Explicitly close the Docent tracing manager.

    Runs cleanup() and, when an atexit hook was registered earlier, removes
    it so cleanup does not run a second time at interpreter shutdown.
    """
    if self.is_disabled():
        return

    try:
        self.cleanup()
        if not self._cleanup_registered:
            return
        atexit.unregister(self.cleanup)
        self._cleanup_registered = False
    except Exception as e:
        logger.error(f"Error during close: {e}")
|
|
800
|
+
|
|
801
|
+
def flush(self) -> None:
    """Force-flush every registered span processor so spans reach the exporters."""
    if self.is_disabled():
        return

    try:
        logger.debug(f"Flushing {len(self._spans_processors)} span processors")
        for i, processor in enumerate(self._spans_processors):
            # Some processors (e.g. simple ones) may not expose force_flush.
            if not hasattr(processor, "force_flush"):
                continue
            logger.debug(f"Flushing span processor {i}")
            # Short timeout keeps shutdown snappy; spans that miss it are dropped.
            processor.force_flush(timeout_millis=50)
        logger.debug("Span flush completed")
    except Exception as e:
        logger.error(f"Error during flush: {e}")
|
|
815
|
+
|
|
816
|
+
def is_disabled(self) -> bool:
    """Check if tracing is disabled.

    True when either the module-wide kill switch (_global_tracing_disabled)
    or this instance's own _disabled flag is set.
    """
    return _global_tracing_disabled or self._disabled
|
|
819
|
+
|
|
820
|
+
def set_disabled(self, disabled: bool) -> None:
    """Enable or disable tracing.

    Disabling an already-initialized manager also triggers cleanup() so
    pending spans are flushed and resources released.
    """
    self._disabled = disabled
    should_cleanup = disabled and self._initialized
    if should_cleanup:
        self.cleanup()
|
|
825
|
+
|
|
826
|
+
def is_initialized(self) -> bool:
    """Verify if the manager is properly initialized.

    True only after initialize() has completed successfully.
    """
    return self._initialized
|
|
829
|
+
|
|
830
|
+
@contextmanager
def agent_run_context(
    self,
    agent_run_id: Optional[str] = None,
    transcript_id: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
    **attributes: Any,
) -> Iterator[tuple[str, str]]:
    """
    Context manager for setting up an agent run context.

    Args:
        agent_run_id: Optional agent run ID (auto-generated if not provided)
        transcript_id: Optional transcript ID (auto-generated if not provided)
        metadata: Optional nested dictionary of metadata to send to backend
        **attributes: Additional attributes to add to the context

    Yields:
        Tuple of (agent_run_id, transcript_id)
    """
    # When tracing is disabled, still yield usable IDs so caller code that
    # threads them through keeps working without a tracer.
    if self.is_disabled():
        agent_run_id = _get_disabled_agent_run_id(agent_run_id)
        transcript_id = _get_disabled_transcript_id(transcript_id)
        yield agent_run_id, transcript_id
        return

    # Lazily initialize tracing on first use.
    if not self._initialized:
        self.initialize()

    # Replace invalid (non-string or empty) IDs with fresh UUIDs, logging so
    # the caller can track down the bad value.
    if agent_run_id is not None and (not isinstance(agent_run_id, str) or not agent_run_id):
        logger.error("Invalid agent_run_id for agent_run_context; generating a new ID.")
        agent_run_id = str(uuid.uuid4())
    elif agent_run_id is None:
        agent_run_id = str(uuid.uuid4())

    if transcript_id is not None and (not isinstance(transcript_id, str) or not transcript_id):
        logger.error(
            "Invalid transcript_id for agent_run_context; generating a new transcript ID."
        )
        transcript_id = str(uuid.uuid4())
    elif transcript_id is None:
        transcript_id = str(uuid.uuid4())

    # Set context variables for this execution context; the tokens let us
    # restore the previous values exactly on exit.
    agent_run_id_token: Token[str] = self._agent_run_id_var.set(agent_run_id)
    transcript_id_token: Token[str] = self._transcript_id_var.set(transcript_id)
    attributes_token: Token[dict[str, Any]] = self._attributes_var.set(attributes)

    try:
        # Send metadata directly to backend if provided; failures are logged
        # but never abort the run context.
        if metadata:
            try:
                self.send_agent_run_metadata(agent_run_id, metadata)
            except Exception as e:
                logger.error(f"Failed sending agent run metadata: {e}")

        yield agent_run_id, transcript_id
    finally:
        # Flush any queued metadata events before tearing the context down,
        # then restore the previous context-variable values.
        transcript_group_id = self._get_optional_context_value(self._transcript_group_id_var)
        self._flush_pending_metadata_events(
            agent_run_id=agent_run_id,
            transcript_id=transcript_id,
            transcript_group_id=transcript_group_id,
        )
        self._agent_run_id_var.reset(agent_run_id_token)
        self._transcript_id_var.reset(transcript_id_token)
        self._attributes_var.reset(attributes_token)
|
|
897
|
+
|
|
898
|
+
@asynccontextmanager
async def async_agent_run_context(
    self,
    agent_run_id: Optional[str] = None,
    transcript_id: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
    **attributes: Any,
) -> AsyncIterator[tuple[str, str]]:
    """
    Async context manager for setting up an agent run context.
    Modifies the OpenTelemetry context so all spans inherit agent_run_id and transcript_id.

    Args:
        agent_run_id: Optional agent run ID (auto-generated if not provided)
        transcript_id: Optional transcript ID (auto-generated if not provided)
        metadata: Optional nested dictionary of metadata to send to backend
        **attributes: Additional attributes to add to the context

    Yields:
        Tuple of (agent_run_id, transcript_id)
    """
    # Disabled tracing still yields usable IDs so caller code keeps working.
    if self.is_disabled():
        agent_run_id = _get_disabled_agent_run_id(agent_run_id)
        transcript_id = _get_disabled_transcript_id(transcript_id)
        yield agent_run_id, transcript_id
        return

    # Lazily initialize tracing on first use.
    if not self._initialized:
        self.initialize()

    # Replace invalid (non-string or empty) IDs with fresh UUIDs.
    if agent_run_id is not None and (not isinstance(agent_run_id, str) or not agent_run_id):
        logger.error("Invalid agent_run_id for async_agent_run_context; generating a new ID.")
        agent_run_id = str(uuid.uuid4())
    elif agent_run_id is None:
        agent_run_id = str(uuid.uuid4())

    if transcript_id is not None and (not isinstance(transcript_id, str) or not transcript_id):
        logger.error(
            "Invalid transcript_id for async_agent_run_context; generating a new transcript ID."
        )
        transcript_id = str(uuid.uuid4())
    elif transcript_id is None:
        transcript_id = str(uuid.uuid4())

    # Set context variables for this execution context; keep the reset tokens.
    agent_run_id_token: Token[str] = self._agent_run_id_var.set(agent_run_id)
    transcript_id_token: Token[str] = self._transcript_id_var.set(transcript_id)
    attributes_token: Token[dict[str, Any]] = self._attributes_var.set(attributes)

    try:
        # Send metadata directly to backend if provided; never abort the run.
        if metadata:
            try:
                self.send_agent_run_metadata(agent_run_id, metadata)
            except Exception as e:
                # Log at error level for consistency with the synchronous
                # agent_run_context, which reports this same failure via
                # logger.error (previously this path used logger.warning).
                logger.error(f"Failed sending agent run metadata: {e}")

        yield agent_run_id, transcript_id
    finally:
        # Flush queued metadata events, then restore previous context values.
        transcript_group_id = self._get_optional_context_value(self._transcript_group_id_var)
        self._flush_pending_metadata_events(
            agent_run_id=agent_run_id,
            transcript_id=transcript_id,
            transcript_group_id=transcript_group_id,
        )
        self._agent_run_id_var.reset(agent_run_id_token)
        self._transcript_id_var.reset(transcript_id_token)
        self._attributes_var.reset(attributes_token)
|
|
966
|
+
|
|
967
|
+
def _api_headers(self) -> Dict[str, str]:
    """
    Build the HTTP headers used for backend API requests.

    Returns:
        A fresh dict containing the tracer's configured headers plus a
        default Content-Type of application/json when none was supplied.
    """
    # Work on a copy so the tracer's configured headers are never mutated.
    headers: Dict[str, str] = {**self.headers}
    if "Content-Type" not in headers:
        # JSON payloads must always advertise the correct content type.
        headers["Content-Type"] = "application/json"
    return headers
|
|
979
|
+
|
|
980
|
+
def _ensure_json_serializable_metadata(
    self, metadata: Dict[str, Any], context: str
) -> Optional[Dict[str, Any]]:
    """
    Validate metadata before it is shipped to the backend.

    Produces a sanitized shallow copy (non-string keys dropped) so later code
    never mutates the caller's object. Returns None — after logging — whenever
    the payload is not a dict, is not JSON serializable, or contains null
    characters, so callers can simply skip sending.
    """
    if not isinstance(metadata, dict):
        logger.error(
            "%s metadata must be provided as a dict (got %s: %r). Skipping metadata payload.",
            context,
            type(metadata).__name__,
            metadata,
        )
        return None

    sanitized: Dict[str, Any] = {}
    for k, v in metadata.items():
        if isinstance(k, str):
            sanitized[k] = v
            continue
        logger.error(
            "%s metadata keys must be strings; skipping key %r (type %s).",
            context,
            k,
            type(k).__name__,
        )

    try:
        json.dumps(sanitized)
    except (TypeError, ValueError) as exc:
        logger.error(
            "%s metadata must be JSON serializable (%s). Skipping metadata payload: %r",
            context,
            exc,
            metadata,
        )
        return None

    # The backend rejects NUL bytes, so refuse payloads that contain them.
    offending_path = self._find_null_character_path(sanitized)
    if offending_path is not None:
        logger.error(
            "%s metadata cannot contain null characters (found at %s). "
            "Skipping metadata payload.",
            context,
            offending_path,
        )
        return None

    return sanitized
|
|
1029
|
+
|
|
1030
|
+
def _post_json(self, path: str, data: Dict[str, Any]) -> None:
    """POST a JSON payload to the backend (delegates to the sync implementation)."""
    return self._post_json_sync(path, data)
|
|
1032
|
+
|
|
1033
|
+
def _post_json_sync(self, path: str, data: Dict[str, Any]) -> None:
    """
    Synchronously POST *data* as JSON to the backend at *path*.

    Raises:
        RuntimeError: when no API endpoint base has been configured.
        DocentTelemetryRequestError: when the HTTP request fails; the message
            is enriched with backend detail and a suggested fix.
    """
    base = self._api_endpoint_base
    if not base:
        message = "API endpoint base is not configured"
        logger.error(message)
        raise RuntimeError(message)

    url = f"{base}{path}"
    try:
        # (10s connect, 60s read) timeout keeps a stuck backend from hanging callers.
        response = requests.post(url, json=data, headers=self._api_headers(), timeout=(10, 60))
        response.raise_for_status()
    except requests.exceptions.RequestException as exc:
        raise DocentTelemetryRequestError(self._format_request_exception(url, exc)) from exc
|
|
1045
|
+
|
|
1046
|
+
def _format_request_exception(self, url: str, exc: requests.exceptions.RequestException) -> str:
    """
    Build a human-readable error message for a failed POST.

    Combines the URL, HTTP status/reason, any backend-provided detail, the
    x-request-id header, and a status-specific remediation hint.
    """
    response: Optional[Response] = getattr(exc, "response", None)
    parts: List[str] = [f"Failed POST {url}"]

    if response is None:
        # No HTTP response at all (connection error, timeout, ...).
        parts.append(f"- {exc}")
        suggestion = self._suggest_fix_for_status(None)
    else:
        status_phrase = f"HTTP {response.status_code}"
        if response.reason:
            status_phrase = f"{status_phrase} {response.reason}"
        parts.append(f"({status_phrase})")

        detail = self._extract_response_detail(response)
        if detail:
            parts.append(f"- Backend detail: {detail}")

        request_id = response.headers.get("x-request-id")
        if request_id:
            parts.append(f"(request-id: {request_id})")

        suggestion = self._suggest_fix_for_status(response.status_code)

    if suggestion:
        parts.append(suggestion)

    return " ".join(part for part in parts if part)
|
|
1074
|
+
|
|
1075
|
+
def _extract_response_detail(self, response: Response) -> Optional[str]:
    """
    Pull a concise, truncated error description out of an HTTP response.

    Prefers structured JSON detail; falls back to whitespace-normalized body
    text. Returns None when the body is empty and not JSON.
    """
    try:
        body = response.json()
    except ValueError:
        # Not JSON: fall back to the raw body text, collapsed to one line.
        raw = response.text.strip()
        if not raw:
            return None
        return self._truncate_error_message(" ".join(raw.split()))

    if not isinstance(body, dict):
        return self._truncate_error_message(self._normalize_error_value(body))

    typed_body = cast(Dict[str, Any], body)
    structured = self._structured_detail_message(typed_body)
    if structured:
        return self._truncate_error_message(structured)
    return self._truncate_error_message(self._normalize_error_value(typed_body))
|
|
1093
|
+
|
|
1094
|
+
def _structured_detail_message(self, data: Dict[str, Any]) -> Optional[str]:
    """
    Extract a structured message from a JSON error body.

    Checks the conventional wrapper keys first ("detail", "message",
    "error"); when none yields a message, tries the body itself.
    """
    for wrapper_key in ("detail", "message", "error"):
        if wrapper_key not in data:
            continue
        candidate = self._structured_detail_value(data[wrapper_key])
        if candidate:
            return candidate
    return self._structured_detail_value(data)
|
|
1101
|
+
|
|
1102
|
+
def _structured_detail_value(self, value: Any) -> Optional[str]:
    """
    Render a backend error value as a single display string.

    Mappings are flattened into "message (hint: ...) [code: ...]
    (request-id: ...)" form; plain non-empty strings are stripped and
    returned; everything else yields None.
    """
    if isinstance(value, Mapping):
        mapping_value = cast(Mapping[str, Any], value)

        def _clean(field: str) -> Optional[str]:
            # Only non-empty string fields are usable.
            raw = mapping_value.get(field)
            if isinstance(raw, str) and raw.strip():
                return raw.strip()
            return None

        parts: List[str] = []
        main = _clean("message") or _clean("detail")
        if main:
            parts.append(main)
        hint = _clean("hint")
        if hint:
            parts.append(f"(hint: {hint})")
        code = _clean("error_code")
        if code:
            parts.append(f"[code: {code}]")
        request_id = _clean("request_id")
        if request_id:
            parts.append(f"(request-id: {request_id})")

        return " ".join(parts) if parts else None

    if isinstance(value, str) and value.strip():
        return value.strip()

    return None
|
|
1130
|
+
|
|
1131
|
+
def _normalize_error_value(self, value: Any) -> str:
    """
    Collapse any error value to a single-line string.

    Strings are whitespace-normalized directly; other values are JSON-encoded
    when possible (str() otherwise) and then normalized.
    """
    if isinstance(value, str):
        text = value
    else:
        try:
            text = json.dumps(value)
        except (TypeError, ValueError):
            text = str(value)
    return " ".join(text.split())
|
|
1141
|
+
|
|
1142
|
+
def _truncate_error_message(self, message: str) -> str:
    """Strip the message and cap it at ERROR_DETAIL_MAX_CHARS, appending an ellipsis."""
    trimmed = message.strip()
    if len(trimmed) > ERROR_DETAIL_MAX_CHARS:
        return f"{trimmed[:ERROR_DETAIL_MAX_CHARS]}..."
    return trimmed
|
|
1147
|
+
|
|
1148
|
+
def _suggest_fix_for_status(self, status_code: Optional[int]) -> Optional[str]:
    """
    Return a remediation hint for a given HTTP status code, if we have one.

    None (no response at all) maps to a connectivity hint; unrecognized
    success/redirect codes yield no hint.
    """
    auth_hint = (
        "Verify that the Authorization header or DOCENT_API_KEY grants write access to the "
        "target collection."
    )
    route_hint = (
        "Ensure the tracing endpoint passed to initialize_tracing matches the Docent server's "
        "/rest/telemetry route."
    )
    payload_hint = (
        "Confirm the payload includes collection_id, agent_run_id, metadata, and timestamp in "
        "the expected format."
    )
    exact_hints = {
        401: auth_hint,
        403: auth_hint,
        404: route_hint,
        400: payload_hint,
        422: payload_hint,
    }
    if status_code in exact_hints:
        return exact_hints[status_code]
    if status_code and status_code >= 500:
        return "Inspect the Docent backend logs for the referenced request."
    if status_code is None:
        return "Confirm the Docent telemetry endpoint is reachable from this process."
    return None
|
|
1169
|
+
|
|
1170
|
+
def _find_null_character_path(self, value: Any, path: str = "") -> Optional[str]:
    """
    Locate the first NUL character (or escaped NUL sequence) in a nested value.

    The backend rejects NUL bytes, so strings — including the escaped forms
    "\\u0000" and "\\x00" — are scanned recursively through dicts, lists, and
    tuples. Returns a dotted/indexed path to the offending value, "<root>"
    for a top-level string, or None when everything is clean.
    """
    if isinstance(value, str):
        has_nul = "\x00" in value or "\\u0000" in value or "\\x00" in value
        return (path or "<root>") if has_nul else None

    if isinstance(value, dict):
        typed_dict: Mapping[str, Any] = cast(Mapping[str, Any], value)
        for key, item in typed_dict.items():
            child_path = f"{path}.{str(key)}" if path else str(key)
            found = self._find_null_character_path(item, child_path)
            if found:
                return found
        return None

    if isinstance(value, (list, tuple)):
        typed_sequence: Sequence[Any] = cast(Sequence[Any], value)
        for index, item in enumerate(typed_sequence):
            child_path = f"{path}[{index}]" if path else f"[{index}]"
            found = self._find_null_character_path(item, child_path)
            if found:
                return found
        return None

    # Non-container, non-string values cannot carry NUL characters here.
    return None
|
|
1197
|
+
|
|
1198
|
+
def send_agent_run_score(
    self,
    agent_run_id: str,
    name: str,
    score: float,
    attributes: Optional[Dict[str, Any]] = None,
) -> None:
    """
    Report a numeric score for a specific agent run to the backend.

    Args:
        agent_run_id: The agent run the score belongs to.
        name: Name of the score metric.
        score: Numeric score value.
        attributes: Optional extra attributes; string-keyed entries are merged
            into the payload top level (so keys here can shadow the standard
            fields — mirrors existing behavior).
    """
    if self.is_disabled():
        return

    collection_id = self.collection_id
    if not agent_run_id or not isinstance(agent_run_id, str):
        logger.error("Cannot send agent run score without a valid agent_run_id.")
        return

    if not name or not isinstance(name, str):
        logger.error("Cannot send agent run score without a valid score name.")
        return

    payload: Dict[str, Any] = {
        "collection_id": collection_id,
        "agent_run_id": agent_run_id,
        "score_name": name,
        "score_value": score,
        "timestamp": datetime.now(timezone.utc).isoformat(),
    }

    if attributes is not None:
        if isinstance(attributes, dict):
            for attr_key, attr_value in attributes.items():
                if isinstance(attr_key, str):
                    payload[attr_key] = attr_value
                else:
                    logger.error(
                        "Score attribute keys must be strings; skipping key %r of type %s.",
                        attr_key,
                        type(attr_key).__name__,
                    )
        else:
            logger.error(
                "Score attributes must be provided as a dict (got %s: %r). Skipping attributes.",
                type(attributes).__name__,
                attributes,
            )

    self._post_json("/v1/scores", payload)
|
|
1253
|
+
|
|
1254
|
+
def send_agent_run_metadata(self, agent_run_id: str, metadata: Dict[str, Any]) -> None:
    """
    Attach a metadata payload to an agent run.

    The metadata is validated/sanitized first; invalid payloads are logged
    and dropped. The resulting event is emitted immediately or queued until
    the surrounding run context flushes.
    """
    if self.is_disabled():
        return

    if not agent_run_id or not isinstance(agent_run_id, str):
        logger.error("Cannot send agent run metadata without a valid agent_run_id.")
        return

    payload = self._ensure_json_serializable_metadata(metadata, "Agent run")
    if payload is None:
        logger.error(
            "Skipping agent run metadata send for %s due to invalid metadata payload.",
            agent_run_id,
        )
        return

    event_attributes = {
        "collection_id": self.collection_id,
        "agent_run_id": agent_run_id,
    }
    event = self._create_metadata_event(
        name="agent_run_metadata",
        metadata=payload,
        attributes=event_attributes,
    )
    self._emit_or_queue_metadata_event(
        store=self._pending_agent_run_metadata_events,
        key=agent_run_id,
        event=event,
    )
|
|
1283
|
+
|
|
1284
|
+
def send_transcript_metadata(
    self,
    transcript_id: str,
    name: Optional[str] = None,
    description: Optional[str] = None,
    transcript_group_id: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
) -> None:
    """
    Send transcript data to the backend.

    Invalid optional fields are logged and dropped individually rather than
    failing the whole send; only a missing/invalid transcript_id aborts.

    Args:
        transcript_id: The transcript ID
        name: Optional transcript name
        description: Optional transcript description
        transcript_group_id: Optional transcript group ID
        metadata: Optional metadata to send
    """
    if self.is_disabled():
        return

    if not isinstance(transcript_id, str) or not transcript_id:
        logger.error("Cannot send transcript metadata without a valid transcript_id.")
        return

    # Base event attributes; agent_run_id is taken from the ambient context.
    attributes: Dict[str, Any] = {
        "collection_id": self.collection_id,
        "transcript_id": transcript_id,
        "agent_run_id": self.get_current_agent_run_id(),
    }

    # Each optional field is validated independently; bad values are logged
    # and skipped so the rest of the payload still goes out.
    if name is not None:
        if isinstance(name, str):
            attributes["name"] = name
        else:
            logger.error("Transcript name must be a string; ignoring value %r.", name)
    if description is not None:
        if isinstance(description, str):
            attributes["description"] = description
        else:
            logger.error(
                "Transcript description must be a string; ignoring value %r.", description
            )
    if transcript_group_id is not None:
        if isinstance(transcript_group_id, str) and transcript_group_id:
            attributes["transcript_group_id"] = transcript_group_id
        else:
            logger.error(
                "transcript_group_id must be a non-empty string; ignoring value %r.",
                transcript_group_id,
            )

    # Invalid metadata downgrades to an empty payload rather than aborting.
    metadata_payload: Optional[Dict[str, Any]] = None
    if metadata is not None:
        metadata_payload = self._ensure_json_serializable_metadata(metadata, "Transcript")
        if metadata_payload is None:
            logger.error(
                "Transcript %s metadata payload invalid; sending transcript data without metadata.",
                transcript_id,
            )

    event = self._create_metadata_event(
        name="transcript_metadata",
        metadata=metadata_payload or {},
        attributes=attributes,
    )
    # Emit now if possible, otherwise queue until the context flushes.
    self._emit_or_queue_metadata_event(
        store=self._pending_transcript_metadata_events,
        key=transcript_id,
        event=event,
    )
|
|
1355
|
+
|
|
1356
|
+
def get_current_transcript_id(self) -> Optional[str]:
    """
    Get the current transcript ID from context.

    Returns:
        The transcript ID bound to the current execution context, or None
        when no transcript context is active.
    """
    try:
        current = self._transcript_id_var.get()
    except LookupError:
        # No transcript context has been entered in this execution context.
        return None
    return current
|
|
1367
|
+
|
|
1368
|
+
def get_current_transcript_group_id(self) -> Optional[str]:
    """
    Get the current transcript group ID from context.

    Returns:
        The transcript group ID bound to the current execution context, or
        None when no transcript group context is active.
    """
    try:
        current = self._transcript_group_id_var.get()
    except LookupError:
        # No transcript group context has been entered.
        return None
    return current
|
|
1379
|
+
|
|
1380
|
+
@contextmanager
def transcript_context(
    self,
    name: Optional[str] = None,
    transcript_id: Optional[str] = None,
    description: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
    transcript_group_id: Optional[str] = None,
) -> Iterator[str]:
    """
    Context manager for setting up a transcript context.

    Args:
        name: Optional transcript name
        transcript_id: Optional transcript ID (auto-generated if not provided)
        description: Optional transcript description
        metadata: Optional metadata to send to backend
        transcript_group_id: Optional transcript group ID

    Yields:
        The transcript ID

    Raises:
        RuntimeError: if tracing has not been initialized yet (unlike
            agent_run_context, this does not lazily initialize).
    """
    # Disabled tracing still yields a usable ID so caller code keeps working.
    if self.is_disabled():
        transcript_id = _get_disabled_transcript_id(transcript_id)
        yield transcript_id
        return

    if not self._initialized:
        message = "Tracer is not initialized. Call initialize_tracing() before using transcript context."
        logger.error(message)
        raise RuntimeError(message)

    # Replace invalid (non-string or empty) IDs with fresh UUIDs.
    if transcript_id is not None and (not isinstance(transcript_id, str) or not transcript_id):
        logger.error(
            "Invalid transcript_id for transcript_context; generating a new transcript ID."
        )
        transcript_id = str(uuid.uuid4())
    elif transcript_id is None:
        transcript_id = str(uuid.uuid4())

    # Determine transcript group ID before setting new context: fall back to
    # the ambient group context when none was passed explicitly.
    if transcript_group_id is None:
        try:
            transcript_group_id = self._transcript_group_id_var.get()
        except LookupError:
            # No current transcript group context, this transcript has no group
            transcript_group_id = None
    else:
        if isinstance(transcript_group_id, str) and transcript_group_id:
            pass
        else:
            logger.error(
                "Invalid transcript_group_id for transcript_context; ignoring value %r.",
                transcript_group_id,
            )
            transcript_group_id = None

    # Set context variable for this execution context; the token restores the
    # previous value on exit.
    transcript_id_token: Token[str] = self._transcript_id_var.set(transcript_id)

    try:
        # Send transcript data and metadata to backend; failures are logged
        # but never abort the context.
        try:
            self.send_transcript_metadata(
                transcript_id, name, description, transcript_group_id, metadata
            )
        except Exception as e:
            logger.error(f"Failed sending transcript data: {e}")

        yield transcript_id
    finally:
        # Flush any queued metadata events before tearing the context down.
        agent_run_id_for_flush = self._get_optional_context_value(self._agent_run_id_var)
        transcript_group_id_for_flush = self._get_optional_context_value(
            self._transcript_group_id_var
        )
        self._flush_pending_metadata_events(
            agent_run_id=agent_run_id_for_flush,
            transcript_id=transcript_id,
            transcript_group_id=transcript_group_id_for_flush,
        )
        # Reset context variable to previous state
        self._transcript_id_var.reset(transcript_id_token)
|
|
1462
|
+
|
|
1463
|
+
@asynccontextmanager
async def async_transcript_context(
    self,
    name: Optional[str] = None,
    transcript_id: Optional[str] = None,
    description: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
    transcript_group_id: Optional[str] = None,
) -> AsyncIterator[str]:
    """
    Async context manager for setting up a transcript context.

    Args:
        name: Optional transcript name
        transcript_id: Optional transcript ID (auto-generated if not provided)
        description: Optional transcript description
        metadata: Optional metadata to send to backend
        transcript_group_id: Optional transcript group ID

    Yields:
        The transcript ID

    Raises:
        RuntimeError: If the tracer has not been initialized via initialize_tracing().
    """
    # When tracing is disabled, still yield a usable ID so caller code runs unchanged.
    if self.is_disabled():
        transcript_id = _get_disabled_transcript_id(transcript_id)
        yield transcript_id
        return

    if not self._initialized:
        message = "Tracer is not initialized. Call initialize_tracing() before using transcript context."
        logger.error(message)
        raise RuntimeError(message)

    # Replace an invalid (non-string or empty) transcript_id with a fresh UUID;
    # auto-generate one when none was supplied.
    if transcript_id is not None and (not isinstance(transcript_id, str) or not transcript_id):
        logger.error(
            "Invalid transcript_id for async_transcript_context; generating a new transcript ID."
        )
        transcript_id = str(uuid.uuid4())
    elif transcript_id is None:
        transcript_id = str(uuid.uuid4())

    # Determine transcript group ID before setting new context
    if transcript_group_id is None:
        try:
            transcript_group_id = self._transcript_group_id_var.get()
        except LookupError:
            # No current transcript group context, this transcript has no group
            transcript_group_id = None
    else:
        if isinstance(transcript_group_id, str) and transcript_group_id:
            pass
        else:
            logger.error(
                "Invalid transcript_group_id for async_transcript_context; ignoring value %r.",
                transcript_group_id,
            )
            transcript_group_id = None

    # Set context variable for this execution context
    transcript_id_token: Token[str] = self._transcript_id_var.set(transcript_id)

    try:
        # Send transcript data and metadata to backend
        try:
            self.send_transcript_metadata(
                transcript_id, name, description, transcript_group_id, metadata
            )
        except Exception as e:
            logger.error(f"Failed sending transcript data: {e}")

        yield transcript_id
    finally:
        # Flush any metadata events queued while this transcript was active,
        # capturing the ambient run/group context before tearing ours down.
        agent_run_id_for_flush = self._get_optional_context_value(self._agent_run_id_var)
        transcript_group_id_for_flush = self._get_optional_context_value(
            self._transcript_group_id_var
        )
        self._flush_pending_metadata_events(
            agent_run_id=agent_run_id_for_flush,
            transcript_id=transcript_id,
            transcript_group_id=transcript_group_id_for_flush,
        )
        # Reset context variable to previous state
        self._transcript_id_var.reset(transcript_id_token)
def send_transcript_group_metadata(
    self,
    transcript_group_id: str,
    name: Optional[str] = None,
    description: Optional[str] = None,
    parent_transcript_group_id: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
) -> None:
    """
    Send transcript group data to the backend.

    Merges the supplied fields into the locally cached state for this group
    (so repeated calls accumulate rather than overwrite), then emits or queues
    a "transcript_group_metadata" event.

    Args:
        transcript_group_id: The transcript group ID
        name: Optional transcript group name
        description: Optional transcript group description
        parent_transcript_group_id: Optional parent transcript group ID
        metadata: Optional metadata to send
    """
    if self.is_disabled():
        return

    if not isinstance(transcript_group_id, str) or not transcript_group_id:
        logger.error(
            "Cannot send transcript group metadata without a valid transcript_group_id."
        )
        return

    collection_id = self.collection_id

    # Get agent_run_id from current context
    agent_run_id = self.get_current_agent_run_id()
    if not agent_run_id:
        logger.error(
            f"Cannot send transcript group metadata for {transcript_group_id} - no agent_run_id in context"
        )
        return

    # Merge new values into cached per-group state under the lock; invalid
    # values are logged and fall back to the previously stored value.
    with self._transcript_group_state_lock:
        state: dict[str, Optional[str]] = self._transcript_group_states.setdefault(
            transcript_group_id, {}
        )
        if name is not None:
            if isinstance(name, str):
                final_name = name
            else:
                logger.error(
                    "Transcript group name must be a string; ignoring value %r.",
                    name,
                )
                final_name = state.get("name")
        else:
            final_name = state.get("name")

        if description is not None:
            if isinstance(description, str):
                final_description = description
            else:
                logger.error(
                    "Transcript group description must be a string; ignoring value %r.",
                    description,
                )
                final_description = state.get("description")
        else:
            final_description = state.get("description")

        if parent_transcript_group_id is not None:
            if isinstance(parent_transcript_group_id, str) and parent_transcript_group_id:
                final_parent_transcript_group_id = parent_transcript_group_id
            else:
                logger.error(
                    "parent_transcript_group_id must be a non-empty string; ignoring value %r.",
                    parent_transcript_group_id,
                )
                final_parent_transcript_group_id = state.get("parent_transcript_group_id")
        else:
            final_parent_transcript_group_id = state.get("parent_transcript_group_id")

        # Persist the resolved values back into the cached state.
        if final_name is not None:
            state["name"] = final_name
        if final_description is not None:
            state["description"] = final_description
        if final_parent_transcript_group_id is not None:
            state["parent_transcript_group_id"] = final_parent_transcript_group_id

    # Build the event attributes from the resolved values; optional fields
    # are only included when present.
    attributes: Dict[str, Any] = {
        "collection_id": collection_id,
        "transcript_group_id": transcript_group_id,
        "agent_run_id": agent_run_id,
    }
    if final_name is not None:
        attributes["name"] = final_name
    if final_description is not None:
        attributes["description"] = final_description
    if final_parent_transcript_group_id is not None:
        attributes["parent_transcript_group_id"] = final_parent_transcript_group_id

    metadata_payload: Optional[Dict[str, Any]] = None
    if metadata is not None:
        metadata_payload = self._ensure_json_serializable_metadata(metadata, "Transcript group")
        if metadata_payload is None:
            # Group data is still sent; only the metadata portion is dropped.
            logger.error(
                "Transcript group %s metadata payload invalid; sending group data without metadata.",
                transcript_group_id,
            )

    event = self._create_metadata_event(
        name="transcript_group_metadata",
        metadata=metadata_payload or {},
        attributes=attributes,
    )
    # Emit immediately when possible; otherwise queue keyed by agent run.
    self._emit_or_queue_metadata_event(
        store=self._pending_transcript_group_metadata_events,
        key=agent_run_id,
        event=event,
    )
@contextmanager
def transcript_group_context(
    self,
    name: Optional[str] = None,
    transcript_group_id: Optional[str] = None,
    description: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
    parent_transcript_group_id: Optional[str] = None,
) -> Iterator[str]:
    """
    Context manager for setting up a transcript group context.

    Args:
        name: Optional transcript group name
        transcript_group_id: Optional transcript group ID (auto-generated if not provided)
        description: Optional transcript group description
        metadata: Optional metadata to send to backend
        parent_transcript_group_id: Optional parent transcript group ID

    Yields:
        The transcript group ID

    Raises:
        RuntimeError: If the tracer has not been initialized via initialize_tracing().
    """
    # When tracing is disabled, still yield a usable ID so caller code runs unchanged.
    if self.is_disabled():
        transcript_group_id = _get_disabled_transcript_group_id(transcript_group_id)
        yield transcript_group_id
        return

    if not self._initialized:
        message = "Tracer is not initialized. Call initialize_tracing() before using transcript group context."
        logger.error(message)
        raise RuntimeError(message)

    # Replace an invalid (non-string or empty) ID with a fresh UUID;
    # auto-generate one when none was supplied.
    if transcript_group_id is not None and (
        not isinstance(transcript_group_id, str) or not transcript_group_id
    ):
        logger.error(
            "Invalid transcript_group_id for transcript_group_context; generating a new ID."
        )
        transcript_group_id = str(uuid.uuid4())
    elif transcript_group_id is None:
        transcript_group_id = str(uuid.uuid4())

    # Determine parent transcript group ID before setting new context
    if parent_transcript_group_id is None:
        try:
            parent_transcript_group_id = self._transcript_group_id_var.get()
        except LookupError:
            # No current transcript group context, this becomes a root group
            parent_transcript_group_id = None
    else:
        if isinstance(parent_transcript_group_id, str) and parent_transcript_group_id:
            pass
        else:
            logger.error(
                "Invalid parent_transcript_group_id for transcript_group_context; ignoring value %r.",
                parent_transcript_group_id,
            )
            parent_transcript_group_id = None

    # Set context variable for this execution context
    transcript_group_id_token: Token[str] = self._transcript_group_id_var.set(
        transcript_group_id
    )

    try:
        # Send transcript group data and metadata to backend
        try:
            self.send_transcript_group_metadata(
                transcript_group_id, name, description, parent_transcript_group_id, metadata
            )
        except Exception as e:
            logger.error(f"Failed sending transcript group data: {e}")

        yield transcript_group_id
    finally:
        # Flush any metadata events queued while this group was active,
        # capturing the ambient run/transcript context before tearing ours down.
        agent_run_id_for_flush = self._get_optional_context_value(self._agent_run_id_var)
        transcript_id_for_flush = self._get_optional_context_value(self._transcript_id_var)
        self._flush_pending_metadata_events(
            agent_run_id=agent_run_id_for_flush,
            transcript_id=transcript_id_for_flush,
            transcript_group_id=transcript_group_id,
        )
        # Reset context variable to previous state
        self._transcript_group_id_var.reset(transcript_group_id_token)
@asynccontextmanager
async def async_transcript_group_context(
    self,
    name: Optional[str] = None,
    transcript_group_id: Optional[str] = None,
    description: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
    parent_transcript_group_id: Optional[str] = None,
) -> AsyncIterator[str]:
    """
    Async context manager for setting up a transcript group context.

    Args:
        name: Optional transcript group name
        transcript_group_id: Optional transcript group ID (auto-generated if not provided)
        description: Optional transcript group description
        metadata: Optional metadata to send to backend
        parent_transcript_group_id: Optional parent transcript group ID

    Yields:
        The transcript group ID

    Raises:
        RuntimeError: If the tracer has not been initialized via initialize_tracing().
    """
    # When tracing is disabled, still yield a usable ID so caller code runs unchanged.
    if self.is_disabled():
        transcript_group_id = _get_disabled_transcript_group_id(transcript_group_id)
        yield transcript_group_id
        return

    if not self._initialized:
        message = "Tracer is not initialized. Call initialize_tracing() before using transcript group context."
        logger.error(message)
        raise RuntimeError(message)

    # Replace an invalid (non-string or empty) ID with a fresh UUID;
    # auto-generate one when none was supplied.
    if transcript_group_id is not None and (
        not isinstance(transcript_group_id, str) or not transcript_group_id
    ):
        logger.error(
            "Invalid transcript_group_id for async_transcript_group_context; generating a new ID."
        )
        transcript_group_id = str(uuid.uuid4())
    elif transcript_group_id is None:
        transcript_group_id = str(uuid.uuid4())

    # Determine parent transcript group ID before setting new context
    if parent_transcript_group_id is None:
        try:
            parent_transcript_group_id = self._transcript_group_id_var.get()
        except LookupError:
            # No current transcript group context, this becomes a root group
            parent_transcript_group_id = None
    else:
        if isinstance(parent_transcript_group_id, str) and parent_transcript_group_id:
            pass
        else:
            logger.error(
                "Invalid parent_transcript_group_id for async_transcript_group_context; ignoring value %r.",
                parent_transcript_group_id,
            )
            parent_transcript_group_id = None

    # Set context variable for this execution context
    transcript_group_id_token: Token[str] = self._transcript_group_id_var.set(
        transcript_group_id
    )

    try:
        # Send transcript group data and metadata to backend
        try:
            self.send_transcript_group_metadata(
                transcript_group_id, name, description, parent_transcript_group_id, metadata
            )
        except Exception as e:
            logger.error(f"Failed sending transcript group data: {e}")

        yield transcript_group_id
    finally:
        # Flush any metadata events queued while this group was active,
        # capturing the ambient run/transcript context before tearing ours down.
        agent_run_id_for_flush = self._get_optional_context_value(self._agent_run_id_var)
        transcript_id_for_flush = self._get_optional_context_value(self._transcript_id_var)
        self._flush_pending_metadata_events(
            agent_run_id=agent_run_id_for_flush,
            transcript_id=transcript_id_for_flush,
            transcript_group_id=transcript_group_id,
        )
        # Reset context variable to previous state
        self._transcript_group_id_var.reset(transcript_group_id_token)
+
def _send_trace_done(self) -> None:
    """Notify the backend that this trace completed (best-effort; errors are logged)."""
    if self.is_disabled():
        return

    completion: Dict[str, Any] = {
        "collection_id": self.collection_id,
        "status": "completed",
        "timestamp": datetime.now(timezone.utc).isoformat(),
    }
    try:
        self._post_json("/v1/trace-done", completion)
    except Exception as exc:
        logger.error(f"Failed to send trace completion signal: {exc}")
|
1848
|
+
# Singleton tracer managed by initialize_tracing() / close_tracing().
_global_tracer: Optional[DocentTracer] = None
# Global kill-switch, seeded from the DOCENT_DISABLE_TRACING env var and
# toggled at runtime by set_disabled().
_global_tracing_disabled: bool = os.environ.get("DOCENT_DISABLE_TRACING", "").lower() == "true"
|
1851
|
+
|
|
1852
|
+
def initialize_tracing(
    collection_name: str = DEFAULT_COLLECTION_NAME,
    collection_id: Optional[str] = None,
    endpoint: Union[str, List[str]] = DEFAULT_ENDPOINT,
    headers: Optional[Dict[str, str]] = None,
    api_key: Optional[str] = None,
    enable_console_export: bool = False,
    enable_otlp_export: bool = True,
    disable_batch: bool = False,
    instruments: Optional[Set[Instruments]] = None,
    block_instruments: Optional[Set[Instruments]] = None,
) -> DocentTracer:
    """
    Initialize the global Docent tracer.

    This is the primary entry point for setting up Docent tracing.
    It creates a global singleton instance that can be accessed via get_tracer().

    NOTE: If a global tracer already exists, the existing instance is returned
    and all arguments to this call are ignored.

    Args:
        collection_name: Name of the collection
        collection_id: Optional collection ID (auto-generated if not provided)
        endpoint: OTLP endpoint URL(s) for span export - can be a single string or list of strings for multiple endpoints
        headers: Optional headers for authentication
        api_key: Optional API key for bearer token authentication (takes precedence
            over DOCENT_API_KEY environment variable)
        enable_console_export: Whether to export spans to console for debugging
        enable_otlp_export: Whether to export spans to OTLP endpoint
        disable_batch: Whether to disable batch processing (use SimpleSpanProcessor)
        instruments: Set of instruments to enable (None = all instruments).
        block_instruments: Set of instruments to explicitly disable.

    Returns:
        The initialized Docent tracer

    Example:
        initialize_tracing("my-collection")
    """

    global _global_tracer

    # Check for API key in environment variable if not provided as parameter
    if api_key is None:
        env_api_key: Optional[str] = os.environ.get("DOCENT_API_KEY")
        api_key = env_api_key

    # Create and initialize the singleton only on the first call.
    if _global_tracer is None:
        _global_tracer = DocentTracer(
            collection_name=collection_name,
            collection_id=collection_id,
            endpoint=endpoint,
            headers=headers,
            api_key=api_key,
            enable_console_export=enable_console_export,
            enable_otlp_export=enable_otlp_export,
            disable_batch=disable_batch,
            instruments=instruments,
            block_instruments=block_instruments,
        )
        _global_tracer.initialize()

    return _global_tracer
1914
|
+
|
|
1915
|
+
def _get_package_name(dist: Distribution) -> str | None:
|
|
1916
|
+
try:
|
|
1917
|
+
return dist.name.lower()
|
|
1918
|
+
except (KeyError, AttributeError):
|
|
1919
|
+
return None
|
|
1920
|
+
|
|
1921
|
+
|
|
1922
|
+
# Lowercased names of every distribution visible in the current environment;
# entries whose name could not be determined are skipped.
installed_packages = {
    pkg_name
    for pkg_name in (_get_package_name(dist) for dist in distributions())
    if pkg_name is not None
}
|
1926
|
+
|
|
1927
|
+
def is_package_installed(package_name: str) -> bool:
    """Case-insensitively check whether *package_name* is installed."""
    normalized = package_name.lower()
    return normalized in installed_packages
1930
|
+
|
|
1931
|
+
def get_tracer(
    caller: str = "get_tracer()", log_error_if_tracer_is_none: bool = True
) -> Optional[DocentTracer]:
    """
    Return the global Docent tracer, or None when unavailable.

    Args:
        caller: Human-readable name of the API being invoked. Used for log output.
        log_error_if_tracer_is_none: Whether to log an error if the tracer is None.
            NOTE(mengk): when get_tracer is called in is_disabled, I don't want an error logged,
            since that's what I'm trying to check. In other contexts, it makes sense.

    Returns:
        The global Docent tracer, or None if tracing has not been initialized
        (or initialization did not complete successfully).
    """
    current = _global_tracer
    # No tracer at all: tracing was never set up.
    if current is None:
        if log_error_if_tracer_is_none:
            logger.error(
                f"{caller} requires initialize_tracing() to be called before use. "
                "You can also disable tracing by calling set_disabled(True) or by setting "
                "the DOCENT_DISABLE_TRACING environment variable to 'true'."
            )
        return None

    # Tracer exists but its setup never finished successfully.
    if not current.is_initialized():
        logger.error(
            f"{caller} cannot proceed because initialize_tracing() did not complete successfully. "
            "You can also disable tracing by calling set_disabled(True) or by setting "
            "the DOCENT_DISABLE_TRACING environment variable to 'true'."
        )
        return None

    return current
1966
|
+
|
|
1967
|
+
def close_tracing() -> None:
    """Close the global Docent tracer."""
    global _global_tracer
    tracer = _global_tracer
    if tracer:
        tracer.close()
        _global_tracer = None
1974
|
+
|
|
1975
|
+
def flush_tracing() -> None:
    """Force flush all spans to exporters."""
    tracer = _global_tracer
    if not tracer:
        logger.debug("No global tracer available to flush")
        return
    logger.debug("Flushing Docent tracer")
    tracer.flush()
1983
|
+
|
|
1984
|
+
def is_initialized() -> bool:
    """Verify if the global Docent tracer is properly initialized."""
    tracer = _global_tracer
    return tracer is not None and tracer.is_initialized()
1990
|
+
|
|
1991
|
+
def is_disabled(context_name: str = "Docent tracing") -> bool:
    """
    Check if global tracing is disabled for the given context.

    Args:
        context_name: Human-readable identifier for the caller used in error reporting.

    Returns:
        True when tracing is disabled globally, when no initialized tracer exists,
        or when the active tracer reports being disabled.
    """
    if _global_tracing_disabled:
        return True
    # Silently probe for a tracer; absence here is itself a valid answer.
    active = get_tracer(context_name, log_error_if_tracer_is_none=False)
    if active is None:
        return True
    return active.is_disabled()
2009
|
+
|
|
2010
|
+
def set_disabled(disabled: bool) -> None:
    """Enable or disable global tracing."""
    global _global_tracing_disabled
    _global_tracing_disabled = disabled
    # Keep any live tracer instance in sync with the global flag.
    tracer = _global_tracer
    if tracer:
        tracer.set_disabled(disabled)
2017
|
+
|
|
2018
|
+
def agent_run_score(name: str, score: float, attributes: Optional[Dict[str, Any]] = None) -> None:
    """
    Send a score to the backend for the current agent run.

    Args:
        name: Name of the score metric
        score: Numeric score value
        attributes: Optional additional attributes for the score event
    """
    if is_disabled("agent_run_score()"):
        return

    tracer = get_tracer("agent_run_score()")
    if tracer is None:
        logger.error("Docent tracer unavailable; score will not be sent.")
        return

    # A score is only meaningful inside an active agent run context.
    run_id = tracer.get_current_agent_run_id()
    if not run_id:
        logger.warning("No active agent run context. Score will not be sent.")
        return

    try:
        tracer.send_agent_run_score(run_id, name, score, attributes)
    except Exception as e:
        logger.error(f"Failed to send score: {e}")
2045
|
+
|
|
2046
|
+
def agent_run_metadata(metadata: Dict[str, Any]) -> None:
    """
    Send metadata directly to the backend for the current agent run.

    Args:
        metadata: Dictionary of metadata to attach to the current span (can be nested)

    Example:
        agent_run_metadata({"user": "John", "id": 123, "flagged": True})
        agent_run_metadata({"user": {"id": "123", "name": "John"}, "config": {"model": "gpt-4"}})
    """
    if is_disabled("agent_run_metadata()"):
        return

    tracer = get_tracer("agent_run_metadata()")
    if tracer is None:
        logger.error("Docent tracer unavailable; agent run metadata will not be sent.")
        return

    # Metadata is only meaningful inside an active agent run context.
    run_id = tracer.get_current_agent_run_id()
    if not run_id:
        logger.warning("No active agent run context. Metadata will not be sent.")
        return

    try:
        tracer.send_agent_run_metadata(run_id, metadata)
    except Exception as e:
        logger.error(f"Failed to send agent run metadata: {e}")
2075
|
+
|
|
2076
|
+
def transcript_metadata(
    metadata: Dict[str, Any],
    *,
    name: Optional[str] = None,
    description: Optional[str] = None,
    transcript_group_id: Optional[str] = None,
) -> None:
    """
    Send transcript metadata directly to the backend for the current transcript.

    Args:
        metadata: Dictionary of metadata to attach to the current transcript (required)
        name: Optional transcript name
        description: Optional transcript description
        transcript_group_id: Optional transcript group ID to associate with

    Example:
        transcript_metadata({"user": "John", "model": "gpt-4"})
        transcript_metadata({"env": "prod"}, name="data_processing")
        transcript_metadata(
            {"team": "search"},
            name="validation",
            transcript_group_id="group-123",
        )
    """
    if is_disabled("transcript_metadata()"):
        return

    tracer = get_tracer("transcript_metadata()")
    if tracer is None:
        logger.error("Docent tracer unavailable; transcript metadata will not be sent.")
        return

    # Metadata is only meaningful inside an active transcript context.
    current_transcript_id = tracer.get_current_transcript_id()
    if not current_transcript_id:
        logger.warning("No active transcript context. Metadata will not be sent.")
        return

    try:
        tracer.send_transcript_metadata(
            current_transcript_id, name, description, transcript_group_id, metadata
        )
    except Exception as e:
        logger.error(f"Failed to send transcript metadata: {e}")
|
2121
|
+
|
|
2122
|
+
def transcript_group_metadata(
    metadata: Dict[str, Any],
    *,
    name: Optional[str] = None,
    description: Optional[str] = None,
    parent_transcript_group_id: Optional[str] = None,
) -> None:
    """
    Send transcript group metadata directly to the backend for the current transcript group.

    Args:
        metadata: Dictionary of metadata to attach to the current transcript group (required)
        name: Optional transcript group name
        description: Optional transcript group description
        parent_transcript_group_id: Optional parent transcript group ID

    Example:
        transcript_group_metadata({"team": "search", "env": "prod"})
        transcript_group_metadata({"env": "prod"}, name="pipeline")
        transcript_group_metadata(
            {"team": "search"},
            name="pipeline",
            parent_transcript_group_id="root-group",
        )
    """
    if is_disabled("transcript_group_metadata()"):
        return

    tracer = get_tracer("transcript_group_metadata()")
    if tracer is None:
        logger.error("Docent tracer unavailable; transcript group metadata will not be sent.")
        return

    # Metadata is only meaningful inside an active transcript group context.
    current_group_id = tracer.get_current_transcript_group_id()
    if not current_group_id:
        logger.warning("No active transcript group context. Metadata will not be sent.")
        return

    try:
        tracer.send_transcript_group_metadata(
            current_group_id, name, description, parent_transcript_group_id, metadata
        )
    except Exception as e:
        logger.error(f"Failed to send transcript group metadata: {e}")
|
2167
|
+
|
|
2168
|
+
class AgentRunContext:
    """Context manager that works in both sync and async contexts.

    Wraps the tracer's agent_run_context / async_agent_run_context and yields
    ``(agent_run_id, transcript_id)``. When tracing is disabled or not
    initialized, deterministic fallback IDs are returned instead so caller
    code runs unchanged.
    """

    def __init__(
        self,
        agent_run_id: Optional[str] = None,
        transcript_id: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
        **attributes: Any,
    ):
        # IDs may be filled in later (on enter) when not supplied by the caller.
        self.agent_run_id = agent_run_id
        self.transcript_id = transcript_id
        self.metadata = metadata
        self.attributes: dict[str, Any] = attributes
        # Delegated context managers, created lazily on (a)enter.
        self._sync_context: Optional[Any] = None
        self._async_context: Optional[Any] = None

    def _fallback_ids(self) -> tuple[str, str]:
        """Resolve and store fallback IDs for the disabled/uninitialized path."""
        self.agent_run_id = _get_disabled_agent_run_id(self.agent_run_id)
        self.transcript_id = _get_disabled_transcript_id(self.transcript_id)
        return self.agent_run_id, self.transcript_id

    def __enter__(self) -> tuple[str, str]:
        """Sync context manager entry.

        Returns:
            A ``(agent_run_id, transcript_id)`` pair.
        """
        if is_disabled("agent_run_context"):
            return self._fallback_ids()

        tracer = get_tracer("agent_run_context")
        if tracer is None:
            logger.error("Cannot enter agent_run_context because tracing is not initialized.")
            return self._fallback_ids()
        self._sync_context = tracer.agent_run_context(
            self.agent_run_id, self.transcript_id, metadata=self.metadata, **self.attributes
        )
        return self._sync_context.__enter__()

    def __exit__(
        self,
        exc_type: Optional[type[BaseException]],
        exc_val: Optional[BaseException],
        exc_tb: Any,
    ) -> Optional[bool]:
        """Sync context manager exit.

        BUGFIX: the delegated __exit__ return value is now propagated, so an
        underlying context manager that suppresses an exception (returns True)
        actually suppresses it for callers of this wrapper. Previously the
        return value was discarded and suppression was silently lost.
        """
        if self._sync_context:
            return self._sync_context.__exit__(exc_type, exc_val, exc_tb)
        return None

    async def __aenter__(self) -> tuple[str, str]:
        """Async context manager entry.

        Returns:
            A ``(agent_run_id, transcript_id)`` pair.
        """
        if is_disabled("agent_run_context"):
            return self._fallback_ids()

        tracer = get_tracer("agent_run_context")
        if tracer is None:
            logger.error("Cannot enter agent_run_context because tracing is not initialized.")
            return self._fallback_ids()
        self._async_context = tracer.async_agent_run_context(
            self.agent_run_id, self.transcript_id, metadata=self.metadata, **self.attributes
        )
        return await self._async_context.__aenter__()

    async def __aexit__(
        self,
        exc_type: Optional[type[BaseException]],
        exc_val: Optional[BaseException],
        exc_tb: Any,
    ) -> Optional[bool]:
        """Async context manager exit.

        Propagates the delegated __aexit__ return value (see __exit__).
        """
        if self._async_context:
            return await self._async_context.__aexit__(exc_type, exc_val, exc_tb)
        return None
2231
|
+
|
|
2232
|
+
def agent_run(
    func: Optional[Callable[..., Any]] = None, *, metadata: Optional[Dict[str, Any]] = None
):
    """
    Decorator to wrap a function in an agent_run_context (sync or async).
    Injects agent_run_id and transcript_id as function attributes
    (``wrapper.docent.agent_run_id`` / ``wrapper.docent.transcript_id``).
    Optionally accepts metadata to attach to the agent run context.

    Args:
        func: The function to wrap (when used as a bare ``@agent_run``).
        metadata: Optional nested dictionary of metadata to attach.

    Example:
        @agent_run
        def my_func(x, y):
            print(my_func.docent.agent_run_id, my_func.docent.transcript_id)

        @agent_run(metadata={"user": "John", "model": "gpt-4"})
        def my_func_with_metadata(x, y):
            print(my_func_with_metadata.docent.agent_run_id)

        @agent_run(metadata={"config": {"model": "gpt-4", "temperature": 0.7}})
        async def my_async_func(z):
            print(my_async_func.docent.agent_run_id)
    """
    import functools
    import inspect
    from types import SimpleNamespace

    def _attach_docent(wrapper: Callable[..., Any], agent_run_id: str, transcript_id: str) -> None:
        # SimpleNamespace replaces the previous `type("DocentData", (), {...})()`
        # pattern, which created a brand-new class object on every call.
        # Attribute access (.agent_run_id / .transcript_id) is unchanged.
        setattr(
            wrapper,
            "docent",
            SimpleNamespace(agent_run_id=agent_run_id, transcript_id=transcript_id),
        )

    def decorator(f: Callable[..., Any]) -> Callable[..., Any]:
        if inspect.iscoroutinefunction(f):

            @functools.wraps(f)
            async def async_wrapper(*args: Any, **kwargs: Any) -> Any:
                async with AgentRunContext(metadata=metadata) as (agent_run_id, transcript_id):
                    # Store docent data as function attributes
                    _attach_docent(async_wrapper, agent_run_id, transcript_id)
                    return await f(*args, **kwargs)

            return async_wrapper
        else:

            @functools.wraps(f)
            def sync_wrapper(*args: Any, **kwargs: Any) -> Any:
                with AgentRunContext(metadata=metadata) as (agent_run_id, transcript_id):
                    # Store docent data as function attributes
                    _attach_docent(sync_wrapper, agent_run_id, transcript_id)
                    return f(*args, **kwargs)

            return sync_wrapper

    if func is None:
        return decorator
    else:
        return decorator(func)
|
2305
|
+
|
|
2306
|
+
def agent_run_context(
    agent_run_id: Optional[str] = None,
    transcript_id: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
    **attributes: Any,
) -> AgentRunContext:
    """
    Create an agent run context for tracing.

    Args:
        agent_run_id: Optional agent run ID (auto-generated if not provided)
        transcript_id: Optional transcript ID (auto-generated if not provided)
        metadata: Optional nested dictionary of metadata to attach as events
        **attributes: Additional attributes to add to the context

    Returns:
        A context manager that can be used with both 'with' and 'async with'

    Example:
        # Sync usage
        with agent_run_context() as (agent_run_id, transcript_id):
            pass

        # Async usage
        async with agent_run_context() as (agent_run_id, transcript_id):
            pass

        # With metadata
        with agent_run_context(metadata={"user": "John", "model": "gpt-4"}) as (agent_run_id, transcript_id):
            pass
    """
    # Thin factory: all behavior lives in AgentRunContext itself.
    return AgentRunContext(
        agent_run_id=agent_run_id,
        transcript_id=transcript_id,
        metadata=metadata,
        **attributes,
    )
|
|
2339
|
+
|
|
2340
|
+
class TranscriptContext:
    """Context manager for creating and managing transcripts.

    Entering yields the transcript ID. Works with both ``with`` and
    ``async with``; when tracing is disabled or not initialized, a locally
    resolved ID is returned and no tracer context is opened.
    """

    def __init__(
        self,
        name: Optional[str] = None,
        transcript_id: Optional[str] = None,
        description: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
        transcript_group_id: Optional[str] = None,
    ):
        self.name = name
        self.transcript_id = transcript_id
        self.description = description
        self.metadata = metadata
        self.transcript_group_id = transcript_group_id
        # Underlying tracer contexts; created lazily on entry.
        self._sync_context: Optional[Any] = None
        self._async_context: Optional[Any] = None

    def _fallback_id(self) -> str:
        """Resolve the transcript ID locally when no tracer context can be opened."""
        self.transcript_id = _get_disabled_transcript_id(self.transcript_id)
        return self.transcript_id

    def _resolve_tracer(self) -> Optional[Any]:
        """Return the tracer, logging an error when tracing is uninitialized."""
        tracer = get_tracer("transcript_context")
        if tracer is None:
            logger.error("Cannot enter transcript_context because tracing is not initialized.")
        return tracer

    def __enter__(self) -> str:
        """Sync context manager entry."""
        if is_disabled("transcript_context"):
            return self._fallback_id()
        tracer = self._resolve_tracer()
        if tracer is None:
            return self._fallback_id()
        self._sync_context = tracer.transcript_context(
            name=self.name,
            transcript_id=self.transcript_id,
            description=self.description,
            metadata=self.metadata,
            transcript_group_id=self.transcript_group_id,
        )
        return self._sync_context.__enter__()

    def __exit__(self, exc_type: type[BaseException], exc_val: Any, exc_tb: Any) -> None:
        """Sync context manager exit; delegates only if a tracer context was opened."""
        if self._sync_context:
            self._sync_context.__exit__(exc_type, exc_val, exc_tb)

    async def __aenter__(self) -> str:
        """Async context manager entry."""
        if is_disabled("transcript_context"):
            return self._fallback_id()
        tracer = self._resolve_tracer()
        if tracer is None:
            return self._fallback_id()
        self._async_context = tracer.async_transcript_context(
            name=self.name,
            transcript_id=self.transcript_id,
            description=self.description,
            metadata=self.metadata,
            transcript_group_id=self.transcript_group_id,
        )
        return await self._async_context.__aenter__()

    async def __aexit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        """Async context manager exit; delegates only if a tracer context was opened."""
        if self._async_context:
            await self._async_context.__aexit__(exc_type, exc_val, exc_tb)
|
2409
|
+
|
|
2410
|
+
def transcript(
    func: Optional[Callable[..., Any]] = None,
    *,
    name: Optional[str] = None,
    transcript_id: Optional[str] = None,
    description: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
    transcript_group_id: Optional[str] = None,
):
    """
    Decorator to wrap a function in a transcript context.
    Injects transcript_id as a function attribute
    (``wrapper.docent.transcript_id``).

    Args:
        func: The function to wrap (when used as a bare ``@transcript``).
        name: Optional transcript name.
        transcript_id: Optional transcript ID (auto-generated if not provided).
        description: Optional transcript description.
        metadata: Optional metadata to attach to the transcript.
        transcript_group_id: Optional ID of the group this transcript belongs to.

    Example:
        @transcript
        def my_func(x, y):
            print(my_func.docent.transcript_id)

        @transcript(name="data_processing", description="Process user data")
        def my_func_with_name(x, y):
            print(my_func_with_name.docent.transcript_id)

        @transcript(metadata={"user": "John", "model": "gpt-4"})
        async def my_async_func(z):
            print(my_async_func.docent.transcript_id)
    """
    import functools
    import inspect
    from types import SimpleNamespace

    def _make_context() -> TranscriptContext:
        # Shared by the sync and async wrappers so the argument list is
        # maintained in exactly one place.
        return TranscriptContext(
            name=name,
            transcript_id=transcript_id,
            description=description,
            metadata=metadata,
            transcript_group_id=transcript_group_id,
        )

    def _attach_docent(wrapper: Callable[..., Any], resolved_transcript_id: str) -> None:
        # SimpleNamespace replaces the previous `type("DocentData", (), {...})()`
        # pattern, which created a brand-new class object on every call.
        setattr(wrapper, "docent", SimpleNamespace(transcript_id=resolved_transcript_id))

    def decorator(f: Callable[..., Any]) -> Callable[..., Any]:
        if inspect.iscoroutinefunction(f):

            @functools.wraps(f)
            async def async_wrapper(*args: Any, **kwargs: Any) -> Any:
                async with _make_context() as transcript_id_result:
                    # Store docent data as function attributes
                    _attach_docent(async_wrapper, transcript_id_result)
                    return await f(*args, **kwargs)

            return async_wrapper
        else:

            @functools.wraps(f)
            def sync_wrapper(*args: Any, **kwargs: Any) -> Any:
                with _make_context() as transcript_id_result:
                    # Store docent data as function attributes
                    _attach_docent(sync_wrapper, transcript_id_result)
                    return f(*args, **kwargs)

            return sync_wrapper

    if func is None:
        return decorator
    else:
        return decorator(func)
|
2498
|
+
|
|
2499
|
+
def transcript_context(
    name: Optional[str] = None,
    transcript_id: Optional[str] = None,
    description: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
    transcript_group_id: Optional[str] = None,
) -> TranscriptContext:
    """
    Create a transcript context for tracing.

    Args:
        name: Optional transcript name
        transcript_id: Optional transcript ID (auto-generated if not provided)
        description: Optional transcript description
        metadata: Optional metadata to attach to the transcript
        transcript_group_id: Optional ID of the transcript group this transcript belongs to

    Returns:
        A context manager that can be used with both 'with' and 'async with'

    Example:
        # Sync usage
        with transcript_context(name="data_processing") as transcript_id:
            pass

        # Async usage
        async with transcript_context(description="Process user data") as transcript_id:
            pass

        # With metadata
        with transcript_context(metadata={"user": "John", "model": "gpt-4"}) as transcript_id:
            pass
    """
    # Docstring fix: this function has no `parent_transcript_id` parameter;
    # the group linkage parameter is `transcript_group_id`. Pass everything
    # by keyword so a reordering of TranscriptContext's parameters cannot
    # silently misroute arguments.
    return TranscriptContext(
        name=name,
        transcript_id=transcript_id,
        description=description,
        metadata=metadata,
        transcript_group_id=transcript_group_id,
    )
|
2534
|
+
|
|
2535
|
+
class TranscriptGroupContext:
    """Context manager for creating and managing transcript groups.

    Entering yields the transcript group ID. Works with both ``with`` and
    ``async with``; when tracing is disabled or not initialized, a locally
    resolved ID is returned and no tracer context is opened.
    """

    def __init__(
        self,
        name: Optional[str] = None,
        transcript_group_id: Optional[str] = None,
        description: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
        parent_transcript_group_id: Optional[str] = None,
    ):
        self.name = name
        self.transcript_group_id = transcript_group_id
        self.description = description
        self.metadata = metadata
        self.parent_transcript_group_id = parent_transcript_group_id
        # Underlying tracer contexts; created lazily on entry.
        self._sync_context: Optional[Any] = None
        self._async_context: Optional[Any] = None

    def _fallback_id(self) -> str:
        """Resolve the group ID locally when no tracer context can be opened."""
        self.transcript_group_id = _get_disabled_transcript_group_id(self.transcript_group_id)
        return self.transcript_group_id

    def _resolve_tracer(self) -> Optional[Any]:
        """Return the tracer, logging an error when tracing is uninitialized."""
        tracer = get_tracer("transcript_group_context")
        if tracer is None:
            logger.error(
                "Cannot enter transcript_group_context because tracing is not initialized."
            )
        return tracer

    def __enter__(self) -> str:
        """Sync context manager entry."""
        if is_disabled("transcript_group_context"):
            return self._fallback_id()
        tracer = self._resolve_tracer()
        if tracer is None:
            return self._fallback_id()
        self._sync_context = tracer.transcript_group_context(
            name=self.name,
            transcript_group_id=self.transcript_group_id,
            description=self.description,
            metadata=self.metadata,
            parent_transcript_group_id=self.parent_transcript_group_id,
        )
        return self._sync_context.__enter__()

    def __exit__(self, exc_type: type[BaseException], exc_val: Any, exc_tb: Any) -> None:
        """Sync context manager exit; delegates only if a tracer context was opened."""
        if self._sync_context:
            self._sync_context.__exit__(exc_type, exc_val, exc_tb)

    async def __aenter__(self) -> str:
        """Async context manager entry."""
        if is_disabled("transcript_group_context"):
            return self._fallback_id()
        tracer = self._resolve_tracer()
        if tracer is None:
            return self._fallback_id()
        self._async_context = tracer.async_transcript_group_context(
            name=self.name,
            transcript_group_id=self.transcript_group_id,
            description=self.description,
            metadata=self.metadata,
            parent_transcript_group_id=self.parent_transcript_group_id,
        )
        return await self._async_context.__aenter__()

    async def __aexit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        """Async context manager exit; delegates only if a tracer context was opened."""
        if self._async_context:
            await self._async_context.__aexit__(exc_type, exc_val, exc_tb)
|
2608
|
+
|
|
2609
|
+
def transcript_group(
    func: Optional[Callable[..., Any]] = None,
    *,
    name: Optional[str] = None,
    transcript_group_id: Optional[str] = None,
    description: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
    parent_transcript_group_id: Optional[str] = None,
):
    """
    Decorator to wrap a function in a transcript group context.
    Injects transcript_group_id as a function attribute
    (``wrapper.docent.transcript_group_id``).

    Args:
        func: The function to wrap (when used as a bare ``@transcript_group``).
        name: Optional transcript group name.
        transcript_group_id: Optional transcript group ID (auto-generated if not provided).
        description: Optional transcript group description.
        metadata: Optional metadata to attach to the transcript group.
        parent_transcript_group_id: Optional parent transcript group ID.

    Example:
        @transcript_group
        def my_func(x, y):
            print(my_func.docent.transcript_group_id)

        @transcript_group(name="data_processing", description="Process user data")
        def my_func_with_name(x, y):
            print(my_func_with_name.docent.transcript_group_id)

        @transcript_group(metadata={"user": "John", "model": "gpt-4"})
        async def my_async_func(z):
            print(my_async_func.docent.transcript_group_id)
    """
    import functools
    import inspect
    from types import SimpleNamespace

    def _make_context() -> TranscriptGroupContext:
        # Shared by the sync and async wrappers so the argument list is
        # maintained in exactly one place.
        return TranscriptGroupContext(
            name=name,
            transcript_group_id=transcript_group_id,
            description=description,
            metadata=metadata,
            parent_transcript_group_id=parent_transcript_group_id,
        )

    def _attach_docent(wrapper: Callable[..., Any], resolved_group_id: str) -> None:
        # SimpleNamespace replaces the previous `type("DocentData", (), {...})()`
        # pattern, which created a brand-new class object on every call.
        setattr(wrapper, "docent", SimpleNamespace(transcript_group_id=resolved_group_id))

    def decorator(f: Callable[..., Any]) -> Callable[..., Any]:
        if inspect.iscoroutinefunction(f):

            @functools.wraps(f)
            async def async_wrapper(*args: Any, **kwargs: Any) -> Any:
                async with _make_context() as transcript_group_id_result:
                    # Store docent data as function attributes
                    _attach_docent(async_wrapper, transcript_group_id_result)
                    return await f(*args, **kwargs)

            return async_wrapper
        else:

            @functools.wraps(f)
            def sync_wrapper(*args: Any, **kwargs: Any) -> Any:
                with _make_context() as transcript_group_id_result:
                    # Store docent data as function attributes
                    _attach_docent(sync_wrapper, transcript_group_id_result)
                    return f(*args, **kwargs)

            return sync_wrapper

    if func is None:
        return decorator
    else:
        return decorator(func)
|
2697
|
+
|
|
2698
|
+
def transcript_group_context(
    name: Optional[str] = None,
    transcript_group_id: Optional[str] = None,
    description: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
    parent_transcript_group_id: Optional[str] = None,
) -> TranscriptGroupContext:
    """
    Create a transcript group context for tracing.

    Args:
        name: Optional transcript group name
        transcript_group_id: Optional transcript group ID (auto-generated if not provided)
        description: Optional transcript group description
        metadata: Optional metadata to attach to the transcript group
        parent_transcript_group_id: Optional parent transcript group ID

    Returns:
        A context manager that can be used with both 'with' and 'async with'

    Example:
        # Sync usage
        with transcript_group_context(name="data_processing") as transcript_group_id:
            pass

        # Async usage
        async with transcript_group_context(description="Process user data") as transcript_group_id:
            pass

        # With metadata
        with transcript_group_context(metadata={"user": "John", "model": "gpt-4"}) as transcript_group_id:
            pass
    """
    # Thin factory: pass by keyword so the class's parameter order is irrelevant.
    return TranscriptGroupContext(
        name=name,
        transcript_group_id=transcript_group_id,
        description=description,
        metadata=metadata,
        parent_transcript_group_id=parent_transcript_group_id,
    )
|
2735
|
+
|
|
2736
|
+
def _is_notebook() -> bool:
    """Check if we're running in a Jupyter notebook.

    An active IPython kernel loads the ``ipykernel`` package, so its
    presence in ``sys.modules`` is used as the signal. The previous
    ``try/except Exception`` wrapper was dead code: string membership in
    ``sys.modules`` (a dict) cannot raise.
    """
    return "ipykernel" in sys.modules