docent-python 0.1.17a0__py3-none-any.whl → 0.1.27a0__py3-none-any.whl
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
Potentially problematic release.
This version of docent-python might be problematic. Click here for more details.
- docent/_llm_util/__init__.py +0 -0
- docent/_llm_util/data_models/__init__.py +0 -0
- docent/_llm_util/data_models/exceptions.py +48 -0
- docent/_llm_util/data_models/llm_output.py +331 -0
- docent/_llm_util/llm_cache.py +193 -0
- docent/_llm_util/llm_svc.py +472 -0
- docent/_llm_util/model_registry.py +130 -0
- docent/_llm_util/providers/__init__.py +0 -0
- docent/_llm_util/providers/anthropic.py +537 -0
- docent/_llm_util/providers/common.py +41 -0
- docent/_llm_util/providers/google.py +530 -0
- docent/_llm_util/providers/openai.py +745 -0
- docent/_llm_util/providers/openrouter.py +375 -0
- docent/_llm_util/providers/preference_types.py +104 -0
- docent/_llm_util/providers/provider_registry.py +164 -0
- docent/data_models/__init__.py +2 -0
- docent/data_models/agent_run.py +6 -5
- docent/data_models/chat/__init__.py +6 -1
- docent/data_models/citation.py +103 -22
- docent/data_models/judge.py +19 -0
- docent/data_models/metadata_util.py +16 -0
- docent/data_models/remove_invalid_citation_ranges.py +23 -10
- docent/data_models/transcript.py +20 -16
- docent/data_models/util.py +170 -0
- docent/judges/__init__.py +23 -0
- docent/judges/analysis.py +77 -0
- docent/judges/impl.py +587 -0
- docent/judges/runner.py +129 -0
- docent/judges/stats.py +205 -0
- docent/judges/types.py +311 -0
- docent/judges/util/forgiving_json.py +108 -0
- docent/judges/util/meta_schema.json +86 -0
- docent/judges/util/meta_schema.py +29 -0
- docent/judges/util/parse_output.py +87 -0
- docent/judges/util/voting.py +139 -0
- docent/sdk/agent_run_writer.py +62 -19
- docent/sdk/client.py +244 -23
- docent/trace.py +413 -90
- {docent_python-0.1.17a0.dist-info → docent_python-0.1.27a0.dist-info}/METADATA +11 -5
- docent_python-0.1.27a0.dist-info/RECORD +59 -0
- docent/data_models/metadata.py +0 -229
- docent/data_models/yaml_util.py +0 -12
- docent_python-0.1.17a0.dist-info/RECORD +0 -32
- {docent_python-0.1.17a0.dist-info → docent_python-0.1.27a0.dist-info}/WHEEL +0 -0
- {docent_python-0.1.17a0.dist-info → docent_python-0.1.27a0.dist-info}/licenses/LICENSE.md +0 -0
docent/trace.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import atexit
|
|
2
2
|
import contextvars
|
|
3
3
|
import itertools
|
|
4
|
+
import json
|
|
4
5
|
import logging
|
|
5
6
|
import os
|
|
6
7
|
import sys
|
|
@@ -12,7 +13,19 @@ from contextvars import ContextVar, Token
|
|
|
12
13
|
from datetime import datetime, timezone
|
|
13
14
|
from enum import Enum
|
|
14
15
|
from importlib.metadata import Distribution, distributions
|
|
15
|
-
from typing import
|
|
16
|
+
from typing import (
|
|
17
|
+
Any,
|
|
18
|
+
AsyncIterator,
|
|
19
|
+
Callable,
|
|
20
|
+
Dict,
|
|
21
|
+
Iterator,
|
|
22
|
+
List,
|
|
23
|
+
Mapping,
|
|
24
|
+
Optional,
|
|
25
|
+
Set,
|
|
26
|
+
Union,
|
|
27
|
+
cast,
|
|
28
|
+
)
|
|
16
29
|
|
|
17
30
|
import requests
|
|
18
31
|
from opentelemetry import trace
|
|
@@ -21,26 +34,30 @@ from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExport
|
|
|
21
34
|
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter as HTTPExporter
|
|
22
35
|
from opentelemetry.instrumentation.threading import ThreadingInstrumentor
|
|
23
36
|
from opentelemetry.sdk.resources import Resource
|
|
24
|
-
from opentelemetry.sdk.trace import ReadableSpan, SpanProcessor, TracerProvider
|
|
37
|
+
from opentelemetry.sdk.trace import ReadableSpan, SpanLimits, SpanProcessor, TracerProvider
|
|
25
38
|
from opentelemetry.sdk.trace.export import (
|
|
26
39
|
BatchSpanProcessor,
|
|
27
40
|
ConsoleSpanExporter,
|
|
28
41
|
SimpleSpanProcessor,
|
|
29
42
|
)
|
|
30
43
|
from opentelemetry.trace import Span
|
|
44
|
+
from requests import Response
|
|
31
45
|
|
|
32
|
-
# Configure logging
|
|
33
46
|
logger = logging.getLogger(__name__)
|
|
34
|
-
logger.setLevel(logging.ERROR)
|
|
35
47
|
|
|
36
48
|
# Default configuration
|
|
37
49
|
DEFAULT_ENDPOINT = "https://api.docent.transluce.org/rest/telemetry"
|
|
38
50
|
DEFAULT_COLLECTION_NAME = "default-collection-name"
|
|
51
|
+
ERROR_DETAIL_MAX_CHARS = 500
|
|
39
52
|
|
|
53
|
+
# Sentinel values for when tracing is disabled
|
|
54
|
+
DISABLED_AGENT_RUN_ID = "disabled"
|
|
55
|
+
DISABLED_TRANSCRIPT_ID = "disabled"
|
|
56
|
+
DISABLED_TRANSCRIPT_GROUP_ID = "disabled"
|
|
40
57
|
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
58
|
+
|
|
59
|
+
class DocentTelemetryRequestError(RuntimeError):
|
|
60
|
+
"""Raised when the Docent telemetry backend rejects a client request."""
|
|
44
61
|
|
|
45
62
|
|
|
46
63
|
class Instruments(Enum):
|
|
@@ -50,18 +67,13 @@ class Instruments(Enum):
|
|
|
50
67
|
ANTHROPIC = "anthropic"
|
|
51
68
|
BEDROCK = "bedrock"
|
|
52
69
|
LANGCHAIN = "langchain"
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
def _is_notebook() -> bool:
|
|
56
|
-
"""Check if we're running in a Jupyter notebook."""
|
|
57
|
-
try:
|
|
58
|
-
return "ipykernel" in sys.modules
|
|
59
|
-
except Exception:
|
|
60
|
-
return False
|
|
70
|
+
GOOGLE_GENERATIVEAI = "google_generativeai"
|
|
61
71
|
|
|
62
72
|
|
|
63
73
|
class DocentTracer:
|
|
64
|
-
"""
|
|
74
|
+
"""
|
|
75
|
+
Manages Docent tracing setup and provides tracing utilities.
|
|
76
|
+
"""
|
|
65
77
|
|
|
66
78
|
def __init__(
|
|
67
79
|
self,
|
|
@@ -77,22 +89,6 @@ class DocentTracer:
|
|
|
77
89
|
instruments: Optional[Set[Instruments]] = None,
|
|
78
90
|
block_instruments: Optional[Set[Instruments]] = None,
|
|
79
91
|
):
|
|
80
|
-
"""
|
|
81
|
-
Initialize Docent tracing manager.
|
|
82
|
-
|
|
83
|
-
Args:
|
|
84
|
-
collection_name: Name of the collection for resource attributes
|
|
85
|
-
collection_id: Optional collection ID (auto-generated if not provided)
|
|
86
|
-
agent_run_id: Optional agent_run_id to use for code outside of an agent run context (auto-generated if not provided)
|
|
87
|
-
endpoint: OTLP endpoint URL(s) - can be a single string or list of strings for multiple endpoints
|
|
88
|
-
headers: Optional headers for authentication
|
|
89
|
-
api_key: Optional API key for bearer token authentication (takes precedence over env var)
|
|
90
|
-
enable_console_export: Whether to export to console
|
|
91
|
-
enable_otlp_export: Whether to export to OTLP endpoint
|
|
92
|
-
disable_batch: Whether to disable batch processing (use SimpleSpanProcessor)
|
|
93
|
-
instruments: Set of instruments to enable (None = all instruments)
|
|
94
|
-
block_instruments: Set of instruments to explicitly disable
|
|
95
|
-
"""
|
|
96
92
|
self._initialized: bool = False
|
|
97
93
|
# Check if tracing is disabled via environment variable
|
|
98
94
|
if _is_tracing_disabled():
|
|
@@ -157,14 +153,20 @@ class DocentTracer:
|
|
|
157
153
|
lambda: itertools.count(0)
|
|
158
154
|
)
|
|
159
155
|
self._transcript_counter_lock = threading.Lock()
|
|
156
|
+
self._transcript_group_states: dict[str, dict[str, Optional[str]]] = {}
|
|
157
|
+
self._transcript_group_state_lock = threading.Lock()
|
|
160
158
|
self._flush_lock = threading.Lock()
|
|
161
159
|
|
|
162
160
|
def get_current_agent_run_id(self) -> Optional[str]:
|
|
163
161
|
"""
|
|
164
162
|
Get the current agent run ID from context.
|
|
165
163
|
|
|
164
|
+
Retrieves the agent run ID that was set in the current execution context.
|
|
165
|
+
If no agent run context is active, returns the default agent run ID.
|
|
166
|
+
|
|
166
167
|
Returns:
|
|
167
|
-
The current agent run ID if available,
|
|
168
|
+
The current agent run ID if available, or the default agent run ID
|
|
169
|
+
if no context is active.
|
|
168
170
|
"""
|
|
169
171
|
try:
|
|
170
172
|
return self._agent_run_id_var.get()
|
|
@@ -249,12 +251,23 @@ class DocentTracer:
|
|
|
249
251
|
return
|
|
250
252
|
|
|
251
253
|
try:
|
|
254
|
+
|
|
255
|
+
# Check for OTEL_SPAN_ATTRIBUTE_COUNT_LIMIT environment variable
|
|
256
|
+
default_attribute_limit = 1024 * 16
|
|
257
|
+
env_value = os.environ.get("OTEL_SPAN_ATTRIBUTE_COUNT_LIMIT", "0")
|
|
258
|
+
env_limit = int(env_value) if env_value.isdigit() else 0
|
|
259
|
+
attribute_limit = max(env_limit, default_attribute_limit)
|
|
260
|
+
|
|
261
|
+
span_limits = SpanLimits(
|
|
262
|
+
max_attributes=attribute_limit,
|
|
263
|
+
)
|
|
264
|
+
|
|
252
265
|
# Create our own isolated tracer provider
|
|
253
266
|
self._tracer_provider = TracerProvider(
|
|
254
|
-
resource=Resource.create({"service.name": self.collection_name})
|
|
267
|
+
resource=Resource.create({"service.name": self.collection_name}),
|
|
268
|
+
span_limits=span_limits,
|
|
255
269
|
)
|
|
256
270
|
|
|
257
|
-
# Add custom span processor for agent_run_id and transcript_id
|
|
258
271
|
class ContextSpanProcessor(SpanProcessor):
|
|
259
272
|
def __init__(self, manager: "DocentTracer"):
|
|
260
273
|
self.manager: "DocentTracer" = manager
|
|
@@ -312,11 +325,7 @@ class DocentTracer:
|
|
|
312
325
|
)
|
|
313
326
|
|
|
314
327
|
def on_end(self, span: ReadableSpan) -> None:
|
|
315
|
-
|
|
316
|
-
span_attrs = span.attributes or {}
|
|
317
|
-
logger.debug(
|
|
318
|
-
f"Completed span: name='{span.name}', collection_id={span_attrs.get('collection_id')}, agent_run_id={span_attrs.get('agent_run_id')}, transcript_id={span_attrs.get('transcript_id')}, duration_ns={span.end_time - span.start_time if span.end_time and span.start_time else 'unknown'}"
|
|
319
|
-
)
|
|
328
|
+
pass
|
|
320
329
|
|
|
321
330
|
def shutdown(self) -> None:
|
|
322
331
|
pass
|
|
@@ -410,6 +419,23 @@ class DocentTracer:
|
|
|
410
419
|
except Exception as e:
|
|
411
420
|
logger.warning(f"Failed to instrument LangChain: {e}")
|
|
412
421
|
|
|
422
|
+
# Instrument Google Generative AI with our isolated tracer provider
|
|
423
|
+
if Instruments.GOOGLE_GENERATIVEAI in enabled_instruments:
|
|
424
|
+
try:
|
|
425
|
+
if is_package_installed("google-generativeai") or is_package_installed(
|
|
426
|
+
"google-genai"
|
|
427
|
+
):
|
|
428
|
+
from opentelemetry.instrumentation.google_generativeai import (
|
|
429
|
+
GoogleGenerativeAiInstrumentor,
|
|
430
|
+
)
|
|
431
|
+
|
|
432
|
+
GoogleGenerativeAiInstrumentor().instrument(
|
|
433
|
+
tracer_provider=self._tracer_provider
|
|
434
|
+
)
|
|
435
|
+
logger.info("Instrumented Google Generative AI")
|
|
436
|
+
except Exception as e:
|
|
437
|
+
logger.warning(f"Failed to instrument Google Generative AI: {e}")
|
|
438
|
+
|
|
413
439
|
# Register cleanup handlers
|
|
414
440
|
self._register_cleanup()
|
|
415
441
|
|
|
@@ -422,7 +448,17 @@ class DocentTracer:
|
|
|
422
448
|
raise
|
|
423
449
|
|
|
424
450
|
def cleanup(self):
|
|
425
|
-
"""
|
|
451
|
+
"""
|
|
452
|
+
Clean up Docent tracing resources.
|
|
453
|
+
|
|
454
|
+
Flushes all pending spans to exporters and shuts down the tracer provider.
|
|
455
|
+
This method is automatically called during application shutdown via atexit
|
|
456
|
+
handlers, but can also be called manually for explicit cleanup.
|
|
457
|
+
|
|
458
|
+
The cleanup process:
|
|
459
|
+
1. Flushes all span processors to ensure data is exported
|
|
460
|
+
2. Shuts down the tracer provider and releases resources
|
|
461
|
+
"""
|
|
426
462
|
if self._disabled:
|
|
427
463
|
return
|
|
428
464
|
|
|
@@ -473,10 +509,28 @@ class DocentTracer:
|
|
|
473
509
|
if disabled and self._initialized:
|
|
474
510
|
self.cleanup()
|
|
475
511
|
|
|
476
|
-
def
|
|
512
|
+
def is_initialized(self) -> bool:
|
|
477
513
|
"""Verify if the manager is properly initialized."""
|
|
478
514
|
return self._initialized
|
|
479
515
|
|
|
516
|
+
def get_disabled_agent_run_id(self, agent_run_id: Optional[str]) -> str:
|
|
517
|
+
"""Return sentinel value for agent run ID when tracing is disabled."""
|
|
518
|
+
if agent_run_id is None:
|
|
519
|
+
return DISABLED_AGENT_RUN_ID
|
|
520
|
+
return agent_run_id
|
|
521
|
+
|
|
522
|
+
def get_disabled_transcript_id(self, transcript_id: Optional[str]) -> str:
|
|
523
|
+
"""Return sentinel value for transcript ID when tracing is disabled."""
|
|
524
|
+
if transcript_id is None:
|
|
525
|
+
return DISABLED_TRANSCRIPT_ID
|
|
526
|
+
return transcript_id
|
|
527
|
+
|
|
528
|
+
def get_disabled_transcript_group_id(self, transcript_group_id: Optional[str]) -> str:
|
|
529
|
+
"""Return sentinel value for transcript group ID when tracing is disabled."""
|
|
530
|
+
if transcript_group_id is None:
|
|
531
|
+
return DISABLED_TRANSCRIPT_GROUP_ID
|
|
532
|
+
return transcript_group_id
|
|
533
|
+
|
|
480
534
|
@contextmanager
|
|
481
535
|
def agent_run_context(
|
|
482
536
|
self,
|
|
@@ -498,11 +552,8 @@ class DocentTracer:
|
|
|
498
552
|
Tuple of (agent_run_id, transcript_id)
|
|
499
553
|
"""
|
|
500
554
|
if self._disabled:
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
agent_run_id = str(uuid.uuid4())
|
|
504
|
-
if transcript_id is None:
|
|
505
|
-
transcript_id = str(uuid.uuid4())
|
|
555
|
+
agent_run_id = self.get_disabled_agent_run_id(agent_run_id)
|
|
556
|
+
transcript_id = self.get_disabled_transcript_id(transcript_id)
|
|
506
557
|
yield agent_run_id, transcript_id
|
|
507
558
|
return
|
|
508
559
|
|
|
@@ -525,7 +576,7 @@ class DocentTracer:
|
|
|
525
576
|
try:
|
|
526
577
|
self.send_agent_run_metadata(agent_run_id, metadata)
|
|
527
578
|
except Exception as e:
|
|
528
|
-
logger.
|
|
579
|
+
logger.error(f"Failed sending agent run metadata: {e}")
|
|
529
580
|
|
|
530
581
|
yield agent_run_id, transcript_id
|
|
531
582
|
finally:
|
|
@@ -555,11 +606,8 @@ class DocentTracer:
|
|
|
555
606
|
Tuple of (agent_run_id, transcript_id)
|
|
556
607
|
"""
|
|
557
608
|
if self._disabled:
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
agent_run_id = str(uuid.uuid4())
|
|
561
|
-
if transcript_id is None:
|
|
562
|
-
transcript_id = str(uuid.uuid4())
|
|
609
|
+
agent_run_id = self.get_disabled_agent_run_id(agent_run_id)
|
|
610
|
+
transcript_id = self.get_disabled_transcript_id(transcript_id)
|
|
563
611
|
yield agent_run_id, transcript_id
|
|
564
612
|
return
|
|
565
613
|
|
|
@@ -605,15 +653,184 @@ class DocentTracer:
|
|
|
605
653
|
|
|
606
654
|
return headers
|
|
607
655
|
|
|
656
|
+
def _ensure_json_serializable_metadata(self, metadata: Dict[str, Any], context: str) -> None:
|
|
657
|
+
"""
|
|
658
|
+
Validate that metadata can be serialized to JSON before sending it to the backend.
|
|
659
|
+
"""
|
|
660
|
+
try:
|
|
661
|
+
json.dumps(metadata)
|
|
662
|
+
except (TypeError, ValueError) as exc:
|
|
663
|
+
raise TypeError(f"{context} metadata must be JSON serializable") from exc
|
|
664
|
+
offending_path = self._find_null_character_path(metadata)
|
|
665
|
+
if offending_path is not None:
|
|
666
|
+
raise ValueError(
|
|
667
|
+
f"{context} metadata cannot contain null characters (found at {offending_path}). "
|
|
668
|
+
"Remove or replace '\\u0000' before calling Docent tracing APIs."
|
|
669
|
+
)
|
|
670
|
+
|
|
608
671
|
def _post_json(self, path: str, data: Dict[str, Any]) -> None:
|
|
672
|
+
self._post_json_sync(path, data)
|
|
673
|
+
|
|
674
|
+
def _post_json_sync(self, path: str, data: Dict[str, Any]) -> None:
|
|
609
675
|
if not self._api_endpoint_base:
|
|
610
676
|
raise RuntimeError("API endpoint base is not configured")
|
|
611
677
|
url = f"{self._api_endpoint_base}{path}"
|
|
612
678
|
try:
|
|
613
679
|
resp = requests.post(url, json=data, headers=self._api_headers(), timeout=(10, 60))
|
|
614
680
|
resp.raise_for_status()
|
|
615
|
-
except requests.exceptions.RequestException as
|
|
616
|
-
|
|
681
|
+
except requests.exceptions.RequestException as exc:
|
|
682
|
+
message = self._format_request_exception(url, exc)
|
|
683
|
+
raise DocentTelemetryRequestError(message) from exc
|
|
684
|
+
|
|
685
|
+
def _format_request_exception(self, url: str, exc: requests.exceptions.RequestException) -> str:
|
|
686
|
+
response: Optional[Response] = getattr(exc, "response", None)
|
|
687
|
+
message_parts: List[str] = [f"Failed POST {url}"]
|
|
688
|
+
suggestion: Optional[str]
|
|
689
|
+
|
|
690
|
+
if response is not None:
|
|
691
|
+
status_phrase = f"HTTP {response.status_code}"
|
|
692
|
+
if response.reason:
|
|
693
|
+
status_phrase = f"{status_phrase} {response.reason}"
|
|
694
|
+
message_parts.append(f"({status_phrase})")
|
|
695
|
+
|
|
696
|
+
detail = self._extract_response_detail(response)
|
|
697
|
+
if detail:
|
|
698
|
+
message_parts.append(f"- Backend detail: {detail}")
|
|
699
|
+
|
|
700
|
+
request_id = response.headers.get("x-request-id")
|
|
701
|
+
if request_id:
|
|
702
|
+
message_parts.append(f"(request-id: {request_id})")
|
|
703
|
+
|
|
704
|
+
suggestion = self._suggest_fix_for_status(response.status_code)
|
|
705
|
+
else:
|
|
706
|
+
message_parts.append(f"- {exc}")
|
|
707
|
+
suggestion = self._suggest_fix_for_status(None)
|
|
708
|
+
|
|
709
|
+
if suggestion:
|
|
710
|
+
message_parts.append(suggestion)
|
|
711
|
+
|
|
712
|
+
return " ".join(part for part in message_parts if part)
|
|
713
|
+
|
|
714
|
+
def _extract_response_detail(self, response: Response) -> Optional[str]:
|
|
715
|
+
try:
|
|
716
|
+
body = response.json()
|
|
717
|
+
except ValueError:
|
|
718
|
+
text = response.text.strip()
|
|
719
|
+
if not text:
|
|
720
|
+
return None
|
|
721
|
+
normalized = " ".join(text.split())
|
|
722
|
+
return self._truncate_error_message(normalized)
|
|
723
|
+
|
|
724
|
+
if isinstance(body, dict):
|
|
725
|
+
typed_body = cast(Dict[str, Any], body)
|
|
726
|
+
structured_message = self._structured_detail_message(typed_body)
|
|
727
|
+
if structured_message:
|
|
728
|
+
return self._truncate_error_message(structured_message)
|
|
729
|
+
return self._truncate_error_message(self._normalize_error_value(typed_body))
|
|
730
|
+
|
|
731
|
+
return self._truncate_error_message(self._normalize_error_value(body))
|
|
732
|
+
|
|
733
|
+
def _structured_detail_message(self, data: Dict[str, Any]) -> Optional[str]:
|
|
734
|
+
for key in ("detail", "message", "error"):
|
|
735
|
+
if key in data:
|
|
736
|
+
structured_value = self._structured_detail_value(data[key])
|
|
737
|
+
if structured_value:
|
|
738
|
+
return structured_value
|
|
739
|
+
return self._structured_detail_value(data)
|
|
740
|
+
|
|
741
|
+
def _structured_detail_value(self, value: Any) -> Optional[str]:
|
|
742
|
+
if isinstance(value, Mapping):
|
|
743
|
+
mapping_value = cast(Mapping[str, Any], value)
|
|
744
|
+
message = mapping_value.get("message")
|
|
745
|
+
hint = mapping_value.get("hint")
|
|
746
|
+
error_code = mapping_value.get("error_code")
|
|
747
|
+
request_id = mapping_value.get("request_id")
|
|
748
|
+
fallback_detail = mapping_value.get("detail")
|
|
749
|
+
|
|
750
|
+
parts: List[str] = []
|
|
751
|
+
if isinstance(message, str) and message.strip():
|
|
752
|
+
parts.append(message.strip())
|
|
753
|
+
elif isinstance(fallback_detail, str) and fallback_detail.strip():
|
|
754
|
+
parts.append(fallback_detail.strip())
|
|
755
|
+
|
|
756
|
+
if isinstance(hint, str) and hint.strip():
|
|
757
|
+
parts.append(f"(hint: {hint.strip()})")
|
|
758
|
+
if isinstance(error_code, str) and error_code.strip():
|
|
759
|
+
parts.append(f"[code: {error_code.strip()}]")
|
|
760
|
+
if isinstance(request_id, str) and request_id.strip():
|
|
761
|
+
parts.append(f"(request-id: {request_id.strip()})")
|
|
762
|
+
|
|
763
|
+
return " ".join(parts) if parts else None
|
|
764
|
+
|
|
765
|
+
if isinstance(value, str) and value.strip():
|
|
766
|
+
return value.strip()
|
|
767
|
+
|
|
768
|
+
return None
|
|
769
|
+
|
|
770
|
+
def _normalize_error_value(self, value: Any) -> str:
|
|
771
|
+
if isinstance(value, str):
|
|
772
|
+
return " ".join(value.split())
|
|
773
|
+
|
|
774
|
+
try:
|
|
775
|
+
serialized = json.dumps(value)
|
|
776
|
+
except (TypeError, ValueError):
|
|
777
|
+
serialized = str(value)
|
|
778
|
+
|
|
779
|
+
return " ".join(serialized.split())
|
|
780
|
+
|
|
781
|
+
def _truncate_error_message(self, message: str) -> str:
|
|
782
|
+
message = message.strip()
|
|
783
|
+
if len(message) <= ERROR_DETAIL_MAX_CHARS:
|
|
784
|
+
return message
|
|
785
|
+
return f"{message[:ERROR_DETAIL_MAX_CHARS]}..."
|
|
786
|
+
|
|
787
|
+
def _suggest_fix_for_status(self, status_code: Optional[int]) -> Optional[str]:
|
|
788
|
+
if status_code in (401, 403):
|
|
789
|
+
return (
|
|
790
|
+
"Verify that the Authorization header or DOCENT_API_KEY grants write access to the "
|
|
791
|
+
"target collection."
|
|
792
|
+
)
|
|
793
|
+
if status_code == 404:
|
|
794
|
+
return (
|
|
795
|
+
"Ensure the tracing endpoint passed to initialize_tracing matches the Docent server's "
|
|
796
|
+
"/rest/telemetry route."
|
|
797
|
+
)
|
|
798
|
+
if status_code in (400, 422):
|
|
799
|
+
return (
|
|
800
|
+
"Confirm the payload includes collection_id, agent_run_id, metadata, and timestamp in "
|
|
801
|
+
"the expected format."
|
|
802
|
+
)
|
|
803
|
+
if status_code and status_code >= 500:
|
|
804
|
+
return "Inspect the Docent backend logs for the referenced request."
|
|
805
|
+
if status_code is None:
|
|
806
|
+
return "Confirm the Docent telemetry endpoint is reachable from this process."
|
|
807
|
+
return None
|
|
808
|
+
|
|
809
|
+
def _find_null_character_path(self, value: Any, path: str = "") -> Optional[str]:
|
|
810
|
+
"""Backend rejects NUL bytes, so detect them before we send metadata to the backend."""
|
|
811
|
+
return None
|
|
812
|
+
if isinstance(value, str):
|
|
813
|
+
if "\x00" in value or "\\u0000" in value or "\\x00" in value:
|
|
814
|
+
return path or "<root>"
|
|
815
|
+
return None
|
|
816
|
+
|
|
817
|
+
if isinstance(value, dict):
|
|
818
|
+
for key, item in value.items():
|
|
819
|
+
next_path = f"{path}.{key}" if path else str(key)
|
|
820
|
+
result = self._find_null_character_path(item, next_path)
|
|
821
|
+
if result:
|
|
822
|
+
return result
|
|
823
|
+
return None
|
|
824
|
+
|
|
825
|
+
if isinstance(value, (list, tuple)):
|
|
826
|
+
for index, item in enumerate(value):
|
|
827
|
+
next_path = f"{path}[{index}]" if path else f"[{index}]"
|
|
828
|
+
result = self._find_null_character_path(item, next_path)
|
|
829
|
+
if result:
|
|
830
|
+
return result
|
|
831
|
+
return None
|
|
832
|
+
|
|
833
|
+
return None
|
|
617
834
|
|
|
618
835
|
def send_agent_run_score(
|
|
619
836
|
self,
|
|
@@ -650,6 +867,8 @@ class DocentTracer:
|
|
|
650
867
|
if self._disabled:
|
|
651
868
|
return
|
|
652
869
|
|
|
870
|
+
self._ensure_json_serializable_metadata(metadata, "Agent run")
|
|
871
|
+
|
|
653
872
|
collection_id = self.collection_id
|
|
654
873
|
payload: Dict[str, Any] = {
|
|
655
874
|
"collection_id": collection_id,
|
|
@@ -695,6 +914,7 @@ class DocentTracer:
|
|
|
695
914
|
if transcript_group_id is not None:
|
|
696
915
|
payload["transcript_group_id"] = transcript_group_id
|
|
697
916
|
if metadata is not None:
|
|
917
|
+
self._ensure_json_serializable_metadata(metadata, "Transcript")
|
|
698
918
|
payload["metadata"] = metadata
|
|
699
919
|
|
|
700
920
|
self._post_json("/v1/transcript-metadata", payload)
|
|
@@ -746,9 +966,7 @@ class DocentTracer:
|
|
|
746
966
|
The transcript ID
|
|
747
967
|
"""
|
|
748
968
|
if self._disabled:
|
|
749
|
-
|
|
750
|
-
if transcript_id is None:
|
|
751
|
-
transcript_id = str(uuid.uuid4())
|
|
969
|
+
transcript_id = self.get_disabled_transcript_id(transcript_id)
|
|
752
970
|
yield transcript_id
|
|
753
971
|
return
|
|
754
972
|
|
|
@@ -778,7 +996,7 @@ class DocentTracer:
|
|
|
778
996
|
transcript_id, name, description, transcript_group_id, metadata
|
|
779
997
|
)
|
|
780
998
|
except Exception as e:
|
|
781
|
-
logger.
|
|
999
|
+
logger.error(f"Failed sending transcript data: {e}")
|
|
782
1000
|
|
|
783
1001
|
yield transcript_id
|
|
784
1002
|
finally:
|
|
@@ -808,9 +1026,7 @@ class DocentTracer:
|
|
|
808
1026
|
The transcript ID
|
|
809
1027
|
"""
|
|
810
1028
|
if self._disabled:
|
|
811
|
-
|
|
812
|
-
if transcript_id is None:
|
|
813
|
-
transcript_id = str(uuid.uuid4())
|
|
1029
|
+
transcript_id = self.get_disabled_transcript_id(transcript_id)
|
|
814
1030
|
yield transcript_id
|
|
815
1031
|
return
|
|
816
1032
|
|
|
@@ -840,7 +1056,7 @@ class DocentTracer:
|
|
|
840
1056
|
transcript_id, name, description, transcript_group_id, metadata
|
|
841
1057
|
)
|
|
842
1058
|
except Exception as e:
|
|
843
|
-
logger.
|
|
1059
|
+
logger.error(f"Failed sending transcript data: {e}")
|
|
844
1060
|
|
|
845
1061
|
yield transcript_id
|
|
846
1062
|
finally:
|
|
@@ -878,6 +1094,27 @@ class DocentTracer:
|
|
|
878
1094
|
)
|
|
879
1095
|
return
|
|
880
1096
|
|
|
1097
|
+
with self._transcript_group_state_lock:
|
|
1098
|
+
state: dict[str, Optional[str]] = self._transcript_group_states.setdefault(
|
|
1099
|
+
transcript_group_id, {}
|
|
1100
|
+
)
|
|
1101
|
+
final_name: Optional[str] = name if name is not None else state.get("name")
|
|
1102
|
+
final_description: Optional[str] = (
|
|
1103
|
+
description if description is not None else state.get("description")
|
|
1104
|
+
)
|
|
1105
|
+
final_parent_transcript_group_id: Optional[str] = (
|
|
1106
|
+
parent_transcript_group_id
|
|
1107
|
+
if parent_transcript_group_id is not None
|
|
1108
|
+
else state.get("parent_transcript_group_id")
|
|
1109
|
+
)
|
|
1110
|
+
|
|
1111
|
+
if final_name is not None:
|
|
1112
|
+
state["name"] = final_name
|
|
1113
|
+
if final_description is not None:
|
|
1114
|
+
state["description"] = final_description
|
|
1115
|
+
if final_parent_transcript_group_id is not None:
|
|
1116
|
+
state["parent_transcript_group_id"] = final_parent_transcript_group_id
|
|
1117
|
+
|
|
881
1118
|
payload: Dict[str, Any] = {
|
|
882
1119
|
"collection_id": collection_id,
|
|
883
1120
|
"transcript_group_id": transcript_group_id,
|
|
@@ -885,13 +1122,14 @@ class DocentTracer:
|
|
|
885
1122
|
"timestamp": datetime.now(timezone.utc).isoformat(),
|
|
886
1123
|
}
|
|
887
1124
|
|
|
888
|
-
if
|
|
889
|
-
payload["name"] =
|
|
890
|
-
if
|
|
891
|
-
payload["description"] =
|
|
892
|
-
if
|
|
893
|
-
payload["parent_transcript_group_id"] =
|
|
1125
|
+
if final_name is not None:
|
|
1126
|
+
payload["name"] = final_name
|
|
1127
|
+
if final_description is not None:
|
|
1128
|
+
payload["description"] = final_description
|
|
1129
|
+
if final_parent_transcript_group_id is not None:
|
|
1130
|
+
payload["parent_transcript_group_id"] = final_parent_transcript_group_id
|
|
894
1131
|
if metadata is not None:
|
|
1132
|
+
self._ensure_json_serializable_metadata(metadata, "Transcript group")
|
|
895
1133
|
payload["metadata"] = metadata
|
|
896
1134
|
|
|
897
1135
|
self._post_json("/v1/transcript-group-metadata", payload)
|
|
@@ -919,9 +1157,7 @@ class DocentTracer:
|
|
|
919
1157
|
The transcript group ID
|
|
920
1158
|
"""
|
|
921
1159
|
if self._disabled:
|
|
922
|
-
|
|
923
|
-
if transcript_group_id is None:
|
|
924
|
-
transcript_group_id = str(uuid.uuid4())
|
|
1160
|
+
transcript_group_id = self.get_disabled_transcript_group_id(transcript_group_id)
|
|
925
1161
|
yield transcript_group_id
|
|
926
1162
|
return
|
|
927
1163
|
|
|
@@ -953,7 +1189,7 @@ class DocentTracer:
|
|
|
953
1189
|
transcript_group_id, name, description, parent_transcript_group_id, metadata
|
|
954
1190
|
)
|
|
955
1191
|
except Exception as e:
|
|
956
|
-
logger.
|
|
1192
|
+
logger.error(f"Failed sending transcript group data: {e}")
|
|
957
1193
|
|
|
958
1194
|
yield transcript_group_id
|
|
959
1195
|
finally:
|
|
@@ -983,9 +1219,7 @@ class DocentTracer:
|
|
|
983
1219
|
The transcript group ID
|
|
984
1220
|
"""
|
|
985
1221
|
if self._disabled:
|
|
986
|
-
|
|
987
|
-
if transcript_group_id is None:
|
|
988
|
-
transcript_group_id = str(uuid.uuid4())
|
|
1222
|
+
transcript_group_id = self.get_disabled_transcript_group_id(transcript_group_id)
|
|
989
1223
|
yield transcript_group_id
|
|
990
1224
|
return
|
|
991
1225
|
|
|
@@ -1017,7 +1251,7 @@ class DocentTracer:
|
|
|
1017
1251
|
transcript_group_id, name, description, parent_transcript_group_id, metadata
|
|
1018
1252
|
)
|
|
1019
1253
|
except Exception as e:
|
|
1020
|
-
logger.
|
|
1254
|
+
logger.error(f"Failed sending transcript group data: {e}")
|
|
1021
1255
|
|
|
1022
1256
|
yield transcript_group_id
|
|
1023
1257
|
finally:
|
|
@@ -1063,8 +1297,9 @@ def initialize_tracing(
|
|
|
1063
1297
|
collection_id: Optional collection ID (auto-generated if not provided)
|
|
1064
1298
|
endpoint: OTLP endpoint URL(s) for span export - can be a single string or list of strings for multiple endpoints
|
|
1065
1299
|
headers: Optional headers for authentication
|
|
1066
|
-
api_key: Optional API key for bearer token authentication (takes precedence
|
|
1067
|
-
|
|
1300
|
+
api_key: Optional API key for bearer token authentication (takes precedence
|
|
1301
|
+
over DOCENT_API_KEY environment variable)
|
|
1302
|
+
enable_console_export: Whether to export spans to console for debugging
|
|
1068
1303
|
enable_otlp_export: Whether to export spans to OTLP endpoint
|
|
1069
1304
|
disable_batch: Whether to disable batch processing (use SimpleSpanProcessor)
|
|
1070
1305
|
instruments: Set of instruments to enable (None = all instruments).
|
|
@@ -1074,7 +1309,6 @@ def initialize_tracing(
|
|
|
1074
1309
|
The initialized Docent tracer
|
|
1075
1310
|
|
|
1076
1311
|
Example:
|
|
1077
|
-
# Basic setup
|
|
1078
1312
|
initialize_tracing("my-collection")
|
|
1079
1313
|
"""
|
|
1080
1314
|
|
|
@@ -1137,17 +1371,17 @@ def close_tracing() -> None:
|
|
|
1137
1371
|
def flush_tracing() -> None:
|
|
1138
1372
|
"""Force flush all spans to exporters."""
|
|
1139
1373
|
if _global_tracer:
|
|
1140
|
-
logger.debug("Flushing
|
|
1374
|
+
logger.debug("Flushing Docent tracer")
|
|
1141
1375
|
_global_tracer.flush()
|
|
1142
1376
|
else:
|
|
1143
1377
|
logger.debug("No global tracer available to flush")
|
|
1144
1378
|
|
|
1145
1379
|
|
|
1146
|
-
def
|
|
1380
|
+
def is_initialized() -> bool:
|
|
1147
1381
|
"""Verify if the global Docent tracer is properly initialized."""
|
|
1148
1382
|
if _global_tracer is None:
|
|
1149
1383
|
return False
|
|
1150
|
-
return _global_tracer.
|
|
1384
|
+
return _global_tracer.is_initialized()
|
|
1151
1385
|
|
|
1152
1386
|
|
|
1153
1387
|
def is_disabled() -> bool:
|
|
@@ -1221,28 +1455,33 @@ def agent_run_metadata(metadata: Dict[str, Any]) -> None:
|
|
|
1221
1455
|
|
|
1222
1456
|
tracer.send_agent_run_metadata(agent_run_id, metadata)
|
|
1223
1457
|
except Exception as e:
|
|
1224
|
-
logger.error(f"Failed to send metadata: {e}")
|
|
1458
|
+
logger.error(f"Failed to send agent run metadata: {e}")
|
|
1225
1459
|
|
|
1226
1460
|
|
|
1227
1461
|
def transcript_metadata(
|
|
1462
|
+
metadata: Dict[str, Any],
|
|
1463
|
+
*,
|
|
1228
1464
|
name: Optional[str] = None,
|
|
1229
1465
|
description: Optional[str] = None,
|
|
1230
1466
|
transcript_group_id: Optional[str] = None,
|
|
1231
|
-
metadata: Optional[Dict[str, Any]] = None,
|
|
1232
1467
|
) -> None:
|
|
1233
1468
|
"""
|
|
1234
1469
|
Send transcript metadata directly to the backend for the current transcript.
|
|
1235
1470
|
|
|
1236
1471
|
Args:
|
|
1472
|
+
metadata: Dictionary of metadata to attach to the current transcript (required)
|
|
1237
1473
|
name: Optional transcript name
|
|
1238
1474
|
description: Optional transcript description
|
|
1239
|
-
|
|
1240
|
-
metadata: Optional metadata to send
|
|
1475
|
+
transcript_group_id: Optional transcript group ID to associate with
|
|
1241
1476
|
|
|
1242
1477
|
Example:
|
|
1243
|
-
transcript_metadata(
|
|
1244
|
-
transcript_metadata(
|
|
1245
|
-
transcript_metadata(
|
|
1478
|
+
transcript_metadata({"user": "John", "model": "gpt-4"})
|
|
1479
|
+
transcript_metadata({"env": "prod"}, name="data_processing")
|
|
1480
|
+
transcript_metadata(
|
|
1481
|
+
{"team": "search"},
|
|
1482
|
+
name="validation",
|
|
1483
|
+
transcript_group_id="group-123",
|
|
1484
|
+
)
|
|
1246
1485
|
"""
|
|
1247
1486
|
try:
|
|
1248
1487
|
tracer = get_tracer()
|
|
@@ -1260,6 +1499,47 @@ def transcript_metadata(
|
|
|
1260
1499
|
logger.error(f"Failed to send transcript metadata: {e}")
|
|
1261
1500
|
|
|
1262
1501
|
|
|
1502
|
+
def transcript_group_metadata(
|
|
1503
|
+
metadata: Dict[str, Any],
|
|
1504
|
+
*,
|
|
1505
|
+
name: Optional[str] = None,
|
|
1506
|
+
description: Optional[str] = None,
|
|
1507
|
+
parent_transcript_group_id: Optional[str] = None,
|
|
1508
|
+
) -> None:
|
|
1509
|
+
"""
|
|
1510
|
+
Send transcript group metadata directly to the backend for the current transcript group.
|
|
1511
|
+
|
|
1512
|
+
Args:
|
|
1513
|
+
metadata: Dictionary of metadata to attach to the current transcript group (required)
|
|
1514
|
+
name: Optional transcript group name
|
|
1515
|
+
description: Optional transcript group description
|
|
1516
|
+
parent_transcript_group_id: Optional parent transcript group ID
|
|
1517
|
+
|
|
1518
|
+
Example:
|
|
1519
|
+
transcript_group_metadata({"team": "search", "env": "prod"})
|
|
1520
|
+
transcript_group_metadata({"env": "prod"}, name="pipeline")
|
|
1521
|
+
transcript_group_metadata(
|
|
1522
|
+
{"team": "search"},
|
|
1523
|
+
name="pipeline",
|
|
1524
|
+
parent_transcript_group_id="root-group",
|
|
1525
|
+
)
|
|
1526
|
+
"""
|
|
1527
|
+
try:
|
|
1528
|
+
tracer = get_tracer()
|
|
1529
|
+
if tracer.is_disabled():
|
|
1530
|
+
return
|
|
1531
|
+
transcript_group_id = tracer.get_current_transcript_group_id()
|
|
1532
|
+
if not transcript_group_id:
|
|
1533
|
+
logger.warning("No active transcript group context. Metadata will not be sent.")
|
|
1534
|
+
return
|
|
1535
|
+
|
|
1536
|
+
tracer.send_transcript_group_metadata(
|
|
1537
|
+
transcript_group_id, name, description, parent_transcript_group_id, metadata
|
|
1538
|
+
)
|
|
1539
|
+
except Exception as e:
|
|
1540
|
+
logger.error(f"Failed to send transcript group metadata: {e}")
|
|
1541
|
+
|
|
1542
|
+
|
|
1263
1543
|
class AgentRunContext:
|
|
1264
1544
|
"""Context manager that works in both sync and async contexts."""
|
|
1265
1545
|
|
|
@@ -1279,6 +1559,11 @@ class AgentRunContext:
|
|
|
1279
1559
|
|
|
1280
1560
|
def __enter__(self) -> tuple[str, str]:
|
|
1281
1561
|
"""Sync context manager entry."""
|
|
1562
|
+
if is_disabled():
|
|
1563
|
+
tracer = get_tracer()
|
|
1564
|
+
self.agent_run_id = tracer.get_disabled_agent_run_id(self.agent_run_id)
|
|
1565
|
+
self.transcript_id = tracer.get_disabled_transcript_id(self.transcript_id)
|
|
1566
|
+
return self.agent_run_id, self.transcript_id
|
|
1282
1567
|
self._sync_context = get_tracer().agent_run_context(
|
|
1283
1568
|
self.agent_run_id, self.transcript_id, metadata=self.metadata, **self.attributes
|
|
1284
1569
|
)
|
|
@@ -1291,6 +1576,11 @@ class AgentRunContext:
|
|
|
1291
1576
|
|
|
1292
1577
|
async def __aenter__(self) -> tuple[str, str]:
|
|
1293
1578
|
"""Async context manager entry."""
|
|
1579
|
+
if is_disabled():
|
|
1580
|
+
tracer = get_tracer()
|
|
1581
|
+
self.agent_run_id = tracer.get_disabled_agent_run_id(self.agent_run_id)
|
|
1582
|
+
self.transcript_id = tracer.get_disabled_transcript_id(self.transcript_id)
|
|
1583
|
+
return self.agent_run_id, self.transcript_id
|
|
1294
1584
|
self._async_context = get_tracer().async_agent_run_context(
|
|
1295
1585
|
self.agent_run_id, self.transcript_id, metadata=self.metadata, **self.attributes
|
|
1296
1586
|
)
|
|
@@ -1431,6 +1721,10 @@ class TranscriptContext:
|
|
|
1431
1721
|
|
|
1432
1722
|
def __enter__(self) -> str:
|
|
1433
1723
|
"""Sync context manager entry."""
|
|
1724
|
+
if is_disabled():
|
|
1725
|
+
tracer = get_tracer()
|
|
1726
|
+
self.transcript_id = tracer.get_disabled_transcript_id(self.transcript_id)
|
|
1727
|
+
return self.transcript_id
|
|
1434
1728
|
self._sync_context = get_tracer().transcript_context(
|
|
1435
1729
|
name=self.name,
|
|
1436
1730
|
transcript_id=self.transcript_id,
|
|
@@ -1447,6 +1741,10 @@ class TranscriptContext:
|
|
|
1447
1741
|
|
|
1448
1742
|
async def __aenter__(self) -> str:
|
|
1449
1743
|
"""Async context manager entry."""
|
|
1744
|
+
if is_disabled():
|
|
1745
|
+
tracer = get_tracer()
|
|
1746
|
+
self.transcript_id = tracer.get_disabled_transcript_id(self.transcript_id)
|
|
1747
|
+
return self.transcript_id
|
|
1450
1748
|
self._async_context = get_tracer().async_transcript_context(
|
|
1451
1749
|
name=self.name,
|
|
1452
1750
|
transcript_id=self.transcript_id,
|
|
@@ -1608,6 +1906,12 @@ class TranscriptGroupContext:
|
|
|
1608
1906
|
|
|
1609
1907
|
def __enter__(self) -> str:
|
|
1610
1908
|
"""Sync context manager entry."""
|
|
1909
|
+
if is_disabled():
|
|
1910
|
+
tracer = get_tracer()
|
|
1911
|
+
self.transcript_group_id = tracer.get_disabled_transcript_group_id(
|
|
1912
|
+
self.transcript_group_id
|
|
1913
|
+
)
|
|
1914
|
+
return self.transcript_group_id
|
|
1611
1915
|
self._sync_context = get_tracer().transcript_group_context(
|
|
1612
1916
|
name=self.name,
|
|
1613
1917
|
transcript_group_id=self.transcript_group_id,
|
|
@@ -1624,6 +1928,12 @@ class TranscriptGroupContext:
|
|
|
1624
1928
|
|
|
1625
1929
|
async def __aenter__(self) -> str:
|
|
1626
1930
|
"""Async context manager entry."""
|
|
1931
|
+
if is_disabled():
|
|
1932
|
+
tracer = get_tracer()
|
|
1933
|
+
self.transcript_group_id = tracer.get_disabled_transcript_group_id(
|
|
1934
|
+
self.transcript_group_id
|
|
1935
|
+
)
|
|
1936
|
+
return self.transcript_group_id
|
|
1627
1937
|
self._async_context = get_tracer().async_transcript_group_context(
|
|
1628
1938
|
name=self.name,
|
|
1629
1939
|
transcript_group_id=self.transcript_group_id,
|
|
@@ -1764,3 +2074,16 @@ def transcript_group_context(
|
|
|
1764
2074
|
return TranscriptGroupContext(
|
|
1765
2075
|
name, transcript_group_id, description, metadata, parent_transcript_group_id
|
|
1766
2076
|
)
|
|
2077
|
+
|
|
2078
|
+
|
|
2079
|
+
def _is_tracing_disabled() -> bool:
|
|
2080
|
+
"""Check if tracing is disabled via environment variable."""
|
|
2081
|
+
return os.environ.get("DOCENT_DISABLE_TRACING", "").lower() == "true"
|
|
2082
|
+
|
|
2083
|
+
|
|
2084
|
+
def _is_notebook() -> bool:
|
|
2085
|
+
"""Check if we're running in a Jupyter notebook."""
|
|
2086
|
+
try:
|
|
2087
|
+
return "ipykernel" in sys.modules
|
|
2088
|
+
except Exception:
|
|
2089
|
+
return False
|