docent-python 0.1.14a0__py3-none-any.whl → 0.1.28a0__py3-none-any.whl

This diff represents the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.

Potentially problematic release: this version of docent-python might be problematic.

Files changed (46)
  1. docent/_llm_util/__init__.py +0 -0
  2. docent/_llm_util/data_models/__init__.py +0 -0
  3. docent/_llm_util/data_models/exceptions.py +48 -0
  4. docent/_llm_util/data_models/llm_output.py +331 -0
  5. docent/_llm_util/llm_cache.py +193 -0
  6. docent/_llm_util/llm_svc.py +472 -0
  7. docent/_llm_util/model_registry.py +130 -0
  8. docent/_llm_util/providers/__init__.py +0 -0
  9. docent/_llm_util/providers/anthropic.py +537 -0
  10. docent/_llm_util/providers/common.py +41 -0
  11. docent/_llm_util/providers/google.py +530 -0
  12. docent/_llm_util/providers/openai.py +745 -0
  13. docent/_llm_util/providers/openrouter.py +375 -0
  14. docent/_llm_util/providers/preference_types.py +104 -0
  15. docent/_llm_util/providers/provider_registry.py +164 -0
  16. docent/data_models/__init__.py +2 -0
  17. docent/data_models/agent_run.py +17 -29
  18. docent/data_models/chat/__init__.py +6 -1
  19. docent/data_models/chat/message.py +3 -1
  20. docent/data_models/citation.py +103 -22
  21. docent/data_models/judge.py +19 -0
  22. docent/data_models/metadata_util.py +16 -0
  23. docent/data_models/remove_invalid_citation_ranges.py +23 -10
  24. docent/data_models/transcript.py +25 -80
  25. docent/data_models/util.py +170 -0
  26. docent/judges/__init__.py +23 -0
  27. docent/judges/analysis.py +77 -0
  28. docent/judges/impl.py +587 -0
  29. docent/judges/runner.py +129 -0
  30. docent/judges/stats.py +205 -0
  31. docent/judges/types.py +311 -0
  32. docent/judges/util/forgiving_json.py +108 -0
  33. docent/judges/util/meta_schema.json +86 -0
  34. docent/judges/util/meta_schema.py +29 -0
  35. docent/judges/util/parse_output.py +87 -0
  36. docent/judges/util/voting.py +139 -0
  37. docent/sdk/agent_run_writer.py +72 -21
  38. docent/sdk/client.py +276 -23
  39. docent/trace.py +413 -90
  40. {docent_python-0.1.14a0.dist-info → docent_python-0.1.28a0.dist-info}/METADATA +13 -5
  41. docent_python-0.1.28a0.dist-info/RECORD +59 -0
  42. docent/data_models/metadata.py +0 -229
  43. docent/data_models/yaml_util.py +0 -12
  44. docent_python-0.1.14a0.dist-info/RECORD +0 -32
  45. {docent_python-0.1.14a0.dist-info → docent_python-0.1.28a0.dist-info}/WHEEL +0 -0
  46. {docent_python-0.1.14a0.dist-info → docent_python-0.1.28a0.dist-info}/licenses/LICENSE.md +0 -0
docent/trace.py CHANGED
@@ -1,6 +1,7 @@
  import atexit
  import contextvars
  import itertools
+ import json
  import logging
  import os
  import sys
@@ -12,7 +13,19 @@ from contextvars import ContextVar, Token
  from datetime import datetime, timezone
  from enum import Enum
  from importlib.metadata import Distribution, distributions
- from typing import Any, AsyncIterator, Callable, Dict, Iterator, List, Optional, Set, Union
+ from typing import (
+ Any,
+ AsyncIterator,
+ Callable,
+ Dict,
+ Iterator,
+ List,
+ Mapping,
+ Optional,
+ Set,
+ Union,
+ cast,
+ )

  import requests
  from opentelemetry import trace
@@ -21,26 +34,30 @@ from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExport
  from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter as HTTPExporter
  from opentelemetry.instrumentation.threading import ThreadingInstrumentor
  from opentelemetry.sdk.resources import Resource
- from opentelemetry.sdk.trace import ReadableSpan, SpanProcessor, TracerProvider
+ from opentelemetry.sdk.trace import ReadableSpan, SpanLimits, SpanProcessor, TracerProvider
  from opentelemetry.sdk.trace.export import (
  BatchSpanProcessor,
  ConsoleSpanExporter,
  SimpleSpanProcessor,
  )
  from opentelemetry.trace import Span
+ from requests import Response

- # Configure logging
  logger = logging.getLogger(__name__)
- logger.setLevel(logging.ERROR)

  # Default configuration
  DEFAULT_ENDPOINT = "https://api.docent.transluce.org/rest/telemetry"
  DEFAULT_COLLECTION_NAME = "default-collection-name"
+ ERROR_DETAIL_MAX_CHARS = 500

+ # Sentinel values for when tracing is disabled
+ DISABLED_AGENT_RUN_ID = "disabled"
+ DISABLED_TRANSCRIPT_ID = "disabled"
+ DISABLED_TRANSCRIPT_GROUP_ID = "disabled"

- def _is_tracing_disabled() -> bool:
- """Check if tracing is disabled via environment variable."""
- return os.environ.get("DOCENT_DISABLE_TRACING", "").lower() == "true"
+
+ class DocentTelemetryRequestError(RuntimeError):
+ """Raised when the Docent telemetry backend rejects a client request."""


  class Instruments(Enum):
@@ -50,18 +67,13 @@ class Instruments(Enum):
  ANTHROPIC = "anthropic"
  BEDROCK = "bedrock"
  LANGCHAIN = "langchain"
-
-
- def _is_notebook() -> bool:
- """Check if we're running in a Jupyter notebook."""
- try:
- return "ipykernel" in sys.modules
- except Exception:
- return False
+ GOOGLE_GENERATIVEAI = "google_generativeai"


  class DocentTracer:
- """Manages Docent tracing setup and provides tracing utilities."""
+ """
+ Manages Docent tracing setup and provides tracing utilities.
+ """

  def __init__(
  self,
@@ -77,22 +89,6 @@ class DocentTracer:
  instruments: Optional[Set[Instruments]] = None,
  block_instruments: Optional[Set[Instruments]] = None,
  ):
- """
- Initialize Docent tracing manager.
-
- Args:
- collection_name: Name of the collection for resource attributes
- collection_id: Optional collection ID (auto-generated if not provided)
- agent_run_id: Optional agent_run_id to use for code outside of an agent run context (auto-generated if not provided)
- endpoint: OTLP endpoint URL(s) - can be a single string or list of strings for multiple endpoints
- headers: Optional headers for authentication
- api_key: Optional API key for bearer token authentication (takes precedence over env var)
- enable_console_export: Whether to export to console
- enable_otlp_export: Whether to export to OTLP endpoint
- disable_batch: Whether to disable batch processing (use SimpleSpanProcessor)
- instruments: Set of instruments to enable (None = all instruments)
- block_instruments: Set of instruments to explicitly disable
- """
  self._initialized: bool = False
  # Check if tracing is disabled via environment variable
  if _is_tracing_disabled():
@@ -157,14 +153,20 @@ class DocentTracer:
  lambda: itertools.count(0)
  )
  self._transcript_counter_lock = threading.Lock()
+ self._transcript_group_states: dict[str, dict[str, Optional[str]]] = {}
+ self._transcript_group_state_lock = threading.Lock()
  self._flush_lock = threading.Lock()

  def get_current_agent_run_id(self) -> Optional[str]:
  """
  Get the current agent run ID from context.

+ Retrieves the agent run ID that was set in the current execution context.
+ If no agent run context is active, returns the default agent run ID.
+
  Returns:
- The current agent run ID if available, None otherwise
+ The current agent run ID if available, or the default agent run ID
+ if no context is active.
  """
  try:
  return self._agent_run_id_var.get()
@@ -249,12 +251,23 @@ class DocentTracer:
  return

  try:
+
+ # Check for OTEL_SPAN_ATTRIBUTE_COUNT_LIMIT environment variable
+ default_attribute_limit = 1024 * 16
+ env_value = os.environ.get("OTEL_SPAN_ATTRIBUTE_COUNT_LIMIT", "0")
+ env_limit = int(env_value) if env_value.isdigit() else 0
+ attribute_limit = max(env_limit, default_attribute_limit)
+
+ span_limits = SpanLimits(
+ max_attributes=attribute_limit,
+ )
+
  # Create our own isolated tracer provider
  self._tracer_provider = TracerProvider(
- resource=Resource.create({"service.name": self.collection_name})
+ resource=Resource.create({"service.name": self.collection_name}),
+ span_limits=span_limits,
  )

- # Add custom span processor for agent_run_id and transcript_id
  class ContextSpanProcessor(SpanProcessor):
  def __init__(self, manager: "DocentTracer"):
  self.manager: "DocentTracer" = manager
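
With the span-limit change above, the effective attribute cap is the larger of OTEL_SPAN_ATTRIBUTE_COUNT_LIMIT and 16384 (1024 * 16), so the environment variable can only raise the limit, never lower it. A minimal sketch of raising it before tracing is initialized; the value 32768 and the collection name are illustrative:

    import os

    # Must be set before initialize_tracing builds the TracerProvider.
    # Values below 16384 are ignored because the code takes max(env_limit, 16384).
    os.environ["OTEL_SPAN_ATTRIBUTE_COUNT_LIMIT"] = "32768"

    from docent.trace import initialize_tracing

    initialize_tracing("my-collection")
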
@@ -312,11 +325,7 @@ class DocentTracer:
  )

  def on_end(self, span: ReadableSpan) -> None:
- # Debug logging for span completion
- span_attrs = span.attributes or {}
- logger.debug(
- f"Completed span: name='{span.name}', collection_id={span_attrs.get('collection_id')}, agent_run_id={span_attrs.get('agent_run_id')}, transcript_id={span_attrs.get('transcript_id')}, duration_ns={span.end_time - span.start_time if span.end_time and span.start_time else 'unknown'}"
- )
+ pass

  def shutdown(self) -> None:
  pass
@@ -410,6 +419,23 @@ class DocentTracer:
  except Exception as e:
  logger.warning(f"Failed to instrument LangChain: {e}")

+ # Instrument Google Generative AI with our isolated tracer provider
+ if Instruments.GOOGLE_GENERATIVEAI in enabled_instruments:
+ try:
+ if is_package_installed("google-generativeai") or is_package_installed(
+ "google-genai"
+ ):
+ from opentelemetry.instrumentation.google_generativeai import (
+ GoogleGenerativeAiInstrumentor,
+ )
+
+ GoogleGenerativeAiInstrumentor().instrument(
+ tracer_provider=self._tracer_provider
+ )
+ logger.info("Instrumented Google Generative AI")
+ except Exception as e:
+ logger.warning(f"Failed to instrument Google Generative AI: {e}")
+
  # Register cleanup handlers
  self._register_cleanup()

@@ -422,7 +448,17 @@ class DocentTracer:
  raise

  def cleanup(self):
- """Clean up Docent tracing resources and signal trace completion to backend."""
+ """
+ Clean up Docent tracing resources.
+
+ Flushes all pending spans to exporters and shuts down the tracer provider.
+ This method is automatically called during application shutdown via atexit
+ handlers, but can also be called manually for explicit cleanup.
+
+ The cleanup process:
+ 1. Flushes all span processors to ensure data is exported
+ 2. Shuts down the tracer provider and releases resources
+ """
  if self._disabled:
  return

@@ -473,10 +509,28 @@ class DocentTracer:
  if disabled and self._initialized:
  self.cleanup()

- def verify_initialized(self) -> bool:
+ def is_initialized(self) -> bool:
  """Verify if the manager is properly initialized."""
  return self._initialized

+ def get_disabled_agent_run_id(self, agent_run_id: Optional[str]) -> str:
+ """Return sentinel value for agent run ID when tracing is disabled."""
+ if agent_run_id is None:
+ return DISABLED_AGENT_RUN_ID
+ return agent_run_id
+
+ def get_disabled_transcript_id(self, transcript_id: Optional[str]) -> str:
+ """Return sentinel value for transcript ID when tracing is disabled."""
+ if transcript_id is None:
+ return DISABLED_TRANSCRIPT_ID
+ return transcript_id
+
+ def get_disabled_transcript_group_id(self, transcript_group_id: Optional[str]) -> str:
+ """Return sentinel value for transcript group ID when tracing is disabled."""
+ if transcript_group_id is None:
+ return DISABLED_TRANSCRIPT_GROUP_ID
+ return transcript_group_id
+
  @contextmanager
  def agent_run_context(
  self,
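
Together with the DISABLED_* sentinels added earlier in this diff, the get_disabled_* helpers above make disabled tracing yield the fixed string "disabled" instead of random UUIDs. A small sketch of the expected behavior, assuming DOCENT_DISABLE_TRACING is set before initialization; the collection name is illustrative:

    import os

    os.environ["DOCENT_DISABLE_TRACING"] = "true"  # checked by _is_tracing_disabled()

    from docent.trace import initialize_tracing, is_disabled

    tracer = initialize_tracing("my-collection")  # returns the (disabled) DocentTracer
    assert is_disabled()

    # With no explicit IDs, disabled contexts fall back to the sentinel values.
    assert tracer.get_disabled_agent_run_id(None) == "disabled"
    assert tracer.get_disabled_transcript_id(None) == "disabled"
    assert tracer.get_disabled_transcript_group_id(None) == "disabled"
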
@@ -498,11 +552,8 @@ class DocentTracer:
  Tuple of (agent_run_id, transcript_id)
  """
  if self._disabled:
- # Return dummy IDs when tracing is disabled
- if agent_run_id is None:
- agent_run_id = str(uuid.uuid4())
- if transcript_id is None:
- transcript_id = str(uuid.uuid4())
+ agent_run_id = self.get_disabled_agent_run_id(agent_run_id)
+ transcript_id = self.get_disabled_transcript_id(transcript_id)
  yield agent_run_id, transcript_id
  return

@@ -525,7 +576,7 @@ class DocentTracer:
  try:
  self.send_agent_run_metadata(agent_run_id, metadata)
  except Exception as e:
- logger.warning(f"Failed sending agent run metadata: {e}")
+ logger.error(f"Failed sending agent run metadata: {e}")

  yield agent_run_id, transcript_id
  finally:
@@ -555,11 +606,8 @@ class DocentTracer:
  Tuple of (agent_run_id, transcript_id)
  """
  if self._disabled:
- # Return dummy IDs when tracing is disabled
- if agent_run_id is None:
- agent_run_id = str(uuid.uuid4())
- if transcript_id is None:
- transcript_id = str(uuid.uuid4())
+ agent_run_id = self.get_disabled_agent_run_id(agent_run_id)
+ transcript_id = self.get_disabled_transcript_id(transcript_id)
  yield agent_run_id, transcript_id
  return

@@ -605,15 +653,184 @@ class DocentTracer:

  return headers

+ def _ensure_json_serializable_metadata(self, metadata: Dict[str, Any], context: str) -> None:
+ """
+ Validate that metadata can be serialized to JSON before sending it to the backend.
+ """
+ try:
+ json.dumps(metadata)
+ except (TypeError, ValueError) as exc:
+ raise TypeError(f"{context} metadata must be JSON serializable") from exc
+ offending_path = self._find_null_character_path(metadata)
+ if offending_path is not None:
+ raise ValueError(
+ f"{context} metadata cannot contain null characters (found at {offending_path}). "
+ "Remove or replace '\\u0000' before calling Docent tracing APIs."
+ )
+
  def _post_json(self, path: str, data: Dict[str, Any]) -> None:
+ self._post_json_sync(path, data)
+
+ def _post_json_sync(self, path: str, data: Dict[str, Any]) -> None:
  if not self._api_endpoint_base:
  raise RuntimeError("API endpoint base is not configured")
  url = f"{self._api_endpoint_base}{path}"
  try:
  resp = requests.post(url, json=data, headers=self._api_headers(), timeout=(10, 60))
  resp.raise_for_status()
- except requests.exceptions.RequestException as e:
- logger.error(f"Failed POST {url}: {e}")
+ except requests.exceptions.RequestException as exc:
+ message = self._format_request_exception(url, exc)
+ raise DocentTelemetryRequestError(message) from exc
+
+ def _format_request_exception(self, url: str, exc: requests.exceptions.RequestException) -> str:
+ response: Optional[Response] = getattr(exc, "response", None)
+ message_parts: List[str] = [f"Failed POST {url}"]
+ suggestion: Optional[str]
+
+ if response is not None:
+ status_phrase = f"HTTP {response.status_code}"
+ if response.reason:
+ status_phrase = f"{status_phrase} {response.reason}"
+ message_parts.append(f"({status_phrase})")
+
+ detail = self._extract_response_detail(response)
+ if detail:
+ message_parts.append(f"- Backend detail: {detail}")
+
+ request_id = response.headers.get("x-request-id")
+ if request_id:
+ message_parts.append(f"(request-id: {request_id})")
+
+ suggestion = self._suggest_fix_for_status(response.status_code)
+ else:
+ message_parts.append(f"- {exc}")
+ suggestion = self._suggest_fix_for_status(None)
+
+ if suggestion:
+ message_parts.append(suggestion)
+
+ return " ".join(part for part in message_parts if part)
+
+ def _extract_response_detail(self, response: Response) -> Optional[str]:
+ try:
+ body = response.json()
+ except ValueError:
+ text = response.text.strip()
+ if not text:
+ return None
+ normalized = " ".join(text.split())
+ return self._truncate_error_message(normalized)
+
+ if isinstance(body, dict):
+ typed_body = cast(Dict[str, Any], body)
+ structured_message = self._structured_detail_message(typed_body)
+ if structured_message:
+ return self._truncate_error_message(structured_message)
+ return self._truncate_error_message(self._normalize_error_value(typed_body))
+
+ return self._truncate_error_message(self._normalize_error_value(body))
+
+ def _structured_detail_message(self, data: Dict[str, Any]) -> Optional[str]:
+ for key in ("detail", "message", "error"):
+ if key in data:
+ structured_value = self._structured_detail_value(data[key])
+ if structured_value:
+ return structured_value
+ return self._structured_detail_value(data)
+
+ def _structured_detail_value(self, value: Any) -> Optional[str]:
+ if isinstance(value, Mapping):
+ mapping_value = cast(Mapping[str, Any], value)
+ message = mapping_value.get("message")
+ hint = mapping_value.get("hint")
+ error_code = mapping_value.get("error_code")
+ request_id = mapping_value.get("request_id")
+ fallback_detail = mapping_value.get("detail")
+
+ parts: List[str] = []
+ if isinstance(message, str) and message.strip():
+ parts.append(message.strip())
+ elif isinstance(fallback_detail, str) and fallback_detail.strip():
+ parts.append(fallback_detail.strip())
+
+ if isinstance(hint, str) and hint.strip():
+ parts.append(f"(hint: {hint.strip()})")
+ if isinstance(error_code, str) and error_code.strip():
+ parts.append(f"[code: {error_code.strip()}]")
+ if isinstance(request_id, str) and request_id.strip():
+ parts.append(f"(request-id: {request_id.strip()})")
+
+ return " ".join(parts) if parts else None
+
+ if isinstance(value, str) and value.strip():
+ return value.strip()
+
+ return None
+
+ def _normalize_error_value(self, value: Any) -> str:
+ if isinstance(value, str):
+ return " ".join(value.split())
+
+ try:
+ serialized = json.dumps(value)
+ except (TypeError, ValueError):
+ serialized = str(value)
+
+ return " ".join(serialized.split())
+
+ def _truncate_error_message(self, message: str) -> str:
+ message = message.strip()
+ if len(message) <= ERROR_DETAIL_MAX_CHARS:
+ return message
+ return f"{message[:ERROR_DETAIL_MAX_CHARS]}..."
+
+ def _suggest_fix_for_status(self, status_code: Optional[int]) -> Optional[str]:
+ if status_code in (401, 403):
+ return (
+ "Verify that the Authorization header or DOCENT_API_KEY grants write access to the "
+ "target collection."
+ )
+ if status_code == 404:
+ return (
+ "Ensure the tracing endpoint passed to initialize_tracing matches the Docent server's "
+ "/rest/telemetry route."
+ )
+ if status_code in (400, 422):
+ return (
+ "Confirm the payload includes collection_id, agent_run_id, metadata, and timestamp in "
+ "the expected format."
+ )
+ if status_code and status_code >= 500:
+ return "Inspect the Docent backend logs for the referenced request."
+ if status_code is None:
+ return "Confirm the Docent telemetry endpoint is reachable from this process."
+ return None
+
+ def _find_null_character_path(self, value: Any, path: str = "") -> Optional[str]:
+ """Backend rejects NUL bytes, so detect them before we send metadata to the backend."""
+ return None
+ if isinstance(value, str):
+ if "\x00" in value or "\\u0000" in value or "\\x00" in value:
+ return path or "<root>"
+ return None
+
+ if isinstance(value, dict):
+ for key, item in value.items():
+ next_path = f"{path}.{key}" if path else str(key)
+ result = self._find_null_character_path(item, next_path)
+ if result:
+ return result
+ return None
+
+ if isinstance(value, (list, tuple)):
+ for index, item in enumerate(value):
+ next_path = f"{path}[{index}]" if path else f"[{index}]"
+ result = self._find_null_character_path(item, next_path)
+ if result:
+ return result
+ return None
+
+ return None

  def send_agent_run_score(
  self,
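
With this hunk, _post_json raises DocentTelemetryRequestError carrying the message built by _format_request_exception (HTTP status, backend detail, request id, and a suggested fix) instead of only logging the failure. The module-level helpers such as agent_run_metadata still catch and log it; a hedged sketch of handling it when calling the tracer's send_* methods directly (the run id and metadata are illustrative):

    from docent.trace import DocentTelemetryRequestError, get_tracer

    tracer = get_tracer()
    try:
        # Direct send_* calls surface backend rejections as DocentTelemetryRequestError.
        tracer.send_agent_run_metadata("agent-run-123", {"experiment": "baseline"})
    except DocentTelemetryRequestError as exc:
        # Message shape (per _format_request_exception): "Failed POST <url> (HTTP 401 Unauthorized)
        # - Backend detail: ... (request-id: ...) Verify that the Authorization header or
        # DOCENT_API_KEY grants write access to the target collection."
        print(exc)
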
@@ -650,6 +867,8 @@ class DocentTracer:
  if self._disabled:
  return

+ self._ensure_json_serializable_metadata(metadata, "Agent run")
+
  collection_id = self.collection_id
  payload: Dict[str, Any] = {
  "collection_id": collection_id,
@@ -695,6 +914,7 @@ class DocentTracer:
  if transcript_group_id is not None:
  payload["transcript_group_id"] = transcript_group_id
  if metadata is not None:
+ self._ensure_json_serializable_metadata(metadata, "Transcript")
  payload["metadata"] = metadata

  self._post_json("/v1/transcript-metadata", payload)
@@ -746,9 +966,7 @@ class DocentTracer:
  The transcript ID
  """
  if self._disabled:
- # Return dummy ID when tracing is disabled
- if transcript_id is None:
- transcript_id = str(uuid.uuid4())
+ transcript_id = self.get_disabled_transcript_id(transcript_id)
  yield transcript_id
  return

@@ -778,7 +996,7 @@ class DocentTracer:
  transcript_id, name, description, transcript_group_id, metadata
  )
  except Exception as e:
- logger.warning(f"Failed sending transcript data: {e}")
+ logger.error(f"Failed sending transcript data: {e}")

  yield transcript_id
  finally:
@@ -808,9 +1026,7 @@ class DocentTracer:
  The transcript ID
  """
  if self._disabled:
- # Return dummy ID when tracing is disabled
- if transcript_id is None:
- transcript_id = str(uuid.uuid4())
+ transcript_id = self.get_disabled_transcript_id(transcript_id)
  yield transcript_id
  return

@@ -840,7 +1056,7 @@ class DocentTracer:
  transcript_id, name, description, transcript_group_id, metadata
  )
  except Exception as e:
- logger.warning(f"Failed sending transcript data: {e}")
+ logger.error(f"Failed sending transcript data: {e}")

  yield transcript_id
  finally:
@@ -878,6 +1094,27 @@ class DocentTracer:
  )
  return

+ with self._transcript_group_state_lock:
+ state: dict[str, Optional[str]] = self._transcript_group_states.setdefault(
+ transcript_group_id, {}
+ )
+ final_name: Optional[str] = name if name is not None else state.get("name")
+ final_description: Optional[str] = (
+ description if description is not None else state.get("description")
+ )
+ final_parent_transcript_group_id: Optional[str] = (
+ parent_transcript_group_id
+ if parent_transcript_group_id is not None
+ else state.get("parent_transcript_group_id")
+ )
+
+ if final_name is not None:
+ state["name"] = final_name
+ if final_description is not None:
+ state["description"] = final_description
+ if final_parent_transcript_group_id is not None:
+ state["parent_transcript_group_id"] = final_parent_transcript_group_id
+
  payload: Dict[str, Any] = {
  "collection_id": collection_id,
  "transcript_group_id": transcript_group_id,
@@ -885,13 +1122,14 @@ class DocentTracer:
  "timestamp": datetime.now(timezone.utc).isoformat(),
  }

- if name is not None:
- payload["name"] = name
- if description is not None:
- payload["description"] = description
- if parent_transcript_group_id is not None:
- payload["parent_transcript_group_id"] = parent_transcript_group_id
+ if final_name is not None:
+ payload["name"] = final_name
+ if final_description is not None:
+ payload["description"] = final_description
+ if final_parent_transcript_group_id is not None:
+ payload["parent_transcript_group_id"] = final_parent_transcript_group_id
  if metadata is not None:
+ self._ensure_json_serializable_metadata(metadata, "Transcript group")
  payload["metadata"] = metadata

  self._post_json("/v1/transcript-group-metadata", payload)
@@ -919,9 +1157,7 @@ class DocentTracer:
  The transcript group ID
  """
  if self._disabled:
- # Return dummy ID when tracing is disabled
- if transcript_group_id is None:
- transcript_group_id = str(uuid.uuid4())
+ transcript_group_id = self.get_disabled_transcript_group_id(transcript_group_id)
  yield transcript_group_id
  return

@@ -953,7 +1189,7 @@ class DocentTracer:
  transcript_group_id, name, description, parent_transcript_group_id, metadata
  )
  except Exception as e:
- logger.warning(f"Failed sending transcript group data: {e}")
+ logger.error(f"Failed sending transcript group data: {e}")

  yield transcript_group_id
  finally:
@@ -983,9 +1219,7 @@ class DocentTracer:
  The transcript group ID
  """
  if self._disabled:
- # Return dummy ID when tracing is disabled
- if transcript_group_id is None:
- transcript_group_id = str(uuid.uuid4())
+ transcript_group_id = self.get_disabled_transcript_group_id(transcript_group_id)
  yield transcript_group_id
  return

@@ -1017,7 +1251,7 @@ class DocentTracer:
  transcript_group_id, name, description, parent_transcript_group_id, metadata
  )
  except Exception as e:
- logger.warning(f"Failed sending transcript group data: {e}")
+ logger.error(f"Failed sending transcript group data: {e}")

  yield transcript_group_id
  finally:
@@ -1063,8 +1297,9 @@ def initialize_tracing(
  collection_id: Optional collection ID (auto-generated if not provided)
  endpoint: OTLP endpoint URL(s) for span export - can be a single string or list of strings for multiple endpoints
  headers: Optional headers for authentication
- api_key: Optional API key for bearer token authentication (takes precedence over env var)
- enable_console_export: Whether to export spans to console
+ api_key: Optional API key for bearer token authentication (takes precedence
+ over DOCENT_API_KEY environment variable)
+ enable_console_export: Whether to export spans to console for debugging
  enable_otlp_export: Whether to export spans to OTLP endpoint
  disable_batch: Whether to disable batch processing (use SimpleSpanProcessor)
  instruments: Set of instruments to enable (None = all instruments).
@@ -1074,7 +1309,6 @@ def initialize_tracing(
  The initialized Docent tracer

  Example:
- # Basic setup
  initialize_tracing("my-collection")
  """
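
A slightly fuller call than the docstring example above, using only parameters documented in this diff; the collection name and key are placeholders, and api_key can be omitted when DOCENT_API_KEY is set in the environment:

    from docent.trace import close_tracing, flush_tracing, initialize_tracing

    initialize_tracing(
        "my-collection",
        api_key="<your-docent-api-key>",   # takes precedence over DOCENT_API_KEY
        enable_console_export=False,
        enable_otlp_export=True,
        disable_batch=False,
    )

    # ... run instrumented agent code ...

    flush_tracing()  # force-flush pending spans
    close_tracing()  # explicit cleanup; atexit handlers also run cleanup on shutdown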
 
@@ -1137,17 +1371,17 @@ def close_tracing() -> None:
  def flush_tracing() -> None:
  """Force flush all spans to exporters."""
  if _global_tracer:
- logger.debug("Flushing global tracer")
+ logger.debug("Flushing Docent tracer")
  _global_tracer.flush()
  else:
  logger.debug("No global tracer available to flush")


- def verify_initialized() -> bool:
+ def is_initialized() -> bool:
  """Verify if the global Docent tracer is properly initialized."""
  if _global_tracer is None:
  return False
- return _global_tracer.verify_initialized()
+ return _global_tracer.is_initialized()


  def is_disabled() -> bool:
@@ -1221,28 +1455,33 @@ def agent_run_metadata(metadata: Dict[str, Any]) -> None:

  tracer.send_agent_run_metadata(agent_run_id, metadata)
  except Exception as e:
- logger.error(f"Failed to send metadata: {e}")
+ logger.error(f"Failed to send agent run metadata: {e}")


  def transcript_metadata(
+ metadata: Dict[str, Any],
+ *,
  name: Optional[str] = None,
  description: Optional[str] = None,
  transcript_group_id: Optional[str] = None,
- metadata: Optional[Dict[str, Any]] = None,
  ) -> None:
  """
  Send transcript metadata directly to the backend for the current transcript.

  Args:
+ metadata: Dictionary of metadata to attach to the current transcript (required)
  name: Optional transcript name
  description: Optional transcript description
- parent_transcript_id: Optional parent transcript ID
- metadata: Optional metadata to send
+ transcript_group_id: Optional transcript group ID to associate with

  Example:
- transcript_metadata(name="data_processing", description="Process user data")
- transcript_metadata(metadata={"user": "John", "model": "gpt-4"})
- transcript_metadata(name="validation", parent_transcript_id="parent-123")
+ transcript_metadata({"user": "John", "model": "gpt-4"})
+ transcript_metadata({"env": "prod"}, name="data_processing")
+ transcript_metadata(
+ {"team": "search"},
+ name="validation",
+ transcript_group_id="group-123",
+ )
  """
  try:
  tracer = get_tracer()
@@ -1260,6 +1499,47 @@ def transcript_metadata(
  logger.error(f"Failed to send transcript metadata: {e}")


+ def transcript_group_metadata(
+ metadata: Dict[str, Any],
+ *,
+ name: Optional[str] = None,
+ description: Optional[str] = None,
+ parent_transcript_group_id: Optional[str] = None,
+ ) -> None:
+ """
+ Send transcript group metadata directly to the backend for the current transcript group.
+
+ Args:
+ metadata: Dictionary of metadata to attach to the current transcript group (required)
+ name: Optional transcript group name
+ description: Optional transcript group description
+ parent_transcript_group_id: Optional parent transcript group ID
+
+ Example:
+ transcript_group_metadata({"team": "search", "env": "prod"})
+ transcript_group_metadata({"env": "prod"}, name="pipeline")
+ transcript_group_metadata(
+ {"team": "search"},
+ name="pipeline",
+ parent_transcript_group_id="root-group",
+ )
+ """
+ try:
+ tracer = get_tracer()
+ if tracer.is_disabled():
+ return
+ transcript_group_id = tracer.get_current_transcript_group_id()
+ if not transcript_group_id:
+ logger.warning("No active transcript group context. Metadata will not be sent.")
+ return
+
+ tracer.send_transcript_group_metadata(
+ transcript_group_id, name, description, parent_transcript_group_id, metadata
+ )
+ except Exception as e:
+ logger.error(f"Failed to send transcript group metadata: {e}")
+
+
  class AgentRunContext:
  """Context manager that works in both sync and async contexts."""

@@ -1279,6 +1559,11 @@ class AgentRunContext:

  def __enter__(self) -> tuple[str, str]:
  """Sync context manager entry."""
+ if is_disabled():
+ tracer = get_tracer()
+ self.agent_run_id = tracer.get_disabled_agent_run_id(self.agent_run_id)
+ self.transcript_id = tracer.get_disabled_transcript_id(self.transcript_id)
+ return self.agent_run_id, self.transcript_id
  self._sync_context = get_tracer().agent_run_context(
  self.agent_run_id, self.transcript_id, metadata=self.metadata, **self.attributes
  )
@@ -1291,6 +1576,11 @@ class AgentRunContext:

  async def __aenter__(self) -> tuple[str, str]:
  """Async context manager entry."""
+ if is_disabled():
+ tracer = get_tracer()
+ self.agent_run_id = tracer.get_disabled_agent_run_id(self.agent_run_id)
+ self.transcript_id = tracer.get_disabled_transcript_id(self.transcript_id)
+ return self.agent_run_id, self.transcript_id
  self._async_context = get_tracer().async_agent_run_context(
  self.agent_run_id, self.transcript_id, metadata=self.metadata, **self.attributes
  )
@@ -1431,6 +1721,10 @@ class TranscriptContext:

  def __enter__(self) -> str:
  """Sync context manager entry."""
+ if is_disabled():
+ tracer = get_tracer()
+ self.transcript_id = tracer.get_disabled_transcript_id(self.transcript_id)
+ return self.transcript_id
  self._sync_context = get_tracer().transcript_context(
  name=self.name,
  transcript_id=self.transcript_id,
@@ -1447,6 +1741,10 @@ class TranscriptContext:

  async def __aenter__(self) -> str:
  """Async context manager entry."""
+ if is_disabled():
+ tracer = get_tracer()
+ self.transcript_id = tracer.get_disabled_transcript_id(self.transcript_id)
+ return self.transcript_id
  self._async_context = get_tracer().async_transcript_context(
  name=self.name,
  transcript_id=self.transcript_id,
@@ -1608,6 +1906,12 @@ class TranscriptGroupContext:

  def __enter__(self) -> str:
  """Sync context manager entry."""
+ if is_disabled():
+ tracer = get_tracer()
+ self.transcript_group_id = tracer.get_disabled_transcript_group_id(
+ self.transcript_group_id
+ )
+ return self.transcript_group_id
  self._sync_context = get_tracer().transcript_group_context(
  name=self.name,
  transcript_group_id=self.transcript_group_id,
@@ -1624,6 +1928,12 @@ class TranscriptGroupContext:

  async def __aenter__(self) -> str:
  """Async context manager entry."""
+ if is_disabled():
+ tracer = get_tracer()
+ self.transcript_group_id = tracer.get_disabled_transcript_group_id(
+ self.transcript_group_id
+ )
+ return self.transcript_group_id
  self._async_context = get_tracer().async_transcript_group_context(
  name=self.name,
  transcript_group_id=self.transcript_group_id,
@@ -1764,3 +2074,16 @@ def transcript_group_context(
  return TranscriptGroupContext(
  name, transcript_group_id, description, metadata, parent_transcript_group_id
  )
+
+
+ def _is_tracing_disabled() -> bool:
+ """Check if tracing is disabled via environment variable."""
+ return os.environ.get("DOCENT_DISABLE_TRACING", "").lower() == "true"
+
+
+ def _is_notebook() -> bool:
+ """Check if we're running in a Jupyter notebook."""
+ try:
+ return "ipykernel" in sys.modules
+ except Exception:
+ return False