docent-python 0.1.14a0__py3-none-any.whl → 0.1.28a0__py3-none-any.whl

This diff represents the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.

Potentially problematic release: this version of docent-python might be problematic.

Files changed (46)
  1. docent/_llm_util/__init__.py +0 -0
  2. docent/_llm_util/data_models/__init__.py +0 -0
  3. docent/_llm_util/data_models/exceptions.py +48 -0
  4. docent/_llm_util/data_models/llm_output.py +331 -0
  5. docent/_llm_util/llm_cache.py +193 -0
  6. docent/_llm_util/llm_svc.py +472 -0
  7. docent/_llm_util/model_registry.py +130 -0
  8. docent/_llm_util/providers/__init__.py +0 -0
  9. docent/_llm_util/providers/anthropic.py +537 -0
  10. docent/_llm_util/providers/common.py +41 -0
  11. docent/_llm_util/providers/google.py +530 -0
  12. docent/_llm_util/providers/openai.py +745 -0
  13. docent/_llm_util/providers/openrouter.py +375 -0
  14. docent/_llm_util/providers/preference_types.py +104 -0
  15. docent/_llm_util/providers/provider_registry.py +164 -0
  16. docent/data_models/__init__.py +2 -0
  17. docent/data_models/agent_run.py +17 -29
  18. docent/data_models/chat/__init__.py +6 -1
  19. docent/data_models/chat/message.py +3 -1
  20. docent/data_models/citation.py +103 -22
  21. docent/data_models/judge.py +19 -0
  22. docent/data_models/metadata_util.py +16 -0
  23. docent/data_models/remove_invalid_citation_ranges.py +23 -10
  24. docent/data_models/transcript.py +25 -80
  25. docent/data_models/util.py +170 -0
  26. docent/judges/__init__.py +23 -0
  27. docent/judges/analysis.py +77 -0
  28. docent/judges/impl.py +587 -0
  29. docent/judges/runner.py +129 -0
  30. docent/judges/stats.py +205 -0
  31. docent/judges/types.py +311 -0
  32. docent/judges/util/forgiving_json.py +108 -0
  33. docent/judges/util/meta_schema.json +86 -0
  34. docent/judges/util/meta_schema.py +29 -0
  35. docent/judges/util/parse_output.py +87 -0
  36. docent/judges/util/voting.py +139 -0
  37. docent/sdk/agent_run_writer.py +72 -21
  38. docent/sdk/client.py +276 -23
  39. docent/trace.py +413 -90
  40. {docent_python-0.1.14a0.dist-info → docent_python-0.1.28a0.dist-info}/METADATA +13 -5
  41. docent_python-0.1.28a0.dist-info/RECORD +59 -0
  42. docent/data_models/metadata.py +0 -229
  43. docent/data_models/yaml_util.py +0 -12
  44. docent_python-0.1.14a0.dist-info/RECORD +0 -32
  45. {docent_python-0.1.14a0.dist-info → docent_python-0.1.28a0.dist-info}/WHEEL +0 -0
  46. {docent_python-0.1.14a0.dist-info → docent_python-0.1.28a0.dist-info}/licenses/LICENSE.md +0 -0
docent/trace.py CHANGED
@@ -1,6 +1,7 @@
  import atexit
  import contextvars
  import itertools
+ import json
  import logging
  import os
  import sys
@@ -12,7 +13,19 @@ from contextvars import ContextVar, Token
  from datetime import datetime, timezone
  from enum import Enum
  from importlib.metadata import Distribution, distributions
- from typing import Any, AsyncIterator, Callable, Dict, Iterator, List, Optional, Set, Union
+ from typing import (
+ Any,
+ AsyncIterator,
+ Callable,
+ Dict,
+ Iterator,
+ List,
+ Mapping,
+ Optional,
+ Set,
+ Union,
+ cast,
+ )

  import requests
  from opentelemetry import trace
@@ -21,26 +34,30 @@ from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExport
  from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter as HTTPExporter
  from opentelemetry.instrumentation.threading import ThreadingInstrumentor
  from opentelemetry.sdk.resources import Resource
- from opentelemetry.sdk.trace import ReadableSpan, SpanProcessor, TracerProvider
+ from opentelemetry.sdk.trace import ReadableSpan, SpanLimits, SpanProcessor, TracerProvider
  from opentelemetry.sdk.trace.export import (
  BatchSpanProcessor,
  ConsoleSpanExporter,
  SimpleSpanProcessor,
  )
  from opentelemetry.trace import Span
+ from requests import Response

- # Configure logging
  logger = logging.getLogger(__name__)
- logger.setLevel(logging.ERROR)

  # Default configuration
  DEFAULT_ENDPOINT = "https://api.docent.transluce.org/rest/telemetry"
  DEFAULT_COLLECTION_NAME = "default-collection-name"
+ ERROR_DETAIL_MAX_CHARS = 500

+ # Sentinel values for when tracing is disabled
+ DISABLED_AGENT_RUN_ID = "disabled"
+ DISABLED_TRANSCRIPT_ID = "disabled"
+ DISABLED_TRANSCRIPT_GROUP_ID = "disabled"

- def _is_tracing_disabled() -> bool:
- """Check if tracing is disabled via environment variable."""
- return os.environ.get("DOCENT_DISABLE_TRACING", "").lower() == "true"
+
+ class DocentTelemetryRequestError(RuntimeError):
+ """Raised when the Docent telemetry backend rejects a client request."""


  class Instruments(Enum):
@@ -50,18 +67,13 @@ class Instruments(Enum):
  ANTHROPIC = "anthropic"
  BEDROCK = "bedrock"
  LANGCHAIN = "langchain"
-
-
- def _is_notebook() -> bool:
- """Check if we're running in a Jupyter notebook."""
- try:
- return "ipykernel" in sys.modules
- except Exception:
- return False
+ GOOGLE_GENERATIVEAI = "google_generativeai"


  class DocentTracer:
- """Manages Docent tracing setup and provides tracing utilities."""
+ """
+ Manages Docent tracing setup and provides tracing utilities.
+ """

  def __init__(
  self,
@@ -77,22 +89,6 @@ class DocentTracer:
  instruments: Optional[Set[Instruments]] = None,
  block_instruments: Optional[Set[Instruments]] = None,
  ):
- """
- Initialize Docent tracing manager.
-
- Args:
- collection_name: Name of the collection for resource attributes
- collection_id: Optional collection ID (auto-generated if not provided)
- agent_run_id: Optional agent_run_id to use for code outside of an agent run context (auto-generated if not provided)
- endpoint: OTLP endpoint URL(s) - can be a single string or list of strings for multiple endpoints
- headers: Optional headers for authentication
- api_key: Optional API key for bearer token authentication (takes precedence over env var)
- enable_console_export: Whether to export to console
- enable_otlp_export: Whether to export to OTLP endpoint
- disable_batch: Whether to disable batch processing (use SimpleSpanProcessor)
- instruments: Set of instruments to enable (None = all instruments)
- block_instruments: Set of instruments to explicitly disable
- """
  self._initialized: bool = False
  # Check if tracing is disabled via environment variable
  if _is_tracing_disabled():
@@ -157,14 +153,20 @@ class DocentTracer:
  lambda: itertools.count(0)
  )
  self._transcript_counter_lock = threading.Lock()
+ self._transcript_group_states: dict[str, dict[str, Optional[str]]] = {}
+ self._transcript_group_state_lock = threading.Lock()
  self._flush_lock = threading.Lock()

  def get_current_agent_run_id(self) -> Optional[str]:
  """
  Get the current agent run ID from context.

+ Retrieves the agent run ID that was set in the current execution context.
+ If no agent run context is active, returns the default agent run ID.
+
  Returns:
- The current agent run ID if available, None otherwise
+ The current agent run ID if available, or the default agent run ID
+ if no context is active.
  """
  try:
  return self._agent_run_id_var.get()
@@ -249,12 +251,23 @@ class DocentTracer:
  return

  try:
+
+ # Check for OTEL_SPAN_ATTRIBUTE_COUNT_LIMIT environment variable
+ default_attribute_limit = 1024 * 16
+ env_value = os.environ.get("OTEL_SPAN_ATTRIBUTE_COUNT_LIMIT", "0")
+ env_limit = int(env_value) if env_value.isdigit() else 0
+ attribute_limit = max(env_limit, default_attribute_limit)
+
+ span_limits = SpanLimits(
+ max_attributes=attribute_limit,
+ )
+
  # Create our own isolated tracer provider
  self._tracer_provider = TracerProvider(
- resource=Resource.create({"service.name": self.collection_name})
+ resource=Resource.create({"service.name": self.collection_name}),
+ span_limits=span_limits,
  )

- # Add custom span processor for agent_run_id and transcript_id
  class ContextSpanProcessor(SpanProcessor):
  def __init__(self, manager: "DocentTracer"):
  self.manager: "DocentTracer" = manager
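
With the span-limit change above, the effective attribute cap is the larger of OTEL_SPAN_ATTRIBUTE_COUNT_LIMIT and 16384 (1024 * 16), so the environment variable can only raise the limit, never lower it. A minimal sketch of raising it before tracing is initialized; the value 32768 and the collection name are illustrative:

    import os

    # Must be set before initialize_tracing builds the TracerProvider.
    # Values below 16384 are ignored because the code takes max(env_limit, 16384).
    os.environ["OTEL_SPAN_ATTRIBUTE_COUNT_LIMIT"] = "32768"

    from docent.trace import initialize_tracing

    initialize_tracing("my-collection")
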
@@ -312,11 +325,7 @@ class DocentTracer:
  )

  def on_end(self, span: ReadableSpan) -> None:
- # Debug logging for span completion
- span_attrs = span.attributes or {}
- logger.debug(
- f"Completed span: name='{span.name}', collection_id={span_attrs.get('collection_id')}, agent_run_id={span_attrs.get('agent_run_id')}, transcript_id={span_attrs.get('transcript_id')}, duration_ns={span.end_time - span.start_time if span.end_time and span.start_time else 'unknown'}"
- )
+ pass

  def shutdown(self) -> None:
  pass
@@ -410,6 +419,23 @@ class DocentTracer:
  except Exception as e:
  logger.warning(f"Failed to instrument LangChain: {e}")

+ # Instrument Google Generative AI with our isolated tracer provider
+ if Instruments.GOOGLE_GENERATIVEAI in enabled_instruments:
+ try:
+ if is_package_installed("google-generativeai") or is_package_installed(
+ "google-genai"
+ ):
+ from opentelemetry.instrumentation.google_generativeai import (
+ GoogleGenerativeAiInstrumentor,
+ )
+
+ GoogleGenerativeAiInstrumentor().instrument(
+ tracer_provider=self._tracer_provider
+ )
+ logger.info("Instrumented Google Generative AI")
+ except Exception as e:
+ logger.warning(f"Failed to instrument Google Generative AI: {e}")
+
  # Register cleanup handlers
  self._register_cleanup()

@@ -422,7 +448,17 @@ class DocentTracer:
  raise

  def cleanup(self):
- """Clean up Docent tracing resources and signal trace completion to backend."""
+ """
+ Clean up Docent tracing resources.
+
+ Flushes all pending spans to exporters and shuts down the tracer provider.
+ This method is automatically called during application shutdown via atexit
+ handlers, but can also be called manually for explicit cleanup.
+
+ The cleanup process:
+ 1. Flushes all span processors to ensure data is exported
+ 2. Shuts down the tracer provider and releases resources
+ """
  if self._disabled:
  return

@@ -473,10 +509,28 @@ class DocentTracer:
  if disabled and self._initialized:
  self.cleanup()

- def verify_initialized(self) -> bool:
+ def is_initialized(self) -> bool:
  """Verify if the manager is properly initialized."""
  return self._initialized

+ def get_disabled_agent_run_id(self, agent_run_id: Optional[str]) -> str:
+ """Return sentinel value for agent run ID when tracing is disabled."""
+ if agent_run_id is None:
+ return DISABLED_AGENT_RUN_ID
+ return agent_run_id
+
+ def get_disabled_transcript_id(self, transcript_id: Optional[str]) -> str:
+ """Return sentinel value for transcript ID when tracing is disabled."""
+ if transcript_id is None:
+ return DISABLED_TRANSCRIPT_ID
+ return transcript_id
+
+ def get_disabled_transcript_group_id(self, transcript_group_id: Optional[str]) -> str:
+ """Return sentinel value for transcript group ID when tracing is disabled."""
+ if transcript_group_id is None:
+ return DISABLED_TRANSCRIPT_GROUP_ID
+ return transcript_group_id
+
  @contextmanager
  def agent_run_context(
  self,
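
Together with the DISABLED_* sentinels added earlier in this diff, the get_disabled_* helpers above make disabled tracing yield the fixed string "disabled" instead of random UUIDs. A small sketch of the expected behavior, assuming DOCENT_DISABLE_TRACING is set before initialization; the collection name is illustrative:

    import os

    os.environ["DOCENT_DISABLE_TRACING"] = "true"  # checked by _is_tracing_disabled()

    from docent.trace import initialize_tracing, is_disabled

    tracer = initialize_tracing("my-collection")  # returns the (disabled) DocentTracer
    assert is_disabled()

    # With no explicit IDs, disabled contexts fall back to the sentinel values.
    assert tracer.get_disabled_agent_run_id(None) == "disabled"
    assert tracer.get_disabled_transcript_id(None) == "disabled"
    assert tracer.get_disabled_transcript_group_id(None) == "disabled"
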
@@ -498,11 +552,8 @@ class DocentTracer:
  Tuple of (agent_run_id, transcript_id)
  """
  if self._disabled:
- # Return dummy IDs when tracing is disabled
- if agent_run_id is None:
- agent_run_id = str(uuid.uuid4())
- if transcript_id is None:
- transcript_id = str(uuid.uuid4())
+ agent_run_id = self.get_disabled_agent_run_id(agent_run_id)
+ transcript_id = self.get_disabled_transcript_id(transcript_id)
  yield agent_run_id, transcript_id
  return

@@ -525,7 +576,7 @@ class DocentTracer:
  try:
  self.send_agent_run_metadata(agent_run_id, metadata)
  except Exception as e:
- logger.warning(f"Failed sending agent run metadata: {e}")
+ logger.error(f"Failed sending agent run metadata: {e}")

  yield agent_run_id, transcript_id
  finally:
@@ -555,11 +606,8 @@ class DocentTracer:
  Tuple of (agent_run_id, transcript_id)
  """
  if self._disabled:
- # Return dummy IDs when tracing is disabled
- if agent_run_id is None:
- agent_run_id = str(uuid.uuid4())
- if transcript_id is None:
- transcript_id = str(uuid.uuid4())
+ agent_run_id = self.get_disabled_agent_run_id(agent_run_id)
+ transcript_id = self.get_disabled_transcript_id(transcript_id)
  yield agent_run_id, transcript_id
  return

@@ -605,15 +653,184 @@ class DocentTracer:

  return headers

+ def _ensure_json_serializable_metadata(self, metadata: Dict[str, Any], context: str) -> None:
+ """
+ Validate that metadata can be serialized to JSON before sending it to the backend.
+ """
+ try:
+ json.dumps(metadata)
+ except (TypeError, ValueError) as exc:
+ raise TypeError(f"{context} metadata must be JSON serializable") from exc
+ offending_path = self._find_null_character_path(metadata)
+ if offending_path is not None:
+ raise ValueError(
+ f"{context} metadata cannot contain null characters (found at {offending_path}). "
+ "Remove or replace '\\u0000' before calling Docent tracing APIs."
+ )
+
  def _post_json(self, path: str, data: Dict[str, Any]) -> None:
+ self._post_json_sync(path, data)
+
+ def _post_json_sync(self, path: str, data: Dict[str, Any]) -> None:
  if not self._api_endpoint_base:
  raise RuntimeError("API endpoint base is not configured")
  url = f"{self._api_endpoint_base}{path}"
  try:
  resp = requests.post(url, json=data, headers=self._api_headers(), timeout=(10, 60))
  resp.raise_for_status()
- except requests.exceptions.RequestException as e:
- logger.error(f"Failed POST {url}: {e}")
+ except requests.exceptions.RequestException as exc:
+ message = self._format_request_exception(url, exc)
+ raise DocentTelemetryRequestError(message) from exc
+
+ def _format_request_exception(self, url: str, exc: requests.exceptions.RequestException) -> str:
+ response: Optional[Response] = getattr(exc, "response", None)
+ message_parts: List[str] = [f"Failed POST {url}"]
+ suggestion: Optional[str]
+
+ if response is not None:
+ status_phrase = f"HTTP {response.status_code}"
+ if response.reason:
+ status_phrase = f"{status_phrase} {response.reason}"
+ message_parts.append(f"({status_phrase})")
+
+ detail = self._extract_response_detail(response)
+ if detail:
+ message_parts.append(f"- Backend detail: {detail}")
+
+ request_id = response.headers.get("x-request-id")
+ if request_id:
+ message_parts.append(f"(request-id: {request_id})")
+
+ suggestion = self._suggest_fix_for_status(response.status_code)
+ else:
+ message_parts.append(f"- {exc}")
+ suggestion = self._suggest_fix_for_status(None)
+
+ if suggestion:
+ message_parts.append(suggestion)
+
+ return " ".join(part for part in message_parts if part)
+
+ def _extract_response_detail(self, response: Response) -> Optional[str]:
+ try:
+ body = response.json()
+ except ValueError:
+ text = response.text.strip()
+ if not text:
+ return None
+ normalized = " ".join(text.split())
+ return self._truncate_error_message(normalized)
+
+ if isinstance(body, dict):
+ typed_body = cast(Dict[str, Any], body)
+ structured_message = self._structured_detail_message(typed_body)
+ if structured_message:
+ return self._truncate_error_message(structured_message)
+ return self._truncate_error_message(self._normalize_error_value(typed_body))
+
+ return self._truncate_error_message(self._normalize_error_value(body))
+
+ def _structured_detail_message(self, data: Dict[str, Any]) -> Optional[str]:
+ for key in ("detail", "message", "error"):
+ if key in data:
+ structured_value = self._structured_detail_value(data[key])
+ if structured_value:
+ return structured_value
+ return self._structured_detail_value(data)
+
+ def _structured_detail_value(self, value: Any) -> Optional[str]:
+ if isinstance(value, Mapping):
+ mapping_value = cast(Mapping[str, Any], value)
+ message = mapping_value.get("message")
+ hint = mapping_value.get("hint")
+ error_code = mapping_value.get("error_code")
+ request_id = mapping_value.get("request_id")
+ fallback_detail = mapping_value.get("detail")
+
+ parts: List[str] = []
+ if isinstance(message, str) and message.strip():
+ parts.append(message.strip())
+ elif isinstance(fallback_detail, str) and fallback_detail.strip():
+ parts.append(fallback_detail.strip())
+
+ if isinstance(hint, str) and hint.strip():
+ parts.append(f"(hint: {hint.strip()})")
+ if isinstance(error_code, str) and error_code.strip():
+ parts.append(f"[code: {error_code.strip()}]")
+ if isinstance(request_id, str) and request_id.strip():
+ parts.append(f"(request-id: {request_id.strip()})")
+
+ return " ".join(parts) if parts else None
+
+ if isinstance(value, str) and value.strip():
+ return value.strip()
+
+ return None
+
+ def _normalize_error_value(self, value: Any) -> str:
+ if isinstance(value, str):
+ return " ".join(value.split())
+
+ try:
+ serialized = json.dumps(value)
+ except (TypeError, ValueError):
+ serialized = str(value)
+
+ return " ".join(serialized.split())
+
+ def _truncate_error_message(self, message: str) -> str:
+ message = message.strip()
+ if len(message) <= ERROR_DETAIL_MAX_CHARS:
+ return message
+ return f"{message[:ERROR_DETAIL_MAX_CHARS]}..."
+
+ def _suggest_fix_for_status(self, status_code: Optional[int]) -> Optional[str]:
+ if status_code in (401, 403):
+ return (
+ "Verify that the Authorization header or DOCENT_API_KEY grants write access to the "
+ "target collection."
+ )
+ if status_code == 404:
+ return (
+ "Ensure the tracing endpoint passed to initialize_tracing matches the Docent server's "
+ "/rest/telemetry route."
+ )
+ if status_code in (400, 422):
+ return (
+ "Confirm the payload includes collection_id, agent_run_id, metadata, and timestamp in "
+ "the expected format."
+ )
+ if status_code and status_code >= 500:
+ return "Inspect the Docent backend logs for the referenced request."
+ if status_code is None:
+ return "Confirm the Docent telemetry endpoint is reachable from this process."
+ return None
+
+ def _find_null_character_path(self, value: Any, path: str = "") -> Optional[str]:
+ """Backend rejects NUL bytes, so detect them before we send metadata to the backend."""
+ return None
+ if isinstance(value, str):
+ if "\x00" in value or "\\u0000" in value or "\\x00" in value:
+ return path or "<root>"
+ return None
+
+ if isinstance(value, dict):
+ for key, item in value.items():
+ next_path = f"{path}.{key}" if path else str(key)
+ result = self._find_null_character_path(item, next_path)
+ if result:
+ return result
+ return None
+
+ if isinstance(value, (list, tuple)):
+ for index, item in enumerate(value):
+ next_path = f"{path}[{index}]" if path else f"[{index}]"
+ result = self._find_null_character_path(item, next_path)
+ if result:
+ return result
+ return None
+
+ return None

  def send_agent_run_score(
  self,
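
With this hunk, _post_json raises DocentTelemetryRequestError carrying the message built by _format_request_exception (HTTP status, backend detail, request id, and a suggested fix) instead of only logging the failure. The module-level helpers such as agent_run_metadata still catch and log it; a hedged sketch of handling it when calling the tracer's send_* methods directly (the run id and metadata are illustrative):

    from docent.trace import DocentTelemetryRequestError, get_tracer

    tracer = get_tracer()
    try:
        # Direct send_* calls surface backend rejections as DocentTelemetryRequestError.
        tracer.send_agent_run_metadata("agent-run-123", {"experiment": "baseline"})
    except DocentTelemetryRequestError as exc:
        # Message shape (per _format_request_exception): "Failed POST <url> (HTTP 401 Unauthorized)
        # - Backend detail: ... (request-id: ...) Verify that the Authorization header or
        # DOCENT_API_KEY grants write access to the target collection."
        print(exc)
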
@@ -650,6 +867,8 @@ class DocentTracer:
  if self._disabled:
  return

+ self._ensure_json_serializable_metadata(metadata, "Agent run")
+
  collection_id = self.collection_id
  payload: Dict[str, Any] = {
  "collection_id": collection_id,
@@ -695,6 +914,7 @@ class DocentTracer:
  if transcript_group_id is not None:
  payload["transcript_group_id"] = transcript_group_id
  if metadata is not None:
+ self._ensure_json_serializable_metadata(metadata, "Transcript")
  payload["metadata"] = metadata

  self._post_json("/v1/transcript-metadata", payload)
@@ -746,9 +966,7 @@ class DocentTracer:
  The transcript ID
  """
  if self._disabled:
- # Return dummy ID when tracing is disabled
- if transcript_id is None:
- transcript_id = str(uuid.uuid4())
+ transcript_id = self.get_disabled_transcript_id(transcript_id)
  yield transcript_id
  return

@@ -778,7 +996,7 @@ class DocentTracer:
  transcript_id, name, description, transcript_group_id, metadata
  )
  except Exception as e:
- logger.warning(f"Failed sending transcript data: {e}")
+ logger.error(f"Failed sending transcript data: {e}")

  yield transcript_id
  finally:
@@ -808,9 +1026,7 @@ class DocentTracer:
  The transcript ID
  """
  if self._disabled:
- # Return dummy ID when tracing is disabled
- if transcript_id is None:
- transcript_id = str(uuid.uuid4())
+ transcript_id = self.get_disabled_transcript_id(transcript_id)
  yield transcript_id
  return

@@ -840,7 +1056,7 @@ class DocentTracer:
  transcript_id, name, description, transcript_group_id, metadata
  )
  except Exception as e:
- logger.warning(f"Failed sending transcript data: {e}")
+ logger.error(f"Failed sending transcript data: {e}")

  yield transcript_id
  finally:
@@ -878,6 +1094,27 @@ class DocentTracer:
  )
  return

+ with self._transcript_group_state_lock:
+ state: dict[str, Optional[str]] = self._transcript_group_states.setdefault(
+ transcript_group_id, {}
+ )
+ final_name: Optional[str] = name if name is not None else state.get("name")
+ final_description: Optional[str] = (
+ description if description is not None else state.get("description")
+ )
+ final_parent_transcript_group_id: Optional[str] = (
+ parent_transcript_group_id
+ if parent_transcript_group_id is not None
+ else state.get("parent_transcript_group_id")
+ )
+
+ if final_name is not None:
+ state["name"] = final_name
+ if final_description is not None:
+ state["description"] = final_description
+ if final_parent_transcript_group_id is not None:
+ state["parent_transcript_group_id"] = final_parent_transcript_group_id
+
  payload: Dict[str, Any] = {
  "collection_id": collection_id,
  "transcript_group_id": transcript_group_id,
@@ -885,13 +1122,14 @@ class DocentTracer:
  "timestamp": datetime.now(timezone.utc).isoformat(),
  }

- if name is not None:
- payload["name"] = name
- if description is not None:
- payload["description"] = description
- if parent_transcript_group_id is not None:
- payload["parent_transcript_group_id"] = parent_transcript_group_id
+ if final_name is not None:
+ payload["name"] = final_name
+ if final_description is not None:
+ payload["description"] = final_description
+ if final_parent_transcript_group_id is not None:
+ payload["parent_transcript_group_id"] = final_parent_transcript_group_id
  if metadata is not None:
+ self._ensure_json_serializable_metadata(metadata, "Transcript group")
  payload["metadata"] = metadata

  self._post_json("/v1/transcript-group-metadata", payload)
@@ -919,9 +1157,7 @@ class DocentTracer:
  The transcript group ID
  """
  if self._disabled:
- # Return dummy ID when tracing is disabled
- if transcript_group_id is None:
- transcript_group_id = str(uuid.uuid4())
+ transcript_group_id = self.get_disabled_transcript_group_id(transcript_group_id)
  yield transcript_group_id
  return

@@ -953,7 +1189,7 @@ class DocentTracer:
  transcript_group_id, name, description, parent_transcript_group_id, metadata
  )
  except Exception as e:
- logger.warning(f"Failed sending transcript group data: {e}")
+ logger.error(f"Failed sending transcript group data: {e}")

  yield transcript_group_id
  finally:
@@ -983,9 +1219,7 @@ class DocentTracer:
  The transcript group ID
  """
  if self._disabled:
- # Return dummy ID when tracing is disabled
- if transcript_group_id is None:
- transcript_group_id = str(uuid.uuid4())
+ transcript_group_id = self.get_disabled_transcript_group_id(transcript_group_id)
  yield transcript_group_id
  return

@@ -1017,7 +1251,7 @@ class DocentTracer:
  transcript_group_id, name, description, parent_transcript_group_id, metadata
  )
  except Exception as e:
- logger.warning(f"Failed sending transcript group data: {e}")
+ logger.error(f"Failed sending transcript group data: {e}")

  yield transcript_group_id
  finally:
@@ -1063,8 +1297,9 @@ def initialize_tracing(
  collection_id: Optional collection ID (auto-generated if not provided)
  endpoint: OTLP endpoint URL(s) for span export - can be a single string or list of strings for multiple endpoints
  headers: Optional headers for authentication
- api_key: Optional API key for bearer token authentication (takes precedence over env var)
- enable_console_export: Whether to export spans to console
+ api_key: Optional API key for bearer token authentication (takes precedence
+ over DOCENT_API_KEY environment variable)
+ enable_console_export: Whether to export spans to console for debugging
  enable_otlp_export: Whether to export spans to OTLP endpoint
  disable_batch: Whether to disable batch processing (use SimpleSpanProcessor)
  instruments: Set of instruments to enable (None = all instruments).
@@ -1074,7 +1309,6 @@ def initialize_tracing(
  The initialized Docent tracer

  Example:
- # Basic setup
  initialize_tracing("my-collection")
  """
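
A slightly fuller call than the docstring example above, using only parameters documented in this diff; the collection name and key are placeholders, and api_key can be omitted when DOCENT_API_KEY is set in the environment:

    from docent.trace import close_tracing, flush_tracing, initialize_tracing

    initialize_tracing(
        "my-collection",
        api_key="<your-docent-api-key>",   # takes precedence over DOCENT_API_KEY
        enable_console_export=False,
        enable_otlp_export=True,
        disable_batch=False,
    )

    # ... run instrumented agent code ...

    flush_tracing()  # force-flush pending spans
    close_tracing()  # explicit cleanup; atexit handlers also run cleanup on shutdown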
 
@@ -1137,17 +1371,17 @@ def close_tracing() -> None:
  def flush_tracing() -> None:
  """Force flush all spans to exporters."""
  if _global_tracer:
- logger.debug("Flushing global tracer")
+ logger.debug("Flushing Docent tracer")
  _global_tracer.flush()
  else:
  logger.debug("No global tracer available to flush")


- def verify_initialized() -> bool:
+ def is_initialized() -> bool:
  """Verify if the global Docent tracer is properly initialized."""
  if _global_tracer is None:
  return False
- return _global_tracer.verify_initialized()
+ return _global_tracer.is_initialized()


  def is_disabled() -> bool:
@@ -1221,28 +1455,33 @@ def agent_run_metadata(metadata: Dict[str, Any]) -> None:

  tracer.send_agent_run_metadata(agent_run_id, metadata)
  except Exception as e:
- logger.error(f"Failed to send metadata: {e}")
+ logger.error(f"Failed to send agent run metadata: {e}")


  def transcript_metadata(
+ metadata: Dict[str, Any],
+ *,
  name: Optional[str] = None,
  description: Optional[str] = None,
  transcript_group_id: Optional[str] = None,
- metadata: Optional[Dict[str, Any]] = None,
  ) -> None:
  """
  Send transcript metadata directly to the backend for the current transcript.

  Args:
+ metadata: Dictionary of metadata to attach to the current transcript (required)
  name: Optional transcript name
  description: Optional transcript description
- parent_transcript_id: Optional parent transcript ID
- metadata: Optional metadata to send
+ transcript_group_id: Optional transcript group ID to associate with

  Example:
- transcript_metadata(name="data_processing", description="Process user data")
- transcript_metadata(metadata={"user": "John", "model": "gpt-4"})
- transcript_metadata(name="validation", parent_transcript_id="parent-123")
+ transcript_metadata({"user": "John", "model": "gpt-4"})
+ transcript_metadata({"env": "prod"}, name="data_processing")
+ transcript_metadata(
+ {"team": "search"},
+ name="validation",
+ transcript_group_id="group-123",
+ )
  """
  try:
  tracer = get_tracer()
@@ -1260,6 +1499,47 @@ def transcript_metadata(
  logger.error(f"Failed to send transcript metadata: {e}")


+ def transcript_group_metadata(
+ metadata: Dict[str, Any],
+ *,
+ name: Optional[str] = None,
+ description: Optional[str] = None,
+ parent_transcript_group_id: Optional[str] = None,
+ ) -> None:
+ """
+ Send transcript group metadata directly to the backend for the current transcript group.
+
+ Args:
+ metadata: Dictionary of metadata to attach to the current transcript group (required)
+ name: Optional transcript group name
+ description: Optional transcript group description
+ parent_transcript_group_id: Optional parent transcript group ID
+
+ Example:
+ transcript_group_metadata({"team": "search", "env": "prod"})
+ transcript_group_metadata({"env": "prod"}, name="pipeline")
+ transcript_group_metadata(
+ {"team": "search"},
+ name="pipeline",
+ parent_transcript_group_id="root-group",
+ )
+ """
+ try:
+ tracer = get_tracer()
+ if tracer.is_disabled():
+ return
+ transcript_group_id = tracer.get_current_transcript_group_id()
+ if not transcript_group_id:
+ logger.warning("No active transcript group context. Metadata will not be sent.")
+ return
+
+ tracer.send_transcript_group_metadata(
+ transcript_group_id, name, description, parent_transcript_group_id, metadata
+ )
+ except Exception as e:
+ logger.error(f"Failed to send transcript group metadata: {e}")
+
+
  class AgentRunContext:
  """Context manager that works in both sync and async contexts."""

@@ -1279,6 +1559,11 @@ class AgentRunContext:

  def __enter__(self) -> tuple[str, str]:
  """Sync context manager entry."""
+ if is_disabled():
+ tracer = get_tracer()
+ self.agent_run_id = tracer.get_disabled_agent_run_id(self.agent_run_id)
+ self.transcript_id = tracer.get_disabled_transcript_id(self.transcript_id)
+ return self.agent_run_id, self.transcript_id
  self._sync_context = get_tracer().agent_run_context(
  self.agent_run_id, self.transcript_id, metadata=self.metadata, **self.attributes
  )
@@ -1291,6 +1576,11 @@ class AgentRunContext:

  async def __aenter__(self) -> tuple[str, str]:
  """Async context manager entry."""
+ if is_disabled():
+ tracer = get_tracer()
+ self.agent_run_id = tracer.get_disabled_agent_run_id(self.agent_run_id)
+ self.transcript_id = tracer.get_disabled_transcript_id(self.transcript_id)
+ return self.agent_run_id, self.transcript_id
  self._async_context = get_tracer().async_agent_run_context(
  self.agent_run_id, self.transcript_id, metadata=self.metadata, **self.attributes
  )
@@ -1431,6 +1721,10 @@ class TranscriptContext:

  def __enter__(self) -> str:
  """Sync context manager entry."""
+ if is_disabled():
+ tracer = get_tracer()
+ self.transcript_id = tracer.get_disabled_transcript_id(self.transcript_id)
+ return self.transcript_id
  self._sync_context = get_tracer().transcript_context(
  name=self.name,
  transcript_id=self.transcript_id,
@@ -1447,6 +1741,10 @@ class TranscriptContext:

  async def __aenter__(self) -> str:
  """Async context manager entry."""
+ if is_disabled():
+ tracer = get_tracer()
+ self.transcript_id = tracer.get_disabled_transcript_id(self.transcript_id)
+ return self.transcript_id
  self._async_context = get_tracer().async_transcript_context(
  name=self.name,
  transcript_id=self.transcript_id,
@@ -1608,6 +1906,12 @@ class TranscriptGroupContext:

  def __enter__(self) -> str:
  """Sync context manager entry."""
+ if is_disabled():
+ tracer = get_tracer()
+ self.transcript_group_id = tracer.get_disabled_transcript_group_id(
+ self.transcript_group_id
+ )
+ return self.transcript_group_id
  self._sync_context = get_tracer().transcript_group_context(
  name=self.name,
  transcript_group_id=self.transcript_group_id,
@@ -1624,6 +1928,12 @@ class TranscriptGroupContext:

  async def __aenter__(self) -> str:
  """Async context manager entry."""
+ if is_disabled():
+ tracer = get_tracer()
+ self.transcript_group_id = tracer.get_disabled_transcript_group_id(
+ self.transcript_group_id
+ )
+ return self.transcript_group_id
  self._async_context = get_tracer().async_transcript_group_context(
  name=self.name,
  transcript_group_id=self.transcript_group_id,
@@ -1764,3 +2074,16 @@ def transcript_group_context(
  return TranscriptGroupContext(
  name, transcript_group_id, description, metadata, parent_transcript_group_id
  )
+
+
+ def _is_tracing_disabled() -> bool:
+ """Check if tracing is disabled via environment variable."""
+ return os.environ.get("DOCENT_DISABLE_TRACING", "").lower() == "true"
+
+
+ def _is_notebook() -> bool:
+ """Check if we're running in a Jupyter notebook."""
+ try:
+ return "ipykernel" in sys.modules
+ except Exception:
+ return False