PyPI - docent-python - Versions diffs - 0.1.24a0__py3-none-any.whl → 0.1.28a0__py3-none-any.whl - Mend

docent-python 0.1.24a0-py3-none-any.whl → 0.1.28a0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of docent-python might be problematic. Click here for more details.

Files changed (12)

docent/_llm_util/data_models/llm_output.py +8 -0
docent/_llm_util/llm_svc.py +6 -6
docent/_llm_util/model_registry.py +4 -0
docent/_llm_util/providers/anthropic.py +1 -1
docent/data_models/agent_run.py +1 -0
docent/judges/runner.py +75 -12
docent/sdk/client.py +118 -1
docent/trace.py +312 -47
{docent_python-0.1.24a0.dist-info → docent_python-0.1.28a0.dist-info}/METADATA +1 -1
{docent_python-0.1.24a0.dist-info → docent_python-0.1.28a0.dist-info}/RECORD +12 -12
{docent_python-0.1.24a0.dist-info → docent_python-0.1.28a0.dist-info}/WHEEL +0 -0
{docent_python-0.1.24a0.dist-info → docent_python-0.1.28a0.dist-info}/licenses/LICENSE.md +0 -0

docent/_llm_util/data_models/llm_output.py CHANGED Viewed

@@ -8,6 +8,7 @@ from pydantic import BaseModel
 from docent._llm_util.data_models.exceptions import (
     LLM_ERROR_TYPES,
     CompletionTooLongException,
+    ContextWindowException,
     LLMException,
 )
 from docent._log_util import get_logger
@@ -148,6 +149,13 @@ class LLMOutput:
     def from_dict(cls, data: dict[str, Any]) -> "LLMOutput":
         error_type_map = {e.error_type_id: e for e in LLM_ERROR_TYPES}
         errors = data.get("errors", [])
+        error_types_to_not_log: list[str] = [
+            CompletionTooLongException.error_type_id,
+            ContextWindowException.error_type_id,
+        ]
+        errors_to_log = [e for e in errors if e not in error_types_to_not_log]
+        if errors_to_log:
+            logger.error(f"Loading LLM output with errors: {errors}")
         errors = [error_type_map.get(e, LLMException)() for e in errors]
         completions = data.get("completions", [])

docent/_llm_util/llm_svc.py CHANGED Viewed

@@ -75,7 +75,7 @@ async def _parallelize_calls(
     completion_callback: AsyncLLMOutputStreamingCallback | None,
     # Arguments for the individual completion getter
     client: Any,
-    inputs: list[MessagesInput],
+    inputs: Sequence[MessagesInput],
     model_name: str,
     tools: list[ToolInfo] | None,
     tool_choice: Literal["auto", "required"] | None,
@@ -176,7 +176,7 @@ async def _parallelize_calls(
                         )
                         if retry_count >= MAX_VALIDATION_ATTEMPTS:
                             logger.error(
-                                f"Validation failed for {model_name} after {MAX_VALIDATION_ATTEMPTS} attempts: {e}"
+                                f"Validation failed for {model_name} after {retry_count} attempts. Original output: {e.failed_output}"
                             )
                             result = LLMOutput(
                                 model=model_name,
@@ -195,8 +195,8 @@ async def _parallelize_calls(
                         break
                     except Exception as e:
                         if not isinstance(e, LLMException):
-                            logger.warning(
-                                f"LLM call raised an exception that is not an LLMException: {e}"
+                            logger.error(
+                                f"LLM call raised an exception that is not an LLMException: {e}. Failure traceback:\n{traceback.format_exc()}"
                             )
                             llm_exception = LLMException(e)
                             llm_exception.__cause__ = e
@@ -306,7 +306,7 @@ async def _parallelize_calls(
 class BaseLLMService:
     def __init__(self, max_concurrency: int = DEFAULT_SVC_MAX_CONCURRENCY):
-        self._semaphore = Semaphore(max_concurrency)
+        self.max_concurrency, self._semaphore = max_concurrency, Semaphore(max_concurrency)
         self._client_cache: dict[tuple[str, str | None], Any] = {}  # (provider, api_key) -> client
         self._client_cache_lock = Lock()
@@ -326,7 +326,7 @@ class BaseLLMService:
     async def get_completions(
         self,
         *,
-        inputs: list[MessagesInput],
+        inputs: Sequence[MessagesInput],
         model_options: list[ModelOption],
         tools: list[ToolInfo] | None = None,
         tool_choice: Literal["auto", "required"] | None = None,

docent/_llm_util/model_registry.py CHANGED Viewed

@@ -54,6 +54,10 @@ _REGISTRY: list[tuple[str, ModelInfo]] = [
         "claude-sonnet-4",
         ModelInfo(rate={"input": 3.0, "output": 15.0}, context_window=200_000),
     ),
+    (
+        "claude-haiku-4-5",
+        ModelInfo(rate={"input": 1.0, "output": 5.0}, context_window=200_000),
+    ),
     (
         "gemini-2.5-flash-lite",
         ModelInfo(

docent/_llm_util/providers/anthropic.py CHANGED Viewed

@@ -178,7 +178,7 @@ def _parse_tool_choice(tool_choice: Literal["auto", "required"] | None) -> ToolC
 def _convert_anthropic_error(e: Exception):
     if isinstance(e, BadRequestError):
-        if "context limit" in e.message.lower():
+        if "context limit" in e.message.lower() or "prompt is too long" in e.message.lower():
             return ContextWindowException()
     if isinstance(e, RateLimitError):
         return RateLimitException(e)

docent/data_models/agent_run.py CHANGED Viewed

@@ -125,6 +125,7 @@ class AgentRun(BaseModel):
         #     )
         # Append the text field
+        result.append({"name": "agent_run_id", "type": "str"})
         result.append({"name": "text", "type": "str"})
         return result

docent/judges/runner.py CHANGED Viewed

@@ -1,3 +1,5 @@
+from typing import Protocol, Sequence, runtime_checkable
 import anyio
 from tqdm.auto import tqdm
@@ -14,12 +16,28 @@ from docent.judges.impl import build_judge
 logger = get_logger(__name__)
+@runtime_checkable
+class AgentRunResolver(Protocol):
+    async def __call__(self) -> AgentRun | None: ...
+AgentRunInput = AgentRun | AgentRunResolver
+async def _resolve_agent_run(agent_run_input: AgentRunInput) -> AgentRun | None:
+    if isinstance(agent_run_input, AgentRun):
+        return agent_run_input
+    else:
+        return await agent_run_input()
 async def run_rubric(
-    agent_runs: list[AgentRun],
+    agent_runs: Sequence[AgentRunInput],
     rubric: Rubric,
     llm_svc: BaseLLMService,
     callback: JudgeResultCompletionCallback | None = None,
     *,
+    n_rollouts_per_input: int | list[int] = 1,
     show_progress: bool = True,
 ) -> list[JudgeResult | None]:
     if not agent_runs:
@@ -27,26 +45,70 @@ async def run_rubric(
     if rubric.n_rollouts_per_input <= 0:
         raise ValueError("rubric.n_rollouts_per_input must be greater than 0")
+    # Normalize n_rollouts_per_input to a list
+    if isinstance(n_rollouts_per_input, int):
+        if n_rollouts_per_input < 0:
+            raise ValueError("n_rollouts_per_input must be non-negative")
+        rollouts_per_run = [n_rollouts_per_input] * len(agent_runs)
+    else:
+        rollouts_per_run = n_rollouts_per_input
+        if len(rollouts_per_run) != len(agent_runs):
+            raise ValueError("n_rollouts_per_input list must match agent_runs length")
+        if any(n < 0 for n in rollouts_per_run):
+            raise ValueError("All values in n_rollouts_per_input must be non-negative")
     judge = build_judge(rubric, llm_svc)
+    total_rollouts = sum(rollouts_per_run)
     logger.info(
-        "Running rubric %s version %s against %d agent runs",
+        "Running rubric %s version %s against %d agent runs with %d total rollouts",
         rubric.id,
         rubric.version,
         len(agent_runs),
+        total_rollouts,
     )
-    agent_results: list[JudgeResult | None] = [None for _ in agent_runs]
+    agent_results: list[list[JudgeResult | None]] = [[] for _ in agent_runs]
     progress_bar = tqdm(
-        total=len(agent_runs), desc=f"Rubric {rubric.id}", disable=not show_progress
+        total=total_rollouts,
+        desc=f"Rubric {rubric.id}",
+        disable=not show_progress,
     )
-    async def _run_single_judge(index: int, agent_run: AgentRun):
-        agent_results[index] = result = await judge(agent_run)
+    # NOTE(mengk): using a (2 * llm max concurrency) semaphore is a hack to avoid
+    #   hammering _resolve_agent_run, which makes expensive DB calls, when they aren't going to be
+    #   immediately processed by the LLMService anyways.
+    # TODO(mengk): We should eventually implement a more idiomatic solution to this.
+    #   It's related to the idea of a global concurrency limiter.
+    run_judge_semaphore = anyio.Semaphore(llm_svc.max_concurrency * 2)
+    async def _run_single_judge(index: int, agent_run_input: AgentRunInput):
+        async with run_judge_semaphore:
+            rollout_results: list[JudgeResult | None] = []
+            if rollouts_per_run[index] == 0:
+                agent_results[index] = []
+                if callback is not None:
+                    await callback(index, None)
+                return
+            agent_run = await _resolve_agent_run(agent_run_input)
+            if agent_run is None:
+                if callback is not None:
+                    await callback(index, None)
+                return
+            for _ in range(rollouts_per_run[index]):
+                result = await judge(agent_run)
+                rollout_results.append(result)
+                progress_bar.update()
+            agent_results[index] = rollout_results
-        if callback is not None:
-            await callback(index, [result] if result is not None else None)
-        progress_bar.update()
+            if callback is not None:
+                # Filter out None results for the callback
+                valid_results = [r for r in rollout_results if r is not None]
+                await callback(index, valid_results if valid_results else None)
     try:
         async with anyio.create_task_group() as tg:
@@ -55,12 +117,13 @@ async def run_rubric(
     finally:
         progress_bar.close()
-    successful = sum(result is not None for result in agent_results)
+    flattened_results = [result for rollouts in agent_results for result in rollouts]
+    successful = sum(result is not None for result in flattened_results)
     logger.info(
         "Finished rubric %s: produced %d/%d judge results",
         rubric.id,
         successful,
-        len(agent_results),
+        len(flattened_results),
     )
-    return agent_results
+    return flattened_results

docent/sdk/client.py CHANGED Viewed

@@ -200,7 +200,7 @@ class Docent:
             version: The version of the rubric to get run state for. If None, the latest version is used.
         Returns:
-            dict: Dictionary containing rubric run state with results, job_id, and total_agent_runs.
+            dict: Dictionary containing rubric run state with results, job_id, and total_results_needed.
         Raises:
             requests.exceptions.HTTPError: If the API request fails.
@@ -450,6 +450,123 @@ class Docent:
         logger.info(f"Successfully shared Collection '{collection_id}' with {email}")
         return response.json()
+    def collection_exists(self, collection_id: str) -> bool:
+        """Check if a collection exists without raising if it does not."""
+        url = f"{self._server_url}/{collection_id}/exists"
+        response = self._session.get(url)
+        self._handle_response_errors(response)
+        return bool(response.json())
+    def has_collection_permission(self, collection_id: str, permission: str = "write") -> bool:
+        """Check whether the authenticated user has a specific permission on a collection.
+        Args:
+            collection_id: Collection to check.
+            permission: Permission level to verify (`read`, `write`, or `admin`).
+        Returns:
+            bool: True if the current API key has the requested permission; otherwise False.
+        Raises:
+            ValueError: If an unsupported permission value is provided.
+            requests.exceptions.HTTPError: If the API request fails.
+        """
+        valid_permissions = {"read", "write", "admin"}
+        if permission not in valid_permissions:
+            raise ValueError(f"permission must be one of {sorted(valid_permissions)}")
+        url = f"{self._server_url}/{collection_id}/has_permission"
+        response = self._session.get(url, params={"permission": permission})
+        self._handle_response_errors(response)
+        payload = response.json()
+        return bool(payload.get("has_permission", False))
+    def get_dql_schema(self, collection_id: str) -> dict[str, Any]:
+        """Retrieve the DQL schema for a collection.
+        Args:
+            collection_id: ID of the Collection.
+        Returns:
+            dict: Dictionary containing available tables, columns, and metadata for DQL queries.
+        Raises:
+            requests.exceptions.HTTPError: If the API request fails.
+        """
+        url = f"{self._server_url}/dql/{collection_id}/schema"
+        response = self._session.get(url)
+        self._handle_response_errors(response)
+        return response.json()
+    def execute_dql(self, collection_id: str, dql: str) -> dict[str, Any]:
+        """Execute a DQL query against a collection.
+        Args:
+            collection_id: ID of the Collection.
+            dql: The DQL query string to execute.
+        Returns:
+            dict: Query execution results including rows, columns, execution metadata, and selected columns.
+        Raises:
+            ValueError: If `dql` is empty.
+            requests.exceptions.HTTPError: If the API request fails or the query is invalid.
+        """
+        if not dql.strip():
+            raise ValueError("dql must be a non-empty string")
+        url = f"{self._server_url}/dql/{collection_id}/execute"
+        response = self._session.post(url, json={"dql": dql})
+        self._handle_response_errors(response)
+        return response.json()
+    def select_agent_run_ids(
+        self,
+        collection_id: str,
+        where_clause: str | None = None,
+        limit: int | None = None,
+    ) -> list[str]:
+        """Convenience helper to fetch agent run IDs via DQL.
+        Args:
+            collection_id: ID of the Collection to query.
+            where_clause: Optional DQL WHERE clause applied to the agent_runs table.
+            limit: Optional LIMIT applied to the underlying DQL query.
+        Returns:
+            list[str]: Agent run IDs matching the criteria.
+        Raises:
+            ValueError: If the inputs are invalid.
+            requests.exceptions.HTTPError: If the API request fails.
+        """
+        query = "SELECT agent_runs.id AS agent_run_id FROM agent_runs"
+        if where_clause:
+            where_clause = where_clause.strip()
+            if not where_clause:
+                raise ValueError("where_clause must be a non-empty string when provided")
+            query += f" WHERE {where_clause}"
+        if limit is not None:
+            if limit <= 0:
+                raise ValueError("limit must be a positive integer when provided")
+            query += f" LIMIT {limit}"
+        result = self.execute_dql(collection_id, query)
+        rows = result.get("rows", [])
+        agent_run_ids = [str(row[0]) for row in rows if row]
+        if result.get("truncated"):
+            logger.warning(
+                "DQL query truncated at applied limit %s; returning %s agent run IDs",
+                result.get("applied_limit"),
+                len(agent_run_ids),
+            )
+        return agent_run_ids
     def list_agent_run_ids(self, collection_id: str) -> list[str]:
         """Get all agent run IDs for a collection.

docent/trace.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import atexit
 import contextvars
 import itertools
+import json
 import logging
 import os
 import sys
@@ -12,7 +13,19 @@ from contextvars import ContextVar, Token
 from datetime import datetime, timezone
 from enum import Enum
 from importlib.metadata import Distribution, distributions
-from typing import Any, AsyncIterator, Callable, Dict, Iterator, List, Optional, Set, Union
+from typing import (
+    Any,
+    AsyncIterator,
+    Callable,
+    Dict,
+    Iterator,
+    List,
+    Mapping,
+    Optional,
+    Set,
+    Union,
+    cast,
+)
 import requests
 from opentelemetry import trace
@@ -28,12 +41,23 @@ from opentelemetry.sdk.trace.export import (
     SimpleSpanProcessor,
 )
 from opentelemetry.trace import Span
+from requests import Response
 logger = logging.getLogger(__name__)
 # Default configuration
 DEFAULT_ENDPOINT = "https://api.docent.transluce.org/rest/telemetry"
 DEFAULT_COLLECTION_NAME = "default-collection-name"
+ERROR_DETAIL_MAX_CHARS = 500
+# Sentinel values for when tracing is disabled
+DISABLED_AGENT_RUN_ID = "disabled"
+DISABLED_TRANSCRIPT_ID = "disabled"
+DISABLED_TRANSCRIPT_GROUP_ID = "disabled"
+class DocentTelemetryRequestError(RuntimeError):
+    """Raised when the Docent telemetry backend rejects a client request."""
 class Instruments(Enum):
@@ -129,6 +153,8 @@ class DocentTracer:
             lambda: itertools.count(0)
         )
         self._transcript_counter_lock = threading.Lock()
+        self._transcript_group_states: dict[str, dict[str, Optional[str]]] = {}
+        self._transcript_group_state_lock = threading.Lock()
         self._flush_lock = threading.Lock()
     def get_current_agent_run_id(self) -> Optional[str]:
@@ -487,6 +513,24 @@ class DocentTracer:
         """Verify if the manager is properly initialized."""
         return self._initialized
+    def get_disabled_agent_run_id(self, agent_run_id: Optional[str]) -> str:
+        """Return sentinel value for agent run ID when tracing is disabled."""
+        if agent_run_id is None:
+            return DISABLED_AGENT_RUN_ID
+        return agent_run_id
+    def get_disabled_transcript_id(self, transcript_id: Optional[str]) -> str:
+        """Return sentinel value for transcript ID when tracing is disabled."""
+        if transcript_id is None:
+            return DISABLED_TRANSCRIPT_ID
+        return transcript_id
+    def get_disabled_transcript_group_id(self, transcript_group_id: Optional[str]) -> str:
+        """Return sentinel value for transcript group ID when tracing is disabled."""
+        if transcript_group_id is None:
+            return DISABLED_TRANSCRIPT_GROUP_ID
+        return transcript_group_id
     @contextmanager
     def agent_run_context(
         self,
@@ -508,11 +552,8 @@ class DocentTracer:
             Tuple of (agent_run_id, transcript_id)
         """
         if self._disabled:
-            # Return dummy IDs when tracing is disabled
-            if agent_run_id is None:
-                agent_run_id = str(uuid.uuid4())
-            if transcript_id is None:
-                transcript_id = str(uuid.uuid4())
+            agent_run_id = self.get_disabled_agent_run_id(agent_run_id)
+            transcript_id = self.get_disabled_transcript_id(transcript_id)
             yield agent_run_id, transcript_id
             return
@@ -535,7 +576,7 @@ class DocentTracer:
                 try:
                     self.send_agent_run_metadata(agent_run_id, metadata)
                 except Exception as e:
-                    logger.warning(f"Failed sending agent run metadata: {e}")
+                    logger.error(f"Failed sending agent run metadata: {e}")
             yield agent_run_id, transcript_id
         finally:
@@ -565,11 +606,8 @@ class DocentTracer:
             Tuple of (agent_run_id, transcript_id)
         """
         if self._disabled:
-            # Return dummy IDs when tracing is disabled
-            if agent_run_id is None:
-                agent_run_id = str(uuid.uuid4())
-            if transcript_id is None:
-                transcript_id = str(uuid.uuid4())
+            agent_run_id = self.get_disabled_agent_run_id(agent_run_id)
+            transcript_id = self.get_disabled_transcript_id(transcript_id)
             yield agent_run_id, transcript_id
             return
@@ -615,15 +653,184 @@ class DocentTracer:
         return headers
+    def _ensure_json_serializable_metadata(self, metadata: Dict[str, Any], context: str) -> None:
+        """
+        Validate that metadata can be serialized to JSON before sending it to the backend.
+        """
+        try:
+            json.dumps(metadata)
+        except (TypeError, ValueError) as exc:
+            raise TypeError(f"{context} metadata must be JSON serializable") from exc
+        offending_path = self._find_null_character_path(metadata)
+        if offending_path is not None:
+            raise ValueError(
+                f"{context} metadata cannot contain null characters (found at {offending_path}). "
+                "Remove or replace '\\u0000' before calling Docent tracing APIs."
+            )
     def _post_json(self, path: str, data: Dict[str, Any]) -> None:
+        self._post_json_sync(path, data)
+    def _post_json_sync(self, path: str, data: Dict[str, Any]) -> None:
         if not self._api_endpoint_base:
             raise RuntimeError("API endpoint base is not configured")
         url = f"{self._api_endpoint_base}{path}"
         try:
             resp = requests.post(url, json=data, headers=self._api_headers(), timeout=(10, 60))
             resp.raise_for_status()
-        except requests.exceptions.RequestException as e:
-            logger.error(f"Failed POST {url}: {e}")
+        except requests.exceptions.RequestException as exc:
+            message = self._format_request_exception(url, exc)
+            raise DocentTelemetryRequestError(message) from exc
+    def _format_request_exception(self, url: str, exc: requests.exceptions.RequestException) -> str:
+        response: Optional[Response] = getattr(exc, "response", None)
+        message_parts: List[str] = [f"Failed POST {url}"]
+        suggestion: Optional[str]
+        if response is not None:
+            status_phrase = f"HTTP {response.status_code}"
+            if response.reason:
+                status_phrase = f"{status_phrase} {response.reason}"
+            message_parts.append(f"({status_phrase})")
+            detail = self._extract_response_detail(response)
+            if detail:
+                message_parts.append(f"- Backend detail: {detail}")
+            request_id = response.headers.get("x-request-id")
+            if request_id:
+                message_parts.append(f"(request-id: {request_id})")
+            suggestion = self._suggest_fix_for_status(response.status_code)
+        else:
+            message_parts.append(f"- {exc}")
+            suggestion = self._suggest_fix_for_status(None)
+        if suggestion:
+            message_parts.append(suggestion)
+        return " ".join(part for part in message_parts if part)
+    def _extract_response_detail(self, response: Response) -> Optional[str]:
+        try:
+            body = response.json()
+        except ValueError:
+            text = response.text.strip()
+            if not text:
+                return None
+            normalized = " ".join(text.split())
+            return self._truncate_error_message(normalized)
+        if isinstance(body, dict):
+            typed_body = cast(Dict[str, Any], body)
+            structured_message = self._structured_detail_message(typed_body)
+            if structured_message:
+                return self._truncate_error_message(structured_message)
+            return self._truncate_error_message(self._normalize_error_value(typed_body))
+        return self._truncate_error_message(self._normalize_error_value(body))
+    def _structured_detail_message(self, data: Dict[str, Any]) -> Optional[str]:
+        for key in ("detail", "message", "error"):
+            if key in data:
+                structured_value = self._structured_detail_value(data[key])
+                if structured_value:
+                    return structured_value
+        return self._structured_detail_value(data)
+    def _structured_detail_value(self, value: Any) -> Optional[str]:
+        if isinstance(value, Mapping):
+            mapping_value = cast(Mapping[str, Any], value)
+            message = mapping_value.get("message")
+            hint = mapping_value.get("hint")
+            error_code = mapping_value.get("error_code")
+            request_id = mapping_value.get("request_id")
+            fallback_detail = mapping_value.get("detail")
+            parts: List[str] = []
+            if isinstance(message, str) and message.strip():
+                parts.append(message.strip())
+            elif isinstance(fallback_detail, str) and fallback_detail.strip():
+                parts.append(fallback_detail.strip())
+            if isinstance(hint, str) and hint.strip():
+                parts.append(f"(hint: {hint.strip()})")
+            if isinstance(error_code, str) and error_code.strip():
+                parts.append(f"[code: {error_code.strip()}]")
+            if isinstance(request_id, str) and request_id.strip():
+                parts.append(f"(request-id: {request_id.strip()})")
+            return " ".join(parts) if parts else None
+        if isinstance(value, str) and value.strip():
+            return value.strip()
+        return None
+    def _normalize_error_value(self, value: Any) -> str:
+        if isinstance(value, str):
+            return " ".join(value.split())
+        try:
+            serialized = json.dumps(value)
+        except (TypeError, ValueError):
+            serialized = str(value)
+        return " ".join(serialized.split())
+    def _truncate_error_message(self, message: str) -> str:
+        message = message.strip()
+        if len(message) <= ERROR_DETAIL_MAX_CHARS:
+            return message
+        return f"{message[:ERROR_DETAIL_MAX_CHARS]}..."
+    def _suggest_fix_for_status(self, status_code: Optional[int]) -> Optional[str]:
+        if status_code in (401, 403):
+            return (
+                "Verify that the Authorization header or DOCENT_API_KEY grants write access to the "
+                "target collection."
+            )
+        if status_code == 404:
+            return (
+                "Ensure the tracing endpoint passed to initialize_tracing matches the Docent server's "
+                "/rest/telemetry route."
+            )
+        if status_code in (400, 422):
+            return (
+                "Confirm the payload includes collection_id, agent_run_id, metadata, and timestamp in "
+                "the expected format."
+            )
+        if status_code and status_code >= 500:
+            return "Inspect the Docent backend logs for the referenced request."
+        if status_code is None:
+            return "Confirm the Docent telemetry endpoint is reachable from this process."
+        return None
+    def _find_null_character_path(self, value: Any, path: str = "") -> Optional[str]:
+        """Backend rejects NUL bytes, so detect them before we send metadata to the backend."""
+        return None
+        if isinstance(value, str):
+            if "\x00" in value or "\\u0000" in value or "\\x00" in value:
+                return path or "<root>"
+            return None
+        if isinstance(value, dict):
+            for key, item in value.items():
+                next_path = f"{path}.{key}" if path else str(key)
+                result = self._find_null_character_path(item, next_path)
+                if result:
+                    return result
+            return None
+        if isinstance(value, (list, tuple)):
+            for index, item in enumerate(value):
+                next_path = f"{path}[{index}]" if path else f"[{index}]"
+                result = self._find_null_character_path(item, next_path)
+                if result:
+                    return result
+            return None
+        return None
     def send_agent_run_score(
         self,
@@ -660,6 +867,8 @@ class DocentTracer:
         if self._disabled:
             return
+        self._ensure_json_serializable_metadata(metadata, "Agent run")
         collection_id = self.collection_id
         payload: Dict[str, Any] = {
             "collection_id": collection_id,
@@ -705,6 +914,7 @@ class DocentTracer:
         if transcript_group_id is not None:
             payload["transcript_group_id"] = transcript_group_id
         if metadata is not None:
+            self._ensure_json_serializable_metadata(metadata, "Transcript")
             payload["metadata"] = metadata
         self._post_json("/v1/transcript-metadata", payload)
@@ -756,9 +966,7 @@ class DocentTracer:
             The transcript ID
         """
         if self._disabled:
-            # Return dummy ID when tracing is disabled
-            if transcript_id is None:
-                transcript_id = str(uuid.uuid4())
+            transcript_id = self.get_disabled_transcript_id(transcript_id)
             yield transcript_id
             return
@@ -788,7 +996,7 @@ class DocentTracer:
                     transcript_id, name, description, transcript_group_id, metadata
                 )
             except Exception as e:
-                logger.warning(f"Failed sending transcript data: {e}")
+                logger.error(f"Failed sending transcript data: {e}")
             yield transcript_id
         finally:
@@ -818,9 +1026,7 @@ class DocentTracer:
             The transcript ID
         """
         if self._disabled:
-            # Return dummy ID when tracing is disabled
-            if transcript_id is None:
-                transcript_id = str(uuid.uuid4())
+            transcript_id = self.get_disabled_transcript_id(transcript_id)
             yield transcript_id
             return
@@ -850,7 +1056,7 @@ class DocentTracer:
                     transcript_id, name, description, transcript_group_id, metadata
                 )
             except Exception as e:
-                logger.warning(f"Failed sending transcript data: {e}")
+                logger.error(f"Failed sending transcript data: {e}")
             yield transcript_id
         finally:
@@ -888,6 +1094,27 @@ class DocentTracer:
             )
             return
+        with self._transcript_group_state_lock:
+            state: dict[str, Optional[str]] = self._transcript_group_states.setdefault(
+                transcript_group_id, {}
+            )
+            final_name: Optional[str] = name if name is not None else state.get("name")
+            final_description: Optional[str] = (
+                description if description is not None else state.get("description")
+            )
+            final_parent_transcript_group_id: Optional[str] = (
+                parent_transcript_group_id
+                if parent_transcript_group_id is not None
+                else state.get("parent_transcript_group_id")
+            )
+            if final_name is not None:
+                state["name"] = final_name
+            if final_description is not None:
+                state["description"] = final_description
+            if final_parent_transcript_group_id is not None:
+                state["parent_transcript_group_id"] = final_parent_transcript_group_id
         payload: Dict[str, Any] = {
             "collection_id": collection_id,
             "transcript_group_id": transcript_group_id,
@@ -895,13 +1122,14 @@ class DocentTracer:
             "timestamp": datetime.now(timezone.utc).isoformat(),
         }
-        if name is not None:
-            payload["name"] = name
-        if description is not None:
-            payload["description"] = description
-        if parent_transcript_group_id is not None:
-            payload["parent_transcript_group_id"] = parent_transcript_group_id
+        if final_name is not None:
+            payload["name"] = final_name
+        if final_description is not None:
+            payload["description"] = final_description
+        if final_parent_transcript_group_id is not None:
+            payload["parent_transcript_group_id"] = final_parent_transcript_group_id
         if metadata is not None:
+            self._ensure_json_serializable_metadata(metadata, "Transcript group")
             payload["metadata"] = metadata
         self._post_json("/v1/transcript-group-metadata", payload)
@@ -929,9 +1157,7 @@ class DocentTracer:
             The transcript group ID
         """
         if self._disabled:
-            # Return dummy ID when tracing is disabled
-            if transcript_group_id is None:
-                transcript_group_id = str(uuid.uuid4())
+            transcript_group_id = self.get_disabled_transcript_group_id(transcript_group_id)
             yield transcript_group_id
             return
@@ -963,7 +1189,7 @@ class DocentTracer:
                     transcript_group_id, name, description, parent_transcript_group_id, metadata
                 )
             except Exception as e:
-                logger.warning(f"Failed sending transcript group data: {e}")
+                logger.error(f"Failed sending transcript group data: {e}")
             yield transcript_group_id
         finally:
@@ -993,9 +1219,7 @@ class DocentTracer:
             The transcript group ID
         """
         if self._disabled:
-            # Return dummy ID when tracing is disabled
-            if transcript_group_id is None:
-                transcript_group_id = str(uuid.uuid4())
+            transcript_group_id = self.get_disabled_transcript_group_id(transcript_group_id)
             yield transcript_group_id
             return
@@ -1027,7 +1251,7 @@ class DocentTracer:
                     transcript_group_id, name, description, parent_transcript_group_id, metadata
                 )
             except Exception as e:
-                logger.warning(f"Failed sending transcript group data: {e}")
+                logger.error(f"Failed sending transcript group data: {e}")
             yield transcript_group_id
         finally:
@@ -1231,28 +1455,33 @@ def agent_run_metadata(metadata: Dict[str, Any]) -> None:
         tracer.send_agent_run_metadata(agent_run_id, metadata)
     except Exception as e:
-        logger.error(f"Failed to send metadata: {e}")
+        logger.error(f"Failed to send agent run metadata: {e}")
 def transcript_metadata(
+    metadata: Dict[str, Any],
+    *,
     name: Optional[str] = None,
     description: Optional[str] = None,
     transcript_group_id: Optional[str] = None,
-    metadata: Optional[Dict[str, Any]] = None,
 ) -> None:
     """
     Send transcript metadata directly to the backend for the current transcript.
     Args:
+        metadata: Dictionary of metadata to attach to the current transcript (required)
         name: Optional transcript name
         description: Optional transcript description
-        parent_transcript_id: Optional parent transcript ID
-        metadata: Optional metadata to send
+        transcript_group_id: Optional transcript group ID to associate with
     Example:
-        transcript_metadata(name="data_processing", description="Process user data")
-        transcript_metadata(metadata={"user": "John", "model": "gpt-4"})
-        transcript_metadata(name="validation", parent_transcript_id="parent-123")
+        transcript_metadata({"user": "John", "model": "gpt-4"})
+        transcript_metadata({"env": "prod"}, name="data_processing")
+        transcript_metadata(
+            {"team": "search"},
+            name="validation",
+            transcript_group_id="group-123",
+        )
     """
     try:
         tracer = get_tracer()
@@ -1271,23 +1500,29 @@ def transcript_metadata(
 def transcript_group_metadata(
+    metadata: Dict[str, Any],
+    *,
     name: Optional[str] = None,
     description: Optional[str] = None,
     parent_transcript_group_id: Optional[str] = None,
-    metadata: Optional[Dict[str, Any]] = None,
 ) -> None:
     """
     Send transcript group metadata directly to the backend for the current transcript group.
     Args:
+        metadata: Dictionary of metadata to attach to the current transcript group (required)
         name: Optional transcript group name
         description: Optional transcript group description
         parent_transcript_group_id: Optional parent transcript group ID
-        metadata: Optional metadata to send
     Example:
-        transcript_group_metadata(name="pipeline", description="Main processing pipeline")
-        transcript_group_metadata(metadata={"team": "search", "env": "prod"})
+        transcript_group_metadata({"team": "search", "env": "prod"})
+        transcript_group_metadata({"env": "prod"}, name="pipeline")
+        transcript_group_metadata(
+            {"team": "search"},
+            name="pipeline",
+            parent_transcript_group_id="root-group",
+        )
     """
     try:
         tracer = get_tracer()
@@ -1324,6 +1559,11 @@ class AgentRunContext:
     def __enter__(self) -> tuple[str, str]:
         """Sync context manager entry."""
+        if is_disabled():
+            tracer = get_tracer()
+            self.agent_run_id = tracer.get_disabled_agent_run_id(self.agent_run_id)
+            self.transcript_id = tracer.get_disabled_transcript_id(self.transcript_id)
+            return self.agent_run_id, self.transcript_id
         self._sync_context = get_tracer().agent_run_context(
             self.agent_run_id, self.transcript_id, metadata=self.metadata, **self.attributes
         )
@@ -1336,6 +1576,11 @@ class AgentRunContext:
     async def __aenter__(self) -> tuple[str, str]:
         """Async context manager entry."""
+        if is_disabled():
+            tracer = get_tracer()
+            self.agent_run_id = tracer.get_disabled_agent_run_id(self.agent_run_id)
+            self.transcript_id = tracer.get_disabled_transcript_id(self.transcript_id)
+            return self.agent_run_id, self.transcript_id
         self._async_context = get_tracer().async_agent_run_context(
             self.agent_run_id, self.transcript_id, metadata=self.metadata, **self.attributes
         )
@@ -1476,6 +1721,10 @@ class TranscriptContext:
     def __enter__(self) -> str:
         """Sync context manager entry."""
+        if is_disabled():
+            tracer = get_tracer()
+            self.transcript_id = tracer.get_disabled_transcript_id(self.transcript_id)
+            return self.transcript_id
         self._sync_context = get_tracer().transcript_context(
             name=self.name,
             transcript_id=self.transcript_id,
@@ -1492,6 +1741,10 @@ class TranscriptContext:
     async def __aenter__(self) -> str:
         """Async context manager entry."""
+        if is_disabled():
+            tracer = get_tracer()
+            self.transcript_id = tracer.get_disabled_transcript_id(self.transcript_id)
+            return self.transcript_id
         self._async_context = get_tracer().async_transcript_context(
             name=self.name,
             transcript_id=self.transcript_id,
@@ -1653,6 +1906,12 @@ class TranscriptGroupContext:
     def __enter__(self) -> str:
         """Sync context manager entry."""
+        if is_disabled():
+            tracer = get_tracer()
+            self.transcript_group_id = tracer.get_disabled_transcript_group_id(
+                self.transcript_group_id
+            )
+            return self.transcript_group_id
         self._sync_context = get_tracer().transcript_group_context(
             name=self.name,
             transcript_group_id=self.transcript_group_id,
@@ -1669,6 +1928,12 @@ class TranscriptGroupContext:
     async def __aenter__(self) -> str:
         """Async context manager entry."""
+        if is_disabled():
+            tracer = get_tracer()
+            self.transcript_group_id = tracer.get_disabled_transcript_group_id(
+                self.transcript_group_id
+            )
+            return self.transcript_group_id
         self._async_context = get_tracer().async_transcript_group_context(
             name=self.name,
             transcript_group_id=self.transcript_group_id,

{docent_python-0.1.24a0.dist-info → docent_python-0.1.28a0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: docent-python
-Version: 0.1.24a0
+Version: 0.1.28a0
 Summary: Docent SDK
 Project-URL: Homepage, https://github.com/TransluceAI/docent
 Project-URL: Issues, https://github.com/TransluceAI/docent/issues

{docent_python-0.1.24a0.dist-info → docent_python-0.1.28a0.dist-info}/RECORD RENAMED Viewed

@@ -1,16 +1,16 @@
 docent/__init__.py,sha256=fuhETwJPcesiB76Zxa64HBJxeaaTyRalIH-fs77TWsU,112
 docent/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-docent/trace.py,sha256=PXMvXxtnYsd4xDxAX30SUZ32OoMuMrTpLbfq8f_QVmo,68565
+docent/trace.py,sha256=J05K9MykKGkeBjh9idTOPtiMA5_h0AdL8zRR-yKu5Yg,79525
 docent/trace_temp.py,sha256=Z0lAPwVzXjFvxpiU-CuvfWIslq9Q4alNkZMoQ77Xudk,40711
 docent/_llm_util/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 docent/_llm_util/llm_cache.py,sha256=nGrvfFikFbEnfmzZRvWvZ60gfVSTvW1iC8-ciCXwbAk,6430
-docent/_llm_util/llm_svc.py,sha256=PQ-96UDJrnPa9csTKL_JDO8jzOrLzysVBqUHywuij0w,18046
-docent/_llm_util/model_registry.py,sha256=8Y4VwrA2f2EX78cG1VBIBHVvT_p4qqBTdu9a9zJpfTo,3382
+docent/_llm_util/llm_svc.py,sha256=LqrI8DdhqOmkcz3tsyzSlhrJv2gA4-0DE105WLys6sw,18156
+docent/_llm_util/model_registry.py,sha256=CdOi4g3eZCBQjLQDNQtprXpby0Ldc6AIRvLAD6Ajc90,3502
 docent/_llm_util/data_models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 docent/_llm_util/data_models/exceptions.py,sha256=IW4BVMVp8r5TufNXyrhy3acgwJiQQQPQjB9VA4RVXw8,1489
-docent/_llm_util/data_models/llm_output.py,sha256=ZAIIcgfxMZtTft8bXTPAhUcXEO48GLG3epkul_4gQNQ,10239
+docent/_llm_util/data_models/llm_output.py,sha256=UCYewoXN72skigN_fm414TzQol1KxmVbQGwgGVROE_4,10602
 docent/_llm_util/providers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-docent/_llm_util/providers/anthropic.py,sha256=-1oPd5FB4aFwKSmNvXzG8PVewjhgsogLRX1SCpnCxoA,18720
+docent/_llm_util/providers/anthropic.py,sha256=M5ryu_lGKZ3PDJLSCV07zsiAvEeAyEgZ13rbzOeutS8,18765
 docent/_llm_util/providers/common.py,sha256=dgcTuU4XkCKoAaM48UW8zMgRYUzj7TDBhvWqtnxBO7g,1166
 docent/_llm_util/providers/google.py,sha256=2D9mDgenZW0pt0_V7koX-aoZzpl8jo8xE5EWOLK7I0k,20314
 docent/_llm_util/providers/openai.py,sha256=4niQV9CNaJ-iiEwYG0BSFxCwcsCAWZz0JuUs4wBKu9M,25904
@@ -21,7 +21,7 @@ docent/_log_util/__init__.py,sha256=3HXXrxrSm8PxwG4llotrCnSnp7GuroK1FNHsdg6f7aE,
 docent/_log_util/logger.py,sha256=kwM0yRW1IJd6-XTorjWn48B4l8qvD2ZM6VDjY5eskQI,4422
 docent/data_models/__init__.py,sha256=vEcFppE6wtKFp37KF_hUv00Ncn6fK_qUbVGZE5ltz-o,383
 docent/data_models/_tiktoken_util.py,sha256=hC0EDDWItv5-0cONBnHWgZtQOflDU7ZNEhXPFo4DvPc,3057
-docent/data_models/agent_run.py,sha256=7_37I9aS9rhDTkAvMPwoJGssQldvvKte8qVb93EnAiY,19329
+docent/data_models/agent_run.py,sha256=D9KVGVChm2q4B_cruVYtQH-5Xk31ZxTYhoZn6RGrc_o,19392
 docent/data_models/citation.py,sha256=2_M1-_olVOJtjCGGFx1GIwGYWl0ILHxRsW8-EFDS9j0,7844
 docent/data_models/judge.py,sha256=BOKAfZmNoLPclJNz_b7NvH8G8FzfR7kc6OpIv91GMDQ,336
 docent/data_models/metadata_util.py,sha256=E-EClAP5vVm9xbfTlPSz0tUyCalOfN9Jujd6JGoRnBg,487
@@ -37,7 +37,7 @@ docent/data_models/chat/tool.py,sha256=MMglNHzkwHqUoK0xDWqs2FtelPsgHqwVpGpI1F8KZ
 docent/judges/__init__.py,sha256=aTsQ2mIQnZt8HEMau02KrEA4m5w-lGC3U9Dirkj3to4,500
 docent/judges/analysis.py,sha256=bn7XIT7mj77LjFHMh1PqjALknq3nN-fRXqgg8cfJF8o,2486
 docent/judges/impl.py,sha256=JOq2tEBTqNbWIG2gRuI8OmEW2dHdx7nfnJnHeGwdyOk,24035
-docent/judges/runner.py,sha256=ANUVrrfgT61_zTV9pErLXoerMiD6x_RIJQGpwxWIIMg,1928
+docent/judges/runner.py,sha256=k1OyEPEhAUiRiJpOAwbaAqsPHsKfseD7URXGqhVI974,4496
 docent/judges/stats.py,sha256=zejJle583xHG2G3gcYHiWcHoIOkeKwpSkl8lfeKQhFs,7805
 docent/judges/types.py,sha256=goNaKs3PF5wMHWLnFerYCEjUjPR0IVI9cVrxCK2TfjI,11539
 docent/judges/util/forgiving_json.py,sha256=zSh0LF3UVHdSjuMNvEiqUmSxpxPaqK1rSLiI6KCNihg,3549
@@ -52,8 +52,8 @@ docent/samples/log.eval,sha256=orrW__9WBfANq7NwKsPSq9oTsQRcG6KohG5tMr_X_XY,39770
 docent/samples/tb_airline.json,sha256=eR2jFFRtOw06xqbEglh6-dPewjifOk-cuxJq67Dtu5I,47028
 docent/sdk/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 docent/sdk/agent_run_writer.py,sha256=0AWdxejoqZyuj9JSA39WlEwGcMSYTWNqnzIuluySY-M,11043
-docent/sdk/client.py,sha256=aB_ILmzzK9JAC2kobtnp50stfINpSfNh54siaDlMEKc,19880
-docent_python-0.1.24a0.dist-info/METADATA,sha256=jTg2sD4AXMPXBpXJOGcwvE2GsJ9oO6zDp6g1UJhPqk0,1351
-docent_python-0.1.24a0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-docent_python-0.1.24a0.dist-info/licenses/LICENSE.md,sha256=QIMv2UiT6MppRasso4ymaA0w7ltkqmlL0HCt8CLD7Rc,580
-docent_python-0.1.24a0.dist-info/RECORD,,
+docent/sdk/client.py,sha256=BeW9nMlCVOyLN8o7S81ePX0ngFrmzJHMxa8YbundKgs,24321
+docent_python-0.1.28a0.dist-info/METADATA,sha256=7uIPnlYJFyZpE6xCEXwz4OlGD-_br4B4GY6DZ0uj7i8,1351
+docent_python-0.1.28a0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+docent_python-0.1.28a0.dist-info/licenses/LICENSE.md,sha256=QIMv2UiT6MppRasso4ymaA0w7ltkqmlL0HCt8CLD7Rc,580
+docent_python-0.1.28a0.dist-info/RECORD,,

{docent_python-0.1.24a0.dist-info → docent_python-0.1.28a0.dist-info}/WHEEL RENAMED Viewed

File without changes

{docent_python-0.1.24a0.dist-info → docent_python-0.1.28a0.dist-info}/licenses/LICENSE.md RENAMED Viewed

File without changes

docent-python 0.1.24a0__py3-none-any.whl → 0.1.28a0__py3-none-any.whl

Potentially problematic release.

docent-python 0.1.24a0__py3-none-any.whl → 0.1.28a0__py3-none-any.whl