docent-python 0.1.26a0__tar.gz → 0.1.28a0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. {docent_python-0.1.26a0 → docent_python-0.1.28a0}/.gitignore +2 -0
  2. {docent_python-0.1.26a0 → docent_python-0.1.28a0}/PKG-INFO +1 -1
  3. {docent_python-0.1.26a0 → docent_python-0.1.28a0}/docent/_llm_util/llm_svc.py +3 -3
  4. {docent_python-0.1.26a0 → docent_python-0.1.28a0}/docent/_llm_util/model_registry.py +4 -0
  5. {docent_python-0.1.26a0 → docent_python-0.1.28a0}/docent/_llm_util/providers/anthropic.py +1 -1
  6. {docent_python-0.1.26a0 → docent_python-0.1.28a0}/docent/_llm_util/providers/openai.py +1 -6
  7. {docent_python-0.1.26a0 → docent_python-0.1.28a0}/docent/data_models/agent_run.py +1 -0
  8. docent_python-0.1.28a0/docent/judges/runner.py +129 -0
  9. {docent_python-0.1.26a0 → docent_python-0.1.28a0}/docent/sdk/client.py +118 -1
  10. {docent_python-0.1.26a0 → docent_python-0.1.28a0}/docent/trace.py +268 -103
  11. {docent_python-0.1.26a0 → docent_python-0.1.28a0}/pyproject.toml +1 -1
  12. docent_python-0.1.26a0/docent/judges/runner.py +0 -66
  13. {docent_python-0.1.26a0 → docent_python-0.1.28a0}/LICENSE.md +0 -0
  14. {docent_python-0.1.26a0 → docent_python-0.1.28a0}/README.md +0 -0
  15. {docent_python-0.1.26a0 → docent_python-0.1.28a0}/docent/__init__.py +0 -0
  16. {docent_python-0.1.26a0 → docent_python-0.1.28a0}/docent/_llm_util/__init__.py +0 -0
  17. {docent_python-0.1.26a0 → docent_python-0.1.28a0}/docent/_llm_util/data_models/__init__.py +0 -0
  18. {docent_python-0.1.26a0 → docent_python-0.1.28a0}/docent/_llm_util/data_models/exceptions.py +0 -0
  19. {docent_python-0.1.26a0 → docent_python-0.1.28a0}/docent/_llm_util/data_models/llm_output.py +0 -0
  20. {docent_python-0.1.26a0 → docent_python-0.1.28a0}/docent/_llm_util/llm_cache.py +0 -0
  21. {docent_python-0.1.26a0 → docent_python-0.1.28a0}/docent/_llm_util/providers/__init__.py +0 -0
  22. {docent_python-0.1.26a0 → docent_python-0.1.28a0}/docent/_llm_util/providers/common.py +0 -0
  23. {docent_python-0.1.26a0 → docent_python-0.1.28a0}/docent/_llm_util/providers/google.py +0 -0
  24. {docent_python-0.1.26a0 → docent_python-0.1.28a0}/docent/_llm_util/providers/openrouter.py +0 -0
  25. {docent_python-0.1.26a0 → docent_python-0.1.28a0}/docent/_llm_util/providers/preference_types.py +0 -0
  26. {docent_python-0.1.26a0 → docent_python-0.1.28a0}/docent/_llm_util/providers/provider_registry.py +0 -0
  27. {docent_python-0.1.26a0 → docent_python-0.1.28a0}/docent/_log_util/__init__.py +0 -0
  28. {docent_python-0.1.26a0 → docent_python-0.1.28a0}/docent/_log_util/logger.py +0 -0
  29. {docent_python-0.1.26a0 → docent_python-0.1.28a0}/docent/data_models/__init__.py +0 -0
  30. {docent_python-0.1.26a0 → docent_python-0.1.28a0}/docent/data_models/_tiktoken_util.py +0 -0
  31. {docent_python-0.1.26a0 → docent_python-0.1.28a0}/docent/data_models/chat/__init__.py +0 -0
  32. {docent_python-0.1.26a0 → docent_python-0.1.28a0}/docent/data_models/chat/content.py +0 -0
  33. {docent_python-0.1.26a0 → docent_python-0.1.28a0}/docent/data_models/chat/message.py +0 -0
  34. {docent_python-0.1.26a0 → docent_python-0.1.28a0}/docent/data_models/chat/tool.py +0 -0
  35. {docent_python-0.1.26a0 → docent_python-0.1.28a0}/docent/data_models/citation.py +0 -0
  36. {docent_python-0.1.26a0 → docent_python-0.1.28a0}/docent/data_models/judge.py +0 -0
  37. {docent_python-0.1.26a0 → docent_python-0.1.28a0}/docent/data_models/metadata_util.py +0 -0
  38. {docent_python-0.1.26a0 → docent_python-0.1.28a0}/docent/data_models/regex.py +0 -0
  39. {docent_python-0.1.26a0 → docent_python-0.1.28a0}/docent/data_models/remove_invalid_citation_ranges.py +0 -0
  40. {docent_python-0.1.26a0 → docent_python-0.1.28a0}/docent/data_models/shared_types.py +0 -0
  41. {docent_python-0.1.26a0 → docent_python-0.1.28a0}/docent/data_models/transcript.py +0 -0
  42. {docent_python-0.1.26a0 → docent_python-0.1.28a0}/docent/data_models/util.py +0 -0
  43. {docent_python-0.1.26a0 → docent_python-0.1.28a0}/docent/judges/__init__.py +0 -0
  44. {docent_python-0.1.26a0 → docent_python-0.1.28a0}/docent/judges/analysis.py +0 -0
  45. {docent_python-0.1.26a0 → docent_python-0.1.28a0}/docent/judges/impl.py +0 -0
  46. {docent_python-0.1.26a0 → docent_python-0.1.28a0}/docent/judges/stats.py +0 -0
  47. {docent_python-0.1.26a0 → docent_python-0.1.28a0}/docent/judges/types.py +0 -0
  48. {docent_python-0.1.26a0 → docent_python-0.1.28a0}/docent/judges/util/forgiving_json.py +0 -0
  49. {docent_python-0.1.26a0 → docent_python-0.1.28a0}/docent/judges/util/meta_schema.json +0 -0
  50. {docent_python-0.1.26a0 → docent_python-0.1.28a0}/docent/judges/util/meta_schema.py +0 -0
  51. {docent_python-0.1.26a0 → docent_python-0.1.28a0}/docent/judges/util/parse_output.py +0 -0
  52. {docent_python-0.1.26a0 → docent_python-0.1.28a0}/docent/judges/util/voting.py +0 -0
  53. {docent_python-0.1.26a0 → docent_python-0.1.28a0}/docent/loaders/load_inspect.py +0 -0
  54. {docent_python-0.1.26a0 → docent_python-0.1.28a0}/docent/py.typed +0 -0
  55. {docent_python-0.1.26a0 → docent_python-0.1.28a0}/docent/samples/__init__.py +0 -0
  56. {docent_python-0.1.26a0 → docent_python-0.1.28a0}/docent/samples/load.py +0 -0
  57. {docent_python-0.1.26a0 → docent_python-0.1.28a0}/docent/samples/log.eval +0 -0
  58. {docent_python-0.1.26a0 → docent_python-0.1.28a0}/docent/samples/tb_airline.json +0 -0
  59. {docent_python-0.1.26a0 → docent_python-0.1.28a0}/docent/sdk/__init__.py +0 -0
  60. {docent_python-0.1.26a0 → docent_python-0.1.28a0}/docent/sdk/agent_run_writer.py +0 -0
  61. {docent_python-0.1.26a0 → docent_python-0.1.28a0}/docent/trace_temp.py +0 -0
  62. {docent_python-0.1.26a0 → docent_python-0.1.28a0}/uv.lock +0 -0
@@ -13,6 +13,8 @@
  */.terraform/
  */*.terraform.*

+ .idea/
+
  # Byte-compiled / optimized / DLL files
  __pycache__/
  *.py[cod]
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: docent-python
- Version: 0.1.26a0
+ Version: 0.1.28a0
  Summary: Docent SDK
  Project-URL: Homepage, https://github.com/TransluceAI/docent
  Project-URL: Issues, https://github.com/TransluceAI/docent/issues
@@ -75,7 +75,7 @@ async def _parallelize_calls(
      completion_callback: AsyncLLMOutputStreamingCallback | None,
      # Arguments for the individual completion getter
      client: Any,
-     inputs: list[MessagesInput],
+     inputs: Sequence[MessagesInput],
      model_name: str,
      tools: list[ToolInfo] | None,
      tool_choice: Literal["auto", "required"] | None,
@@ -306,7 +306,7 @@ async def _parallelize_calls(

  class BaseLLMService:
      def __init__(self, max_concurrency: int = DEFAULT_SVC_MAX_CONCURRENCY):
-         self._semaphore = Semaphore(max_concurrency)
+         self.max_concurrency, self._semaphore = max_concurrency, Semaphore(max_concurrency)
          self._client_cache: dict[tuple[str, str | None], Any] = {} # (provider, api_key) -> client
          self._client_cache_lock = Lock()

@@ -326,7 +326,7 @@ class BaseLLMService:
      async def get_completions(
          self,
          *,
-         inputs: list[MessagesInput],
+         inputs: Sequence[MessagesInput],
          model_options: list[ModelOption],
          tools: list[ToolInfo] | None = None,
          tool_choice: Literal["auto", "required"] | None = None,
@@ -54,6 +54,10 @@ _REGISTRY: list[tuple[str, ModelInfo]] = [
          "claude-sonnet-4",
          ModelInfo(rate={"input": 3.0, "output": 15.0}, context_window=200_000),
      ),
+     (
+         "claude-haiku-4-5",
+         ModelInfo(rate={"input": 1.0, "output": 5.0}, context_window=200_000),
+     ),
      (
          "gemini-2.5-flash-lite",
          ModelInfo(
@@ -178,7 +178,7 @@ def _parse_tool_choice(tool_choice: Literal["auto", "required"] | None) -> ToolC

  def _convert_anthropic_error(e: Exception):
      if isinstance(e, BadRequestError):
-         if "context limit" in e.message.lower():
+         if "context limit" in e.message.lower() or "prompt is too long" in e.message.lower():
              return ContextWindowException()
      if isinstance(e, RateLimitError):
          return RateLimitException(e)
@@ -18,13 +18,8 @@ from openai import (
      PermissionDeniedError,
      RateLimitError,
      UnprocessableEntityError,
+     omit,
  )
- try:
-     from openai import omit
- except ImportError:
-     from openai import Omit as _OpenAIOmit
-
-     omit = _OpenAIOmit()
  from openai.types.chat import (
      ChatCompletion,
      ChatCompletionAssistantMessageParam,
@@ -125,6 +125,7 @@ class AgentRun(BaseModel):
          # )

          # Append the text field
+         result.append({"name": "agent_run_id", "type": "str"})
          result.append({"name": "text", "type": "str"})

          return result
@@ -0,0 +1,129 @@
+ from typing import Protocol, Sequence, runtime_checkable
+
+ import anyio
+ from tqdm.auto import tqdm
+
+ from docent._llm_util.llm_svc import BaseLLMService
+ from docent._log_util import get_logger
+ from docent.data_models.agent_run import AgentRun
+ from docent.judges import (
+     JudgeResult,
+     JudgeResultCompletionCallback,
+     Rubric,
+ )
+ from docent.judges.impl import build_judge
+
+ logger = get_logger(__name__)
+
+
+ @runtime_checkable
+ class AgentRunResolver(Protocol):
+     async def __call__(self) -> AgentRun | None: ...
+
+
+ AgentRunInput = AgentRun | AgentRunResolver
+
+
+ async def _resolve_agent_run(agent_run_input: AgentRunInput) -> AgentRun | None:
+     if isinstance(agent_run_input, AgentRun):
+         return agent_run_input
+     else:
+         return await agent_run_input()
+
+
+ async def run_rubric(
+     agent_runs: Sequence[AgentRunInput],
+     rubric: Rubric,
+     llm_svc: BaseLLMService,
+     callback: JudgeResultCompletionCallback | None = None,
+     *,
+     n_rollouts_per_input: int | list[int] = 1,
+     show_progress: bool = True,
+ ) -> list[JudgeResult | None]:
+     if not agent_runs:
+         raise ValueError("agent_runs must be a non-empty sequence")
+     if rubric.n_rollouts_per_input <= 0:
+         raise ValueError("rubric.n_rollouts_per_input must be greater than 0")
+
+     # Normalize n_rollouts_per_input to a list
+     if isinstance(n_rollouts_per_input, int):
+         if n_rollouts_per_input < 0:
+             raise ValueError("n_rollouts_per_input must be non-negative")
+         rollouts_per_run = [n_rollouts_per_input] * len(agent_runs)
+     else:
+         rollouts_per_run = n_rollouts_per_input
+         if len(rollouts_per_run) != len(agent_runs):
+             raise ValueError("n_rollouts_per_input list must match agent_runs length")
+         if any(n < 0 for n in rollouts_per_run):
+             raise ValueError("All values in n_rollouts_per_input must be non-negative")
+
+     judge = build_judge(rubric, llm_svc)
+
+     total_rollouts = sum(rollouts_per_run)
+     logger.info(
+         "Running rubric %s version %s against %d agent runs with %d total rollouts",
+         rubric.id,
+         rubric.version,
+         len(agent_runs),
+         total_rollouts,
+     )
+
+     agent_results: list[list[JudgeResult | None]] = [[] for _ in agent_runs]
+     progress_bar = tqdm(
+         total=total_rollouts,
+         desc=f"Rubric {rubric.id}",
+         disable=not show_progress,
+     )
+
+     # NOTE(mengk): using a (2 * llm max concurrency) semaphore is a hack to avoid
+     # hammering _resolve_agent_run, which makes expensive DB calls, when they aren't going to be
+     # immediately processed by the LLMService anyways.
+     # TODO(mengk): We should eventually implement a more idiomatic solution to this.
+     # It's related to the idea of a global concurrency limiter.
+     run_judge_semaphore = anyio.Semaphore(llm_svc.max_concurrency * 2)
+
+     async def _run_single_judge(index: int, agent_run_input: AgentRunInput):
+         async with run_judge_semaphore:
+             rollout_results: list[JudgeResult | None] = []
+
+             if rollouts_per_run[index] == 0:
+                 agent_results[index] = []
+                 if callback is not None:
+                     await callback(index, None)
+                 return
+
+             agent_run = await _resolve_agent_run(agent_run_input)
+             if agent_run is None:
+                 if callback is not None:
+                     await callback(index, None)
+                 return
+
+             for _ in range(rollouts_per_run[index]):
+                 result = await judge(agent_run)
+                 rollout_results.append(result)
+                 progress_bar.update()
+
+             agent_results[index] = rollout_results
+
+             if callback is not None:
+                 # Filter out None results for the callback
+                 valid_results = [r for r in rollout_results if r is not None]
+                 await callback(index, valid_results if valid_results else None)
+
+     try:
+         async with anyio.create_task_group() as tg:
+             for index, agent_run in enumerate(agent_runs):
+                 tg.start_soon(_run_single_judge, index, agent_run)
+     finally:
+         progress_bar.close()
+
+     flattened_results = [result for rollouts in agent_results for result in rollouts]
+     successful = sum(result is not None for result in flattened_results)
+     logger.info(
+         "Finished rubric %s: produced %d/%d judge results",
+         rubric.id,
+         successful,
+         len(flattened_results),
+     )
+
+     return flattened_results
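
A minimal usage sketch (not part of the diff) may help clarify the new per-input rollout control. The agent_runs, rubric, and llm_svc objects are assumed to be constructed elsewhere; only the run_rubric signature comes from the code above.

    # Illustrative sketch only: n_rollouts_per_input may be an int or a per-run list.
    from docent.judges.runner import run_rubric

    async def score_runs(agent_runs, rubric, llm_svc):
        # Hypothetical counts: two rollouts for the first run, one for each remaining run.
        per_input = [2] + [1] * (len(agent_runs) - 1)
        results = await run_rubric(
            agent_runs,
            rubric,
            llm_svc,
            n_rollouts_per_input=per_input,
        )
        # Results are flattened across rollouts; None marks a rollout with no judgment.
        print(sum(r is not None for r in results), "of", len(results), "rollouts succeeded")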
@@ -200,7 +200,7 @@ class Docent:
              version: The version of the rubric to get run state for. If None, the latest version is used.

          Returns:
-             dict: Dictionary containing rubric run state with results, job_id, and total_agent_runs.
+             dict: Dictionary containing rubric run state with results, job_id, and total_results_needed.

          Raises:
              requests.exceptions.HTTPError: If the API request fails.
@@ -450,6 +450,123 @@ class Docent:
          logger.info(f"Successfully shared Collection '{collection_id}' with {email}")
          return response.json()

+     def collection_exists(self, collection_id: str) -> bool:
+         """Check if a collection exists without raising if it does not."""
+         url = f"{self._server_url}/{collection_id}/exists"
+         response = self._session.get(url)
+         self._handle_response_errors(response)
+         return bool(response.json())
+
+     def has_collection_permission(self, collection_id: str, permission: str = "write") -> bool:
+         """Check whether the authenticated user has a specific permission on a collection.
+
+         Args:
+             collection_id: Collection to check.
+             permission: Permission level to verify (`read`, `write`, or `admin`).
+
+         Returns:
+             bool: True if the current API key has the requested permission; otherwise False.
+
+         Raises:
+             ValueError: If an unsupported permission value is provided.
+             requests.exceptions.HTTPError: If the API request fails.
+         """
+         valid_permissions = {"read", "write", "admin"}
+         if permission not in valid_permissions:
+             raise ValueError(f"permission must be one of {sorted(valid_permissions)}")
+
+         url = f"{self._server_url}/{collection_id}/has_permission"
+         response = self._session.get(url, params={"permission": permission})
+         self._handle_response_errors(response)
+
+         payload = response.json()
+         return bool(payload.get("has_permission", False))
+
+     def get_dql_schema(self, collection_id: str) -> dict[str, Any]:
+         """Retrieve the DQL schema for a collection.
+
+         Args:
+             collection_id: ID of the Collection.
+
+         Returns:
+             dict: Dictionary containing available tables, columns, and metadata for DQL queries.
+
+         Raises:
+             requests.exceptions.HTTPError: If the API request fails.
+         """
+         url = f"{self._server_url}/dql/{collection_id}/schema"
+         response = self._session.get(url)
+         self._handle_response_errors(response)
+         return response.json()
+
+     def execute_dql(self, collection_id: str, dql: str) -> dict[str, Any]:
+         """Execute a DQL query against a collection.
+
+         Args:
+             collection_id: ID of the Collection.
+             dql: The DQL query string to execute.
+
+         Returns:
+             dict: Query execution results including rows, columns, execution metadata, and selected columns.
+
+         Raises:
+             ValueError: If `dql` is empty.
+             requests.exceptions.HTTPError: If the API request fails or the query is invalid.
+         """
+         if not dql.strip():
+             raise ValueError("dql must be a non-empty string")
+
+         url = f"{self._server_url}/dql/{collection_id}/execute"
+         response = self._session.post(url, json={"dql": dql})
+         self._handle_response_errors(response)
+         return response.json()
+
+     def select_agent_run_ids(
+         self,
+         collection_id: str,
+         where_clause: str | None = None,
+         limit: int | None = None,
+     ) -> list[str]:
+         """Convenience helper to fetch agent run IDs via DQL.
+
+         Args:
+             collection_id: ID of the Collection to query.
+             where_clause: Optional DQL WHERE clause applied to the agent_runs table.
+             limit: Optional LIMIT applied to the underlying DQL query.
+
+         Returns:
+             list[str]: Agent run IDs matching the criteria.
+
+         Raises:
+             ValueError: If the inputs are invalid.
+             requests.exceptions.HTTPError: If the API request fails.
+         """
+         query = "SELECT agent_runs.id AS agent_run_id FROM agent_runs"
+
+         if where_clause:
+             where_clause = where_clause.strip()
+             if not where_clause:
+                 raise ValueError("where_clause must be a non-empty string when provided")
+             query += f" WHERE {where_clause}"
+
+         if limit is not None:
+             if limit <= 0:
+                 raise ValueError("limit must be a positive integer when provided")
+             query += f" LIMIT {limit}"
+
+         result = self.execute_dql(collection_id, query)
+         rows = result.get("rows", [])
+         agent_run_ids = [str(row[0]) for row in rows if row]
+
+         if result.get("truncated"):
+             logger.warning(
+                 "DQL query truncated at applied limit %s; returning %s agent run IDs",
+                 result.get("applied_limit"),
+                 len(agent_run_ids),
+             )
+
+         return agent_run_ids
+
      def list_agent_run_ids(self, collection_id: str) -> list[str]:
          """Get all agent run IDs for a collection.

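
A hedged usage sketch (not from the package) of the new DQL helpers follows; the client construction, collection ID, and WHERE-clause syntax are assumptions, while the method names and signatures come from the diff above.

    # Illustrative sketch only.
    from docent.sdk.client import Docent

    client = Docent()  # assumes default construction resolves the API key as in prior releases
    collection_id = "your-collection-id"  # placeholder

    if client.collection_exists(collection_id) and client.has_collection_permission(collection_id, "read"):
        schema = client.get_dql_schema(collection_id)  # inspect available tables/columns first
        run_ids = client.select_agent_run_ids(
            collection_id,
            where_clause="agent_runs.text LIKE '%timeout%'",  # hypothetical filter
            limit=100,
        )
        print(f"{len(run_ids)} matching agent runs")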
@@ -1,4 +1,3 @@
- import asyncio
  import atexit
  import contextvars
  import itertools
@@ -9,13 +8,24 @@ import sys
  import threading
  import uuid
  from collections import defaultdict
- from concurrent.futures import Future, ThreadPoolExecutor
  from contextlib import asynccontextmanager, contextmanager
  from contextvars import ContextVar, Token
  from datetime import datetime, timezone
  from enum import Enum
  from importlib.metadata import Distribution, distributions
- from typing import Any, AsyncIterator, Callable, Dict, Iterator, List, Optional, Set, Union
+ from typing import (
+     Any,
+     AsyncIterator,
+     Callable,
+     Dict,
+     Iterator,
+     List,
+     Mapping,
+     Optional,
+     Set,
+     Union,
+     cast,
+ )

  import requests
  from opentelemetry import trace
@@ -31,12 +41,23 @@ from opentelemetry.sdk.trace.export import (
      SimpleSpanProcessor,
  )
  from opentelemetry.trace import Span
+ from requests import Response

  logger = logging.getLogger(__name__)

  # Default configuration
  DEFAULT_ENDPOINT = "https://api.docent.transluce.org/rest/telemetry"
  DEFAULT_COLLECTION_NAME = "default-collection-name"
+ ERROR_DETAIL_MAX_CHARS = 500
+
+ # Sentinel values for when tracing is disabled
+ DISABLED_AGENT_RUN_ID = "disabled"
+ DISABLED_TRANSCRIPT_ID = "disabled"
+ DISABLED_TRANSCRIPT_GROUP_ID = "disabled"
+
+
+ class DocentTelemetryRequestError(RuntimeError):
+     """Raised when the Docent telemetry backend rejects a client request."""


  class Instruments(Enum):
@@ -135,10 +156,6 @@ class DocentTracer:
          self._transcript_group_states: dict[str, dict[str, Optional[str]]] = {}
          self._transcript_group_state_lock = threading.Lock()
          self._flush_lock = threading.Lock()
-         self._http_executor: Optional[ThreadPoolExecutor] = None
-         self._http_executor_lock = threading.Lock()
-         self._pending_http_futures: Set[Future[Any]] = set()
-         self._pending_http_lock = threading.Lock()

      def get_current_agent_run_id(self) -> Optional[str]:
          """
@@ -448,12 +465,6 @@ class DocentTracer:
          try:
              self.flush()

-             if self._http_executor:
-                 self._http_executor.shutdown(wait=True)
-                 self._http_executor = None
-             with self._pending_http_lock:
-                 self._pending_http_futures.clear()
-
              if self._tracer_provider:
                  self._tracer_provider.shutdown()
                  self._tracer_provider = None
@@ -484,7 +495,6 @@ class DocentTracer:
                  if hasattr(processor, "force_flush"):
                      logger.debug(f"Flushing span processor {i}")
                      processor.force_flush(timeout_millis=50)
-             self._wait_for_http_requests()
              logger.debug("Span flush completed")
          except Exception as e:
              logger.error(f"Error during flush: {e}")
@@ -503,6 +513,24 @@ class DocentTracer:
          """Verify if the manager is properly initialized."""
          return self._initialized

+     def get_disabled_agent_run_id(self, agent_run_id: Optional[str]) -> str:
+         """Return sentinel value for agent run ID when tracing is disabled."""
+         if agent_run_id is None:
+             return DISABLED_AGENT_RUN_ID
+         return agent_run_id
+
+     def get_disabled_transcript_id(self, transcript_id: Optional[str]) -> str:
+         """Return sentinel value for transcript ID when tracing is disabled."""
+         if transcript_id is None:
+             return DISABLED_TRANSCRIPT_ID
+         return transcript_id
+
+     def get_disabled_transcript_group_id(self, transcript_group_id: Optional[str]) -> str:
+         """Return sentinel value for transcript group ID when tracing is disabled."""
+         if transcript_group_id is None:
+             return DISABLED_TRANSCRIPT_GROUP_ID
+         return transcript_group_id
+
      @contextmanager
      def agent_run_context(
          self,
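
A small sketch (not from the diff) of what the sentinel change means for callers: with tracing disabled, contexts now yield the stable string "disabled" instead of fresh UUIDs. It assumes get_tracer is importable from docent.trace, as it is used elsewhere in this module, and that tracing was disabled at initialization.

    # Illustrative sketch only.
    from docent.trace import get_tracer

    tracer = get_tracer()
    with tracer.agent_run_context(None, None) as (agent_run_id, transcript_id):
        # With tracing disabled, both IDs are the literal string "disabled".
        print(agent_run_id, transcript_id)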
@@ -524,11 +552,8 @@ class DocentTracer:
              Tuple of (agent_run_id, transcript_id)
          """
          if self._disabled:
-             # Return dummy IDs when tracing is disabled
-             if agent_run_id is None:
-                 agent_run_id = str(uuid.uuid4())
-             if transcript_id is None:
-                 transcript_id = str(uuid.uuid4())
+             agent_run_id = self.get_disabled_agent_run_id(agent_run_id)
+             transcript_id = self.get_disabled_transcript_id(transcript_id)
              yield agent_run_id, transcript_id
              return

@@ -551,7 +576,7 @@ class DocentTracer:
              try:
                  self.send_agent_run_metadata(agent_run_id, metadata)
              except Exception as e:
-                 logger.warning(f"Failed sending agent run metadata: {e}")
+                 logger.error(f"Failed sending agent run metadata: {e}")

              yield agent_run_id, transcript_id
          finally:
@@ -581,11 +606,8 @@ class DocentTracer:
              Tuple of (agent_run_id, transcript_id)
          """
          if self._disabled:
-             # Return dummy IDs when tracing is disabled
-             if agent_run_id is None:
-                 agent_run_id = str(uuid.uuid4())
-             if transcript_id is None:
-                 transcript_id = str(uuid.uuid4())
+             agent_run_id = self.get_disabled_agent_run_id(agent_run_id)
+             transcript_id = self.get_disabled_transcript_id(transcript_id)
              yield agent_run_id, transcript_id
              return

@@ -631,48 +653,6 @@ class DocentTracer:

          return headers

-     def _get_http_executor(self) -> ThreadPoolExecutor:
-         with self._http_executor_lock:
-             if self._http_executor is None:
-                 self._http_executor = ThreadPoolExecutor(
-                     max_workers=4, thread_name_prefix="docent-http"
-                 )
-             return self._http_executor
-
-     def _should_run_http_in_background(self) -> bool:
-         try:
-             loop = asyncio.get_running_loop()
-         except RuntimeError:
-             return False
-         return loop.is_running()
-
-     def _on_http_future_done(self, future: Future[Any]) -> None:
-         with self._pending_http_lock:
-             self._pending_http_futures.discard(future)
-         try:
-             future.result()
-         except Exception as exc:  # pragma: no cover - defensive logging
-             logger.error(f"Background HTTP request failed: {exc}")
-
-     def _schedule_background_post(self, task: Callable[[], None]) -> None:
-         executor = self._get_http_executor()
-         future = executor.submit(task)
-         with self._pending_http_lock:
-             self._pending_http_futures.add(future)
-         future.add_done_callback(self._on_http_future_done)
-
-     def _wait_for_http_requests(self) -> None:
-         while True:
-             with self._pending_http_lock:
-                 pending = list(self._pending_http_futures)
-             if not pending:
-                 break
-             for future in pending:
-                 try:
-                     future.result()
-                 except Exception as exc:  # pragma: no cover - defensive logging
-                     logger.error(f"Background HTTP request failed: {exc}")
-
      def _ensure_json_serializable_metadata(self, metadata: Dict[str, Any], context: str) -> None:
          """
          Validate that metadata can be serialized to JSON before sending it to the backend.
@@ -681,13 +661,14 @@ class DocentTracer:
              json.dumps(metadata)
          except (TypeError, ValueError) as exc:
              raise TypeError(f"{context} metadata must be JSON serializable") from exc
+         offending_path = self._find_null_character_path(metadata)
+         if offending_path is not None:
+             raise ValueError(
+                 f"{context} metadata cannot contain null characters (found at {offending_path}). "
+                 "Remove or replace '\\u0000' before calling Docent tracing APIs."
+             )

-     def _post_json(
-         self, path: str, data: Dict[str, Any], *, allow_background: bool = False
-     ) -> None:
-         if allow_background and self._should_run_http_in_background():
-             self._schedule_background_post(lambda: self._post_json_sync(path, data))
-             return
+     def _post_json(self, path: str, data: Dict[str, Any]) -> None:
          self._post_json_sync(path, data)

      def _post_json_sync(self, path: str, data: Dict[str, Any]) -> None:
@@ -697,8 +678,159 @@
          try:
              resp = requests.post(url, json=data, headers=self._api_headers(), timeout=(10, 60))
              resp.raise_for_status()
-         except requests.exceptions.RequestException as e:
-             logger.error(f"Failed POST {url}: {e}")
+         except requests.exceptions.RequestException as exc:
+             message = self._format_request_exception(url, exc)
+             raise DocentTelemetryRequestError(message) from exc
+
+     def _format_request_exception(self, url: str, exc: requests.exceptions.RequestException) -> str:
+         response: Optional[Response] = getattr(exc, "response", None)
+         message_parts: List[str] = [f"Failed POST {url}"]
+         suggestion: Optional[str]
+
+         if response is not None:
+             status_phrase = f"HTTP {response.status_code}"
+             if response.reason:
+                 status_phrase = f"{status_phrase} {response.reason}"
+             message_parts.append(f"({status_phrase})")
+
+             detail = self._extract_response_detail(response)
+             if detail:
+                 message_parts.append(f"- Backend detail: {detail}")
+
+             request_id = response.headers.get("x-request-id")
+             if request_id:
+                 message_parts.append(f"(request-id: {request_id})")
+
+             suggestion = self._suggest_fix_for_status(response.status_code)
+         else:
+             message_parts.append(f"- {exc}")
+             suggestion = self._suggest_fix_for_status(None)
+
+         if suggestion:
+             message_parts.append(suggestion)
+
+         return " ".join(part for part in message_parts if part)
+
+     def _extract_response_detail(self, response: Response) -> Optional[str]:
+         try:
+             body = response.json()
+         except ValueError:
+             text = response.text.strip()
+             if not text:
+                 return None
+             normalized = " ".join(text.split())
+             return self._truncate_error_message(normalized)
+
+         if isinstance(body, dict):
+             typed_body = cast(Dict[str, Any], body)
+             structured_message = self._structured_detail_message(typed_body)
+             if structured_message:
+                 return self._truncate_error_message(structured_message)
+             return self._truncate_error_message(self._normalize_error_value(typed_body))
+
+         return self._truncate_error_message(self._normalize_error_value(body))
+
+     def _structured_detail_message(self, data: Dict[str, Any]) -> Optional[str]:
+         for key in ("detail", "message", "error"):
+             if key in data:
+                 structured_value = self._structured_detail_value(data[key])
+                 if structured_value:
+                     return structured_value
+         return self._structured_detail_value(data)
+
+     def _structured_detail_value(self, value: Any) -> Optional[str]:
+         if isinstance(value, Mapping):
+             mapping_value = cast(Mapping[str, Any], value)
+             message = mapping_value.get("message")
+             hint = mapping_value.get("hint")
+             error_code = mapping_value.get("error_code")
+             request_id = mapping_value.get("request_id")
+             fallback_detail = mapping_value.get("detail")
+
+             parts: List[str] = []
+             if isinstance(message, str) and message.strip():
+                 parts.append(message.strip())
+             elif isinstance(fallback_detail, str) and fallback_detail.strip():
+                 parts.append(fallback_detail.strip())
+
+             if isinstance(hint, str) and hint.strip():
+                 parts.append(f"(hint: {hint.strip()})")
+             if isinstance(error_code, str) and error_code.strip():
+                 parts.append(f"[code: {error_code.strip()}]")
+             if isinstance(request_id, str) and request_id.strip():
+                 parts.append(f"(request-id: {request_id.strip()})")
+
+             return " ".join(parts) if parts else None
+
+         if isinstance(value, str) and value.strip():
+             return value.strip()
+
+         return None
+
+     def _normalize_error_value(self, value: Any) -> str:
+         if isinstance(value, str):
+             return " ".join(value.split())
+
+         try:
+             serialized = json.dumps(value)
+         except (TypeError, ValueError):
+             serialized = str(value)
+
+         return " ".join(serialized.split())
+
+     def _truncate_error_message(self, message: str) -> str:
+         message = message.strip()
+         if len(message) <= ERROR_DETAIL_MAX_CHARS:
+             return message
+         return f"{message[:ERROR_DETAIL_MAX_CHARS]}..."
+
+     def _suggest_fix_for_status(self, status_code: Optional[int]) -> Optional[str]:
+         if status_code in (401, 403):
+             return (
+                 "Verify that the Authorization header or DOCENT_API_KEY grants write access to the "
+                 "target collection."
+             )
+         if status_code == 404:
+             return (
+                 "Ensure the tracing endpoint passed to initialize_tracing matches the Docent server's "
+                 "/rest/telemetry route."
+             )
+         if status_code in (400, 422):
+             return (
+                 "Confirm the payload includes collection_id, agent_run_id, metadata, and timestamp in "
+                 "the expected format."
+             )
+         if status_code and status_code >= 500:
+             return "Inspect the Docent backend logs for the referenced request."
+         if status_code is None:
+             return "Confirm the Docent telemetry endpoint is reachable from this process."
+         return None
+
+     def _find_null_character_path(self, value: Any, path: str = "") -> Optional[str]:
+         """Backend rejects NUL bytes, so detect them before we send metadata to the backend."""
+         if isinstance(value, str):
+             if "\x00" in value or "\\u0000" in value or "\\x00" in value:
+                 return path or "<root>"
+             return None
+
+         if isinstance(value, dict):
+             for key, item in value.items():
+                 next_path = f"{path}.{key}" if path else str(key)
+                 result = self._find_null_character_path(item, next_path)
+                 if result:
+                     return result
+             return None
+
+         if isinstance(value, (list, tuple)):
+             for index, item in enumerate(value):
+                 next_path = f"{path}[{index}]" if path else f"[{index}]"
+                 result = self._find_null_character_path(item, next_path)
+                 if result:
+                     return result
+             return None
+
+         return None

      def send_agent_run_score(
          self,
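
Because _post_json now raises instead of logging and swallowing failures, direct callers of the tracer's send_* methods can surface the backend's explanation. A hedged sketch, with placeholder IDs and metadata:

    # Illustrative sketch only.
    from docent.trace import DocentTelemetryRequestError, get_tracer

    tracer = get_tracer()
    try:
        tracer.send_agent_run_metadata("run-123", {"experiment": "ab-test"})  # placeholder values
    except DocentTelemetryRequestError as err:
        # The message includes the HTTP status, backend detail, request-id,
        # and a suggested fix when one is available.
        print(f"telemetry request rejected: {err}")

The context-manager paths still catch these exceptions and log them as errors, so the change mainly affects code that calls the tracer's send_* helpers directly.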
@@ -744,7 +876,7 @@ class DocentTracer:
              "metadata": metadata,
              "timestamp": datetime.now(timezone.utc).isoformat(),
          }
-         self._post_json("/v1/agent-run-metadata", payload, allow_background=True)
+         self._post_json("/v1/agent-run-metadata", payload)

      def send_transcript_metadata(
          self,
@@ -834,9 +966,7 @@ class DocentTracer:
              The transcript ID
          """
          if self._disabled:
-             # Return dummy ID when tracing is disabled
-             if transcript_id is None:
-                 transcript_id = str(uuid.uuid4())
+             transcript_id = self.get_disabled_transcript_id(transcript_id)
              yield transcript_id
              return

@@ -866,7 +996,7 @@ class DocentTracer:
                      transcript_id, name, description, transcript_group_id, metadata
                  )
              except Exception as e:
-                 logger.warning(f"Failed sending transcript data: {e}")
+                 logger.error(f"Failed sending transcript data: {e}")

              yield transcript_id
          finally:
@@ -896,9 +1026,7 @@ class DocentTracer:
              The transcript ID
          """
          if self._disabled:
-             # Return dummy ID when tracing is disabled
-             if transcript_id is None:
-                 transcript_id = str(uuid.uuid4())
+             transcript_id = self.get_disabled_transcript_id(transcript_id)
              yield transcript_id
              return

@@ -928,7 +1056,7 @@ class DocentTracer:
                      transcript_id, name, description, transcript_group_id, metadata
                  )
              except Exception as e:
-                 logger.warning(f"Failed sending transcript data: {e}")
+                 logger.error(f"Failed sending transcript data: {e}")

              yield transcript_id
          finally:
@@ -1029,9 +1157,7 @@ class DocentTracer:
              The transcript group ID
          """
          if self._disabled:
-             # Return dummy ID when tracing is disabled
-             if transcript_group_id is None:
-                 transcript_group_id = str(uuid.uuid4())
+             transcript_group_id = self.get_disabled_transcript_group_id(transcript_group_id)
              yield transcript_group_id
              return

@@ -1063,7 +1189,7 @@ class DocentTracer:
                      transcript_group_id, name, description, parent_transcript_group_id, metadata
                  )
              except Exception as e:
-                 logger.warning(f"Failed sending transcript group data: {e}")
+                 logger.error(f"Failed sending transcript group data: {e}")

              yield transcript_group_id
          finally:
@@ -1093,9 +1219,7 @@ class DocentTracer:
              The transcript group ID
          """
          if self._disabled:
-             # Return dummy ID when tracing is disabled
-             if transcript_group_id is None:
-                 transcript_group_id = str(uuid.uuid4())
+             transcript_group_id = self.get_disabled_transcript_group_id(transcript_group_id)
              yield transcript_group_id
              return

@@ -1127,7 +1251,7 @@ class DocentTracer:
                      transcript_group_id, name, description, parent_transcript_group_id, metadata
                  )
              except Exception as e:
-                 logger.warning(f"Failed sending transcript group data: {e}")
+                 logger.error(f"Failed sending transcript group data: {e}")

              yield transcript_group_id
          finally:
@@ -1331,28 +1455,33 @@ def agent_run_metadata(metadata: Dict[str, Any]) -> None:

          tracer.send_agent_run_metadata(agent_run_id, metadata)
      except Exception as e:
-         logger.error(f"Failed to send metadata: {e}")
+         logger.error(f"Failed to send agent run metadata: {e}")


  def transcript_metadata(
+     metadata: Dict[str, Any],
+     *,
      name: Optional[str] = None,
      description: Optional[str] = None,
      transcript_group_id: Optional[str] = None,
-     metadata: Optional[Dict[str, Any]] = None,
  ) -> None:
      """
      Send transcript metadata directly to the backend for the current transcript.

      Args:
+         metadata: Dictionary of metadata to attach to the current transcript (required)
          name: Optional transcript name
          description: Optional transcript description
-         parent_transcript_id: Optional parent transcript ID
-         metadata: Optional metadata to send
+         transcript_group_id: Optional transcript group ID to associate with

      Example:
-         transcript_metadata(name="data_processing", description="Process user data")
-         transcript_metadata(metadata={"user": "John", "model": "gpt-4"})
-         transcript_metadata(name="validation", parent_transcript_id="parent-123")
+         transcript_metadata({"user": "John", "model": "gpt-4"})
+         transcript_metadata({"env": "prod"}, name="data_processing")
+         transcript_metadata(
+             {"team": "search"},
+             name="validation",
+             transcript_group_id="group-123",
+         )
      """
      try:
          tracer = get_tracer()
@@ -1371,23 +1500,29 @@ def transcript_metadata(


  def transcript_group_metadata(
+     metadata: Dict[str, Any],
+     *,
      name: Optional[str] = None,
      description: Optional[str] = None,
      parent_transcript_group_id: Optional[str] = None,
-     metadata: Optional[Dict[str, Any]] = None,
  ) -> None:
      """
      Send transcript group metadata directly to the backend for the current transcript group.

      Args:
+         metadata: Dictionary of metadata to attach to the current transcript group (required)
          name: Optional transcript group name
          description: Optional transcript group description
          parent_transcript_group_id: Optional parent transcript group ID
-         metadata: Optional metadata to send

      Example:
-         transcript_group_metadata(name="pipeline", description="Main processing pipeline")
-         transcript_group_metadata(metadata={"team": "search", "env": "prod"})
+         transcript_group_metadata({"team": "search", "env": "prod"})
+         transcript_group_metadata({"env": "prod"}, name="pipeline")
+         transcript_group_metadata(
+             {"team": "search"},
+             name="pipeline",
+             parent_transcript_group_id="root-group",
+         )
      """
      try:
          tracer = get_tracer()
@@ -1424,6 +1559,11 @@ class AgentRunContext:

      def __enter__(self) -> tuple[str, str]:
          """Sync context manager entry."""
+         if is_disabled():
+             tracer = get_tracer()
+             self.agent_run_id = tracer.get_disabled_agent_run_id(self.agent_run_id)
+             self.transcript_id = tracer.get_disabled_transcript_id(self.transcript_id)
+             return self.agent_run_id, self.transcript_id
          self._sync_context = get_tracer().agent_run_context(
              self.agent_run_id, self.transcript_id, metadata=self.metadata, **self.attributes
          )
@@ -1436,6 +1576,11 @@ class AgentRunContext:

      async def __aenter__(self) -> tuple[str, str]:
          """Async context manager entry."""
+         if is_disabled():
+             tracer = get_tracer()
+             self.agent_run_id = tracer.get_disabled_agent_run_id(self.agent_run_id)
+             self.transcript_id = tracer.get_disabled_transcript_id(self.transcript_id)
+             return self.agent_run_id, self.transcript_id
          self._async_context = get_tracer().async_agent_run_context(
              self.agent_run_id, self.transcript_id, metadata=self.metadata, **self.attributes
          )
@@ -1576,6 +1721,10 @@ class TranscriptContext:

      def __enter__(self) -> str:
          """Sync context manager entry."""
+         if is_disabled():
+             tracer = get_tracer()
+             self.transcript_id = tracer.get_disabled_transcript_id(self.transcript_id)
+             return self.transcript_id
          self._sync_context = get_tracer().transcript_context(
              name=self.name,
              transcript_id=self.transcript_id,
@@ -1592,6 +1741,10 @@ class TranscriptContext:

      async def __aenter__(self) -> str:
          """Async context manager entry."""
+         if is_disabled():
+             tracer = get_tracer()
+             self.transcript_id = tracer.get_disabled_transcript_id(self.transcript_id)
+             return self.transcript_id
          self._async_context = get_tracer().async_transcript_context(
              name=self.name,
              transcript_id=self.transcript_id,
@@ -1753,6 +1906,12 @@ class TranscriptGroupContext:

      def __enter__(self) -> str:
          """Sync context manager entry."""
+         if is_disabled():
+             tracer = get_tracer()
+             self.transcript_group_id = tracer.get_disabled_transcript_group_id(
+                 self.transcript_group_id
+             )
+             return self.transcript_group_id
          self._sync_context = get_tracer().transcript_group_context(
              name=self.name,
              transcript_group_id=self.transcript_group_id,
@@ -1769,6 +1928,12 @@ class TranscriptGroupContext:

      async def __aenter__(self) -> str:
          """Async context manager entry."""
+         if is_disabled():
+             tracer = get_tracer()
+             self.transcript_group_id = tracer.get_disabled_transcript_group_id(
+                 self.transcript_group_id
+             )
+             return self.transcript_group_id
          self._async_context = get_tracer().async_transcript_group_context(
              name=self.name,
              transcript_group_id=self.transcript_group_id,
@@ -1,7 +1,7 @@
  [project]
  name = "docent-python"
  description = "Docent SDK"
- version = "0.1.26-alpha"
+ version = "0.1.28-alpha"
  authors = [
      { name="Transluce", email="info@transluce.org" },
  ]
@@ -1,66 +0,0 @@
- import anyio
- from tqdm.auto import tqdm
-
- from docent._llm_util.llm_svc import BaseLLMService
- from docent._log_util import get_logger
- from docent.data_models.agent_run import AgentRun
- from docent.judges import (
-     JudgeResult,
-     JudgeResultCompletionCallback,
-     Rubric,
- )
- from docent.judges.impl import build_judge
-
- logger = get_logger(__name__)
-
-
- async def run_rubric(
-     agent_runs: list[AgentRun],
-     rubric: Rubric,
-     llm_svc: BaseLLMService,
-     callback: JudgeResultCompletionCallback | None = None,
-     *,
-     show_progress: bool = True,
- ) -> list[JudgeResult | None]:
-     if not agent_runs:
-         raise ValueError("agent_runs must be a non-empty sequence")
-     if rubric.n_rollouts_per_input <= 0:
-         raise ValueError("rubric.n_rollouts_per_input must be greater than 0")
-
-     judge = build_judge(rubric, llm_svc)
-
-     logger.info(
-         "Running rubric %s version %s against %d agent runs",
-         rubric.id,
-         rubric.version,
-         len(agent_runs),
-     )
-
-     agent_results: list[JudgeResult | None] = [None for _ in agent_runs]
-     progress_bar = tqdm(
-         total=len(agent_runs), desc=f"Rubric {rubric.id}", disable=not show_progress
-     )
-
-     async def _run_single_judge(index: int, agent_run: AgentRun):
-         agent_results[index] = result = await judge(agent_run)
-
-         if callback is not None:
-             await callback(index, [result] if result is not None else None)
-         progress_bar.update()
-
-     try:
-         async with anyio.create_task_group() as tg:
-             for index, agent_run in enumerate(agent_runs):
-                 tg.start_soon(_run_single_judge, index, agent_run)
-     finally:
-         progress_bar.close()
-
-     successful = sum(result is not None for result in agent_results)
-     logger.info(
-         "Finished rubric %s: produced %d/%d judge results",
-         rubric.id,
-         successful,
-         len(agent_results),
-     )
-
-     return agent_results