docent-python 0.1.18a0__tar.gz → 0.1.19a0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of docent-python might be problematic.
- {docent_python-0.1.18a0 → docent_python-0.1.19a0}/PKG-INFO +1 -1
- {docent_python-0.1.18a0 → docent_python-0.1.19a0}/docent/data_models/__init__.py +2 -0
- {docent_python-0.1.18a0 → docent_python-0.1.19a0}/docent/data_models/agent_run.py +5 -5
- {docent_python-0.1.18a0 → docent_python-0.1.19a0}/docent/data_models/chat/__init__.py +6 -1
- {docent_python-0.1.18a0 → docent_python-0.1.19a0}/docent/data_models/citation.py +103 -22
- docent_python-0.1.19a0/docent/data_models/judge.py +16 -0
- docent_python-0.1.19a0/docent/data_models/metadata_util.py +16 -0
- {docent_python-0.1.18a0 → docent_python-0.1.19a0}/docent/data_models/remove_invalid_citation_ranges.py +23 -10
- {docent_python-0.1.18a0 → docent_python-0.1.19a0}/docent/data_models/transcript.py +18 -16
- {docent_python-0.1.18a0 → docent_python-0.1.19a0}/docent/sdk/agent_run_writer.py +18 -5
- {docent_python-0.1.18a0 → docent_python-0.1.19a0}/docent/sdk/client.py +104 -20
- {docent_python-0.1.18a0 → docent_python-0.1.19a0}/docent/trace.py +54 -49
- {docent_python-0.1.18a0 → docent_python-0.1.19a0}/pyproject.toml +1 -1
- {docent_python-0.1.18a0 → docent_python-0.1.19a0}/uv.lock +1 -1
- docent_python-0.1.18a0/docent/data_models/metadata.py +0 -229
- docent_python-0.1.18a0/docent/data_models/yaml_util.py +0 -12
- {docent_python-0.1.18a0 → docent_python-0.1.19a0}/.gitignore +0 -0
- {docent_python-0.1.18a0 → docent_python-0.1.19a0}/LICENSE.md +0 -0
- {docent_python-0.1.18a0 → docent_python-0.1.19a0}/README.md +0 -0
- {docent_python-0.1.18a0 → docent_python-0.1.19a0}/docent/__init__.py +0 -0
- {docent_python-0.1.18a0 → docent_python-0.1.19a0}/docent/_log_util/__init__.py +0 -0
- {docent_python-0.1.18a0 → docent_python-0.1.19a0}/docent/_log_util/logger.py +0 -0
- {docent_python-0.1.18a0 → docent_python-0.1.19a0}/docent/data_models/_tiktoken_util.py +0 -0
- {docent_python-0.1.18a0 → docent_python-0.1.19a0}/docent/data_models/chat/content.py +0 -0
- {docent_python-0.1.18a0 → docent_python-0.1.19a0}/docent/data_models/chat/message.py +0 -0
- {docent_python-0.1.18a0 → docent_python-0.1.19a0}/docent/data_models/chat/tool.py +0 -0
- {docent_python-0.1.18a0 → docent_python-0.1.19a0}/docent/data_models/regex.py +0 -0
- {docent_python-0.1.18a0 → docent_python-0.1.19a0}/docent/data_models/shared_types.py +0 -0
- {docent_python-0.1.18a0 → docent_python-0.1.19a0}/docent/loaders/load_inspect.py +0 -0
- {docent_python-0.1.18a0 → docent_python-0.1.19a0}/docent/py.typed +0 -0
- {docent_python-0.1.18a0 → docent_python-0.1.19a0}/docent/samples/__init__.py +0 -0
- {docent_python-0.1.18a0 → docent_python-0.1.19a0}/docent/samples/load.py +0 -0
- {docent_python-0.1.18a0 → docent_python-0.1.19a0}/docent/samples/log.eval +0 -0
- {docent_python-0.1.18a0 → docent_python-0.1.19a0}/docent/samples/tb_airline.json +0 -0
- {docent_python-0.1.18a0 → docent_python-0.1.19a0}/docent/sdk/__init__.py +0 -0
- {docent_python-0.1.18a0 → docent_python-0.1.19a0}/docent/trace_temp.py +0 -0
docent/data_models/__init__.py

@@ -1,11 +1,13 @@
 from docent.data_models.agent_run import AgentRun
 from docent.data_models.citation import Citation
+from docent.data_models.judge import JudgeRunLabel
 from docent.data_models.regex import RegexSnippet
 from docent.data_models.transcript import Transcript, TranscriptGroup

 __all__ = [
     "AgentRun",
     "Citation",
+    "JudgeRunLabel",
     "RegexSnippet",
     "Transcript",
     "TranscriptGroup",
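With this hunk, `JudgeRunLabel` joins the package's public data-model exports. A quick, hedged import check, assuming docent-python 0.1.19a0 is installed:

```python
# Assumes docent-python 0.1.19a0 is installed; JudgeRunLabel is re-exported
# from docent.data_models alongside the existing models.
from docent.data_models import AgentRun, Citation, JudgeRunLabel

print(sorted(JudgeRunLabel.model_fields))  # ['agent_run_id', 'id', 'label', 'rubric_id']
```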
docent/data_models/agent_run.py

@@ -17,8 +17,8 @@ from pydantic_core import to_jsonable_python

 from docent._log_util import get_logger
 from docent.data_models._tiktoken_util import get_token_count, group_messages_into_ranges
+from docent.data_models.metadata_util import dump_metadata
 from docent.data_models.transcript import Transcript, TranscriptGroup
-from docent.data_models.yaml_util import yaml_dump_metadata

 logger = get_logger(__name__)

@@ -446,10 +446,10 @@ class AgentRun(BaseModel):
         text = _recurse("__global_root")

         # Append agent run metadata below the full content
-
-        if
+        metadata_text = dump_metadata(self.metadata)
+        if metadata_text is not None:
             if indent > 0:
-
-            text += f"\n<|agent run metadata|>\n{
+                metadata_text = textwrap.indent(metadata_text, " " * indent)
+            text += f"\n<|agent run metadata|>\n{metadata_text}\n</|agent run metadata|>"

         return text
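The YAML-based metadata dump is replaced by the JSON-based `dump_metadata` helper, and the rendered block is indented with `textwrap.indent`. A standalone sketch of that rendering path, with `json.dumps` standing in for docent's helper and the tag format taken from the hunk above:

```python
# Standalone sketch of the new metadata rendering; json.dumps stands in for
# docent's dump_metadata helper, and the tag format matches the diff above.
import json
import textwrap

def append_run_metadata(text: str, metadata: dict, indent: int = 0) -> str:
    metadata_text = json.dumps(metadata, indent=2).strip() if metadata else None
    if metadata_text is not None:
        if indent > 0:
            metadata_text = textwrap.indent(metadata_text, " " * indent)
        text += f"\n<|agent run metadata|>\n{metadata_text}\n</|agent run metadata|>"
    return text

print(append_run_metadata("...rendered transcripts...", {"task": "demo"}, indent=2))
```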
docent/data_models/chat/__init__.py

@@ -7,7 +7,12 @@ from docent.data_models.chat.message import (
     UserMessage,
     parse_chat_message,
 )
-from docent.data_models.chat.tool import
+from docent.data_models.chat.tool import (
+    ToolCall,
+    ToolCallContent,
+    ToolInfo,
+    ToolParams,
+)

 __all__ = [
     "ChatMessage",
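The single-line tool import is split into a parenthesized multi-line import. A minimal, hedged import check (the subpackage's full `__all__` is not shown in this diff, so only direct-name imports are relied on here):

```python
# Hedged sketch: these names are imported by docent/data_models/chat/__init__.py,
# so importing them from the subpackage should work once the package is installed.
from docent.data_models.chat import ToolCall, ToolCallContent, ToolInfo, ToolParams

print(ToolCall, ToolCallContent, ToolInfo, ToolParams)
```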
docent/data_models/citation.py

@@ -1,15 +1,27 @@
 import re
+from dataclasses import dataclass

 from pydantic import BaseModel


+@dataclass
+class ParsedCitation:
+    """Represents a parsed citation before conversion to full Citation object."""
+
+    transcript_idx: int | None
+    block_idx: int | None
+    metadata_key: str | None = None
+    start_pattern: str | None = None
+
+
 class Citation(BaseModel):
     start_idx: int
     end_idx: int
     agent_run_idx: int | None = None
     transcript_idx: int | None = None
-    block_idx: int
+    block_idx: int | None = None
     action_unit_idx: int | None = None
+    metadata_key: str | None = None
     start_pattern: str | None = None


@@ -17,6 +29,9 @@ RANGE_BEGIN = "<RANGE>"
 RANGE_END = "</RANGE>"

 _SINGLE_RE = re.compile(r"T(\d+)B(\d+)")
+_METADATA_RE = re.compile(r"^M\.([^:]+)$")  # [M.key]
+_TRANSCRIPT_METADATA_RE = re.compile(r"^T(\d+)M\.([^:]+)$")  # [T0M.key]
+_MESSAGE_METADATA_RE = re.compile(r"^T(\d+)B(\d+)M\.([^:]+)$")  # [T0B1M.key]
 _RANGE_CONTENT_RE = re.compile(r":\s*" + re.escape(RANGE_BEGIN) + r".*?" + re.escape(RANGE_END))


@@ -70,41 +85,93 @@ def scan_brackets(text: str) -> list[tuple[int, int, str]]:
     return matches


-def parse_single_citation(part: str) ->
+def parse_single_citation(part: str) -> ParsedCitation | None:
     """
     Parse a single citation token inside a bracket and return its components.

-    Returns
+    Returns ParsedCitation or None if invalid.
+    For metadata citations, transcript_idx may be None (for agent run metadata).
+    Supports optional text range for all valid citation kinds.
     """
     token = part.strip()
     if not token:
         return None

+    # Extract optional range part
+    start_pattern: str | None = None
+    citation_part = token
     if ":" in token:
-
-
-
+        left, right = token.split(":", 1)
+        citation_part = left.strip()
+        start_pattern = _extract_range_pattern(right)
+
+    # Try matches in order of specificity
+    # 1) Message metadata [T0B0M.key]
+    m = _MESSAGE_METADATA_RE.match(citation_part)
+    if m:
+        transcript_idx = int(m.group(1))
+        block_idx = int(m.group(2))
+        metadata_key = m.group(3)
+        # Disallow nested keys like status.code per instruction
+        if "." in metadata_key:
             return None
-
-
-
-
-
-
-
+        return ParsedCitation(
+            transcript_idx=transcript_idx,
+            block_idx=block_idx,
+            metadata_key=metadata_key,
+            start_pattern=start_pattern,
+        )
+
+    # 2) Transcript metadata [T0M.key]
+    m = _TRANSCRIPT_METADATA_RE.match(citation_part)
+    if m:
+        transcript_idx = int(m.group(1))
+        metadata_key = m.group(2)
+        if "." in metadata_key:
             return None
-
-
-
+        return ParsedCitation(
+            transcript_idx=transcript_idx,
+            block_idx=None,
+            metadata_key=metadata_key,
+            start_pattern=start_pattern,
+        )
+
+    # 3) Agent run metadata [M.key]
+    m = _METADATA_RE.match(citation_part)
+    if m:
+        metadata_key = m.group(1)
+        if "." in metadata_key:
+            return None
+        return ParsedCitation(
+            transcript_idx=None,
+            block_idx=None,
+            metadata_key=metadata_key,
+            start_pattern=start_pattern,
+        )
+
+    # 4) Regular transcript block [T0B0]
+    m = _SINGLE_RE.match(citation_part)
+    if m:
+        transcript_idx = int(m.group(1))
+        block_idx = int(m.group(2))
+        return ParsedCitation(
+            transcript_idx=transcript_idx, block_idx=block_idx, start_pattern=start_pattern
+        )
+
+    return None


 def parse_citations(text: str) -> tuple[str, list[Citation]]:
     """
-    Parse citations from text in the format described by
+    Parse citations from text in the format described by TEXT_RANGE_CITE_INSTRUCTION.

     Supported formats:
     - Single block: [T<key>B<idx>]
     - Text range with start pattern: [T<key>B<idx>:<RANGE>start_pattern</RANGE>]
+    - Agent run metadata: [M.key]
+    - Transcript metadata: [T<key>M.key]
+    - Message metadata: [T<key>B<idx>M.key]
+    - Message metadata with text range: [T<key>B<idx>M.key:<RANGE>start_pattern</RANGE>]

     Args:
         text: The text to parse citations from

@@ -127,8 +194,21 @@ def parse_citations(text: str) -> tuple[str, list[Citation]]:
         # Parse a single citation token inside the bracket
         parsed = parse_single_citation(bracket_content)
         if parsed:
-
-
+            # Create appropriate replacement text based on citation type
+            if parsed.metadata_key:
+                if parsed.transcript_idx is None:
+                    # Agent run metadata [M.key]
+                    replacement = "run metadata"
+                elif parsed.block_idx is None:
+                    # Transcript metadata [T0M.key]
+                    replacement = f"T{parsed.transcript_idx}"
+                else:
+                    # Message metadata [T0B1M.key]
+                    replacement = f"T{parsed.transcript_idx}B{parsed.block_idx}"
+            else:
+                # Regular transcript block [T0B1]
+                replacement = f"T{parsed.transcript_idx}B{parsed.block_idx}"
+
             # Current absolute start position for this replacement in the cleaned text
             start_idx = len(cleaned_text)
             end_idx = start_idx + len(replacement)

@@ -137,10 +217,11 @@ def parse_citations(text: str) -> tuple[str, list[Citation]]:
                     start_idx=start_idx,
                     end_idx=end_idx,
                     agent_run_idx=None,
-                    transcript_idx=transcript_idx,
-                    block_idx=block_idx,
+                    transcript_idx=parsed.transcript_idx,
+                    block_idx=parsed.block_idx,
                     action_unit_idx=None,
-
+                    metadata_key=parsed.metadata_key,
+                    start_pattern=parsed.start_pattern,
                 )
             )
             cleaned_text += replacement
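The three new regexes distinguish run-, transcript-, and message-level metadata citations from plain block citations. A standalone sketch of how those patterns classify tokens (regexes copied from the hunk above; `ParsedCitation` itself lives in docent/data_models/citation.py):

```python
# Standalone sketch of the new metadata citation patterns; tokens are illustrative.
import re

_METADATA_RE = re.compile(r"^M\.([^:]+)$")                      # [M.key]
_TRANSCRIPT_METADATA_RE = re.compile(r"^T(\d+)M\.([^:]+)$")     # [T0M.key]
_MESSAGE_METADATA_RE = re.compile(r"^T(\d+)B(\d+)M\.([^:]+)$")  # [T0B1M.key]

for token in ["M.task_description", "T0M.start_time", "T0B1M.status"]:
    kind = (
        "message metadata" if _MESSAGE_METADATA_RE.match(token)
        else "transcript metadata" if _TRANSCRIPT_METADATA_RE.match(token)
        else "run metadata" if _METADATA_RE.match(token)
        else "unknown"
    )
    print(token, "->", kind)
```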
docent/data_models/judge.py (new file)

@@ -0,0 +1,16 @@
+"""Judge-related data models shared across Docent components."""
+
+from typing import Any
+from uuid import uuid4
+
+from pydantic import BaseModel, Field
+
+
+class JudgeRunLabel(BaseModel):
+    id: str = Field(default_factory=lambda: str(uuid4()))
+    agent_run_id: str
+    rubric_id: str
+    label: dict[str, Any]
+
+
+__all__ = ["JudgeRunLabel"]
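A hedged usage sketch of the new model, with placeholder IDs and an illustrative label payload (the real payload must follow the rubric's output schema):

```python
# Fields as defined in the new file above; the IDs here are placeholders.
from docent.data_models.judge import JudgeRunLabel

label = JudgeRunLabel(
    agent_run_id="run-123",
    rubric_id="rubric-456",
    label={"correct": True, "notes": "matches the rubric schema"},
)
print(label.id)  # auto-generated uuid4 string
```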
docent/data_models/metadata_util.py (new file)

@@ -0,0 +1,16 @@
+import json
+from typing import Any
+
+from pydantic_core import to_jsonable_python
+
+
+def dump_metadata(metadata: dict[str, Any]) -> str | None:
+    """
+    Dump metadata to a JSON string.
+    We used to use YAML to save tokens, but JSON makes it easier to find cited ranges on the frontend because the frontend uses JSON.
+    """
+    if not metadata:
+        return None
+    metadata_obj = to_jsonable_python(metadata)
+    text = json.dumps(metadata_obj, indent=2)
+    return text.strip()
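A quick sketch of the helper's contract as defined above: empty metadata yields `None`, otherwise an indented JSON string (example values are made up):

```python
# Assumes docent-python 0.1.19a0 is installed.
from docent.data_models.metadata_util import dump_metadata

assert dump_metadata({}) is None
print(dump_metadata({"model": "gpt-4o", "epoch": 1}))
# {
#   "model": "gpt-4o",
#   "epoch": 1
# }
```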
docent/data_models/remove_invalid_citation_ranges.py

@@ -1,3 +1,4 @@
+import json
 import re

 from docent.data_models.agent_run import AgentRun

@@ -52,7 +53,7 @@ def find_citation_matches_in_text(text: str, start_pattern: str) -> list[tuple[i

 def get_transcript_text_for_citation(agent_run: AgentRun, citation: Citation) -> str | None:
     """
-    Get the text content of a specific transcript block from an AgentRun,
+    Get the text content of a specific transcript block (or transcript/run metadata) from an AgentRun,
     using the same formatting as shown to LLMs via format_chat_message.

     Args:

@@ -62,19 +63,28 @@ def get_transcript_text_for_citation(agent_run: AgentRun, citation: Citation) ->
     Returns:
         Text content of the specified block (including tool calls), or None if not found
     """
-    if citation.transcript_idx is None:
-        return None
-
     try:
-        if citation.transcript_idx
+        if citation.transcript_idx is None:
+            # At the run level, can only cite metadata
+            if citation.metadata_key is not None:
+                return json.dumps(agent_run.metadata.get(citation.metadata_key))
             return None
+
         transcript_id = agent_run.get_transcript_ids_ordered()[citation.transcript_idx]
         transcript = agent_run.transcript_dict[transcript_id]

-        if citation.block_idx
+        if citation.block_idx is None:
+            # At the transcript level, can only cite metadata
+            if citation.metadata_key is not None:
+                return json.dumps(transcript.metadata.get(citation.metadata_key))
             return None
+
         message = transcript.messages[citation.block_idx]

+        # At the message level, can cite metadata or content
+        if citation.metadata_key is not None:
+            return json.dumps(message.metadata.get(citation.metadata_key))
+
         # Use the same formatting function that generates content for LLMs
         # This ensures consistent formatting between citation validation and LLM serialization
         return format_chat_message(

@@ -99,6 +109,9 @@ def validate_citation_text_range(agent_run: AgentRun, citation: Citation) -> boo
     if not citation.start_pattern:
         # Nothing to validate
         return True
+    if citation.metadata_key is not None:
+        # We don't need to remove invalid metadata citation ranges
+        return True

     text = get_transcript_text_for_citation(agent_run, citation)
     if text is None:

@@ -130,16 +143,16 @@ def remove_invalid_citation_ranges(text: str, agent_run: AgentRun) -> str:
         # Parse this bracket content to get citation info
         parsed = parse_single_citation(bracket_content)
         if parsed:
-            transcript_idx, block_idx, start_pattern = parsed
             # The citation spans from start to end in the original text
             citation = Citation(
                 start_idx=start,
                 end_idx=end,
                 agent_run_idx=None,
-                transcript_idx=transcript_idx,
-                block_idx=block_idx,
+                transcript_idx=parsed.transcript_idx,
+                block_idx=parsed.block_idx,
                 action_unit_idx=None,
-
+                metadata_key=parsed.metadata_key,
+                start_pattern=parsed.start_pattern,
             )
             citations.append(citation)

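For metadata citations, the rewritten lookup returns the cited value serialized with `json.dumps`, and range validation is skipped. A small standalone sketch of that lookup behavior (the dictionary and keys are made up for illustration):

```python
# Mirrors the json.dumps(metadata.get(key)) lookups added above.
import json

run_metadata = {"task_description": "Book a flight", "benchmark": "tau-bench"}

def cited_metadata_text(metadata: dict, key: str) -> str:
    # Missing keys serialize to the JSON literal null, i.e. json.dumps(None).
    return json.dumps(metadata.get(key))

print(cited_metadata_text(run_metadata, "task_description"))  # "Book a flight"
print(cited_metadata_text(run_metadata, "missing_key"))       # null
```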
docent/data_models/transcript.py

@@ -15,7 +15,7 @@ from docent.data_models._tiktoken_util import (
 )
 from docent.data_models.chat import AssistantMessage, ChatMessage, ContentReasoning
 from docent.data_models.citation import RANGE_BEGIN, RANGE_END
-from docent.data_models.
+from docent.data_models.metadata_util import dump_metadata

 # Template for formatting individual transcript blocks
 TRANSCRIPT_BLOCK_TEMPLATE = """

@@ -29,6 +29,12 @@ TEXT_RANGE_CITE_INSTRUCTION = f"""Anytime you quote the transcript, or refer to

 A citation may include a specific range of text within a block. Use {RANGE_BEGIN} and {RANGE_END} to mark the specific range of text. Add it after the block ID separated by a colon. For example, to cite the part of transcript 0, block 1, where the agent says "I understand the task", write [T0B1:{RANGE_BEGIN}I understand the task{RANGE_END}]. Citations must follow this exact format. The markers {RANGE_BEGIN} and {RANGE_END} must be used ONLY inside the brackets of a citation.

+- You may cite a top-level key in the agent run metadata like this: [M.task_description].
+- You may cite a top-level key in transcript metadata. For example, for transcript 0: [T0M.start_time].
+- You may cite a top-level key in message metadata for a block. For example, for transcript 0, block 1: [T0B1M.status].
+- You may not cite nested keys. For example, [T0B1M.status.code] is invalid.
+- Within a top-level metadata key you may cite a range of text that appears in the value. For example, [T0B1M.status:{RANGE_BEGIN}"running":false{RANGE_END}].
+
 Important notes:
 - You must include the full content of the text range {RANGE_BEGIN} and {RANGE_END}, EXACTLY as it appears in the transcript, word-for-word, including any markers or punctuation that appear in the middle of the text.
 - Citations must be as specific as possible. This means you should usually cite a specific text range within a block.

@@ -73,9 +79,9 @@ def format_chat_message(
         cur_content += f"\n<tool call>\n{tool_call.function}({args})\n</tool call>"

     if message.metadata:
-
-        if
-        cur_content += f"\n<|message metadata|>\n{
+        metadata_text = dump_metadata(message.metadata)
+        if metadata_text is not None:
+            cur_content += f"\n<|message metadata|>\n{metadata_text}\n</|message metadata|>"

     return TRANSCRIPT_BLOCK_TEMPLATE.format(
         index_label=index_label, role=message.role, content=cur_content

@@ -127,13 +133,11 @@ class TranscriptGroup(BaseModel):
             str: XML-like wrapped text including the group's metadata.
         """
         # Prepare YAML metadata
-
-        if
+        metadata_text = dump_metadata(self.metadata)
+        if metadata_text is not None:
             if indent > 0:
-
-            inner =
-                f"{children_text}\n<|{self.name} metadata|>\n{yaml_text}\n</|{self.name} metadata|>"
-            )
+                metadata_text = textwrap.indent(metadata_text, " " * indent)
+            inner = f"{children_text}\n<|{self.name} metadata|>\n{metadata_text}\n</|{self.name} metadata|>"
         else:
             inner = children_text

@@ -447,13 +451,11 @@ class Transcript(BaseModel):
         content_str = f"<|T{transcript_idx} blocks|>\n{blocks_str}\n</|T{transcript_idx} blocks|>"

         # Gather metadata and add to content
-
-        if
+        metadata_text = dump_metadata(self.metadata)
+        if metadata_text is not None:
             if indent > 0:
-
-            content_str +=
-                f"\n<|T{transcript_idx} metadata|>\n{yaml_text}\n</|T{transcript_idx} metadata|>"
-            )
+                metadata_text = textwrap.indent(metadata_text, " " * indent)
+            content_str += f"\n<|T{transcript_idx} metadata|>\n{metadata_text}\n</|T{transcript_idx} metadata|>"

         # Format content and return
         if indent > 0:
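The new instruction text above defines the metadata citation syntax. The strings below are illustrative tokens that the updated parser accepts or rejects, with the RANGE markers spelled out literally:

```python
# Illustrative citation tokens for the syntax documented in TEXT_RANGE_CITE_INSTRUCTION.
RANGE_BEGIN, RANGE_END = "<RANGE>", "</RANGE>"

valid_citations = [
    "[M.task_description]",  # top-level key in agent run metadata
    "[T0M.start_time]",      # top-level key in transcript 0's metadata
    "[T0B1M.status]",        # top-level key in message metadata of transcript 0, block 1
    f"[T0B1M.status:{RANGE_BEGIN}\"running\":false{RANGE_END}]",  # range within the value
]
invalid_citations = [
    "[T0B1M.status.code]",   # nested keys are rejected
]
print(valid_citations, invalid_citations)
```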
docent/sdk/agent_run_writer.py

@@ -4,11 +4,12 @@ import queue
 import signal
 import threading
 import time
-from typing import Any, Callable, Coroutine, Optional
+from typing import Any, AsyncGenerator, Callable, Coroutine, Optional

 import anyio
 import backoff
 import httpx
+import orjson
 from backoff.types import Details

 from docent._log_util.logger import get_logger

@@ -38,6 +39,15 @@ def _print_backoff_message(e: Details):
     )


+async def _generate_payload_chunks(runs: list[AgentRun]) -> AsyncGenerator[bytes, None]:
+    yield b'{"agent_runs": ['
+    for i, ar in enumerate(runs):
+        if i > 0:
+            yield b","
+        yield orjson.dumps(ar.model_dump(mode="json"))
+    yield b"]}"
+
+
 class AgentRunWriter:
     """Background thread for logging agent runs.

@@ -175,7 +185,7 @@ class AgentRunWriter:
             logger.info("Cancelling pending tasks...")
             self._cancel_event.set()
             n_pending = self._queue.qsize()
-            logger.info(f"Cancelled ~{n_pending} pending
+            logger.info(f"Cancelled ~{n_pending} pending runs")

             # Give a brief moment to exit
             logger.info("Waiting for thread to exit...")

@@ -194,8 +204,11 @@ class AgentRunWriter:
             on_backoff=_print_backoff_message,
         )
         async def _post_batch(batch: list[AgentRun]) -> None:
-
-
+            resp = await client.post(
+                self._endpoint,
+                content=_generate_payload_chunks(batch),
+                timeout=self._request_timeout,
+            )
             resp.raise_for_status()

         return _post_batch

@@ -246,7 +259,7 @@ def init(
     web_url: str = "https://docent.transluce.org",
     api_key: str | None = None,
     # Writer arguments
-    num_workers: int =
+    num_workers: int = 4,
     queue_maxsize: int = 20_000,
     request_timeout: float = 30.0,
     flush_interval: float = 1.0,
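The writer now streams the batch body instead of building one large JSON string: an async generator yields byte chunks that httpx sends as the request content. A hedged, self-contained sketch of the same pattern outside the SDK (the endpoint URL is a placeholder, and here each run is already a plain dict rather than an AgentRun model):

```python
# Sketch of streaming a chunked JSON payload with orjson + httpx; URL is a placeholder.
import asyncio
import httpx
import orjson

async def generate_payload_chunks(runs):
    yield b'{"agent_runs": ['
    for i, ar in enumerate(runs):
        if i > 0:
            yield b","
        yield orjson.dumps(ar)  # `ar` is a plain dict in this sketch
    yield b"]}"

async def post_runs(runs):
    async with httpx.AsyncClient() as client:
        resp = await client.post(
            "https://example.invalid/agent_runs",   # placeholder endpoint
            content=generate_payload_chunks(runs),  # async byte iterator
            timeout=30.0,
        )
        resp.raise_for_status()

# asyncio.run(post_runs([{"name": "demo", "transcripts": []}]))
```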
docent/sdk/client.py

@@ -8,6 +8,7 @@ from tqdm import tqdm

 from docent._log_util.logger import get_logger
 from docent.data_models.agent_run import AgentRun
+from docent.data_models.judge import JudgeRunLabel
 from docent.loaders import load_inspect

 logger = get_logger(__name__)

@@ -48,13 +49,18 @@ class Docent:

         self._login(api_key)

+    def _handle_response_errors(self, response: requests.Response):
+        """Handle API response and raise informative errors.
+        TODO: make this more informative."""
+        response.raise_for_status()
+
     def _login(self, api_key: str):
         """Login with email/password to establish session."""
         self._session.headers.update({"Authorization": f"Bearer {api_key}"})

         url = f"{self._server_url}/api-keys/test"
         response = self._session.get(url)
-
+        self._handle_response_errors(response)

         logger.info("Logged in with API key")
         return

@@ -90,7 +96,7 @@ class Docent:
         }

         response = self._session.post(url, json=payload)
-
+        self._handle_response_errors(response)

         response_data = response.json()
         collection_id = response_data.get("collection_id")

@@ -134,13 +140,13 @@ class Docent:
             payload = {"agent_runs": [ar.model_dump(mode="json") for ar in batch]}

             response = self._session.post(url, json=payload)
-
+            self._handle_response_errors(response)

             pbar.update(len(batch))

         url = f"{self._server_url}/{collection_id}/compute_embeddings"
         response = self._session.post(url)
-
+        self._handle_response_errors(response)

         logger.info(f"Successfully added {total_runs} agent runs to Collection '{collection_id}'")
         return {"status": "success", "total_runs_added": total_runs}

@@ -156,7 +162,7 @@ class Docent:
         """
         url = f"{self._server_url}/collections"
         response = self._session.get(url)
-
+        self._handle_response_errors(response)
         return response.json()

     def list_rubrics(self, collection_id: str) -> list[dict[str, Any]]:

@@ -173,7 +179,7 @@ class Docent:
         """
         url = f"{self._server_url}/rubric/{collection_id}/rubrics"
         response = self._session.get(url)
-
+        self._handle_response_errors(response)
         return response.json()

     def get_rubric_run_state(self, collection_id: str, rubric_id: str) -> dict[str, Any]:

@@ -191,7 +197,7 @@ class Docent:
         """
         url = f"{self._server_url}/rubric/{collection_id}/{rubric_id}/rubric_run_state"
         response = self._session.get(url)
-
+        self._handle_response_errors(response)
         return response.json()

     def get_clustering_state(self, collection_id: str, rubric_id: str) -> dict[str, Any]:

@@ -209,7 +215,7 @@ class Docent:
         """
         url = f"{self._server_url}/rubric/{collection_id}/{rubric_id}/clustering_job"
         response = self._session.get(url)
-
+        self._handle_response_errors(response)
         return response.json()

     def get_cluster_centroids(self, collection_id: str, rubric_id: str) -> list[dict[str, Any]]:

@@ -244,6 +250,90 @@ class Docent:
         clustering_state = self.get_clustering_state(collection_id, rubric_id)
         return clustering_state.get("assignments", {})

+    def add_label(
+        self,
+        collection_id: str,
+        rubric_id: str,
+        label: JudgeRunLabel,
+    ) -> dict[str, Any]:
+        """Attach a manual label to an agent run for a rubric.
+
+        Args:
+            collection_id: ID of the Collection that owns the rubric.
+            rubric_id: ID of the rubric the label applies to.
+            label: A `JudgeRunLabel` that must comply with the rubric's output schema.
+
+        Returns:
+            dict: API response containing a status message.
+
+        Raises:
+            ValueError: If the label does not target the rubric specified in the path.
+            requests.exceptions.HTTPError: If the API request fails or validation errors occur.
+        """
+        if label.rubric_id != rubric_id:
+            raise ValueError("Label rubric_id must match the rubric_id argument")
+
+        url = f"{self._server_url}/rubric/{collection_id}/rubric/{rubric_id}/label"
+        payload = {"label": label.model_dump(mode="json")}
+        response = self._session.post(url, json=payload)
+        self._handle_response_errors(response)
+        return response.json()
+
+    def add_labels(
+        self,
+        collection_id: str,
+        rubric_id: str,
+        labels: list[JudgeRunLabel],
+    ) -> dict[str, Any]:
+        """Attach multiple manual labels to a rubric.
+
+        Args:
+            collection_id: ID of the Collection that owns the rubric.
+            rubric_id: ID of the rubric the labels apply to.
+            labels: List of `JudgeRunLabel` objects.
+
+        Returns:
+            dict: API response containing status information.
+
+        Raises:
+            ValueError: If no labels are provided.
+            ValueError: If any label targets a different rubric.
+            requests.exceptions.HTTPError: If the API request fails.
+        """
+        if not labels:
+            raise ValueError("labels must contain at least one entry")
+
+        rubric_ids = {label.rubric_id for label in labels}
+        if rubric_ids != {rubric_id}:
+            raise ValueError(
+                "All labels must specify the same rubric_id that is provided to add_labels"
+            )
+
+        payload = {"labels": [l.model_dump(mode="json") for l in labels]}
+
+        url = f"{self._server_url}/rubric/{collection_id}/rubric/{rubric_id}/labels"
+        response = self._session.post(url, json=payload)
+        self._handle_response_errors(response)
+        return response.json()
+
+    def get_labels(self, collection_id: str, rubric_id: str) -> list[dict[str, Any]]:
+        """Retrieve all manual labels for a rubric.
+
+        Args:
+            collection_id: ID of the Collection that owns the rubric.
+            rubric_id: ID of the rubric to fetch labels for.
+
+        Returns:
+            list: List of label dictionaries. Each includes agent_run_id and label content.
+
+        Raises:
+            requests.exceptions.HTTPError: If the API request fails.
+        """
+        url = f"{self._server_url}/rubric/{collection_id}/rubric/{rubric_id}/labels"
+        response = self._session.get(url)
+        self._handle_response_errors(response)
+        return response.json()
+
     def get_agent_run(self, collection_id: str, agent_run_id: str) -> AgentRun | None:
         """Get a specific agent run by its ID.

@@ -259,7 +349,7 @@ class Docent:
         """
         url = f"{self._server_url}/{collection_id}/agent_run"
         response = self._session.get(url, params={"agent_run_id": agent_run_id})
-
+        self._handle_response_errors(response)
         if response.json() is None:
             return None
         else:

@@ -281,7 +371,7 @@ class Docent:
         """
         url = f"{self._server_url}/{collection_id}/make_public"
         response = self._session.post(url)
-
+        self._handle_response_errors(response)

         logger.info(f"Successfully made Collection '{collection_id}' public")
         return response.json()

@@ -303,13 +393,7 @@ class Docent:
         payload = {"email": email}
         response = self._session.post(url, json=payload)

-
-            response.raise_for_status()
-        except requests.exceptions.HTTPError:
-            if response.status_code == 404:
-                raise ValueError(f"The user you are trying to share with ({email}) does not exist.")
-            else:
-                raise  # Re-raise the original exception
+        self._handle_response_errors(response)

         logger.info(f"Successfully shared Collection '{collection_id}' with {email}")
         return response.json()

@@ -328,7 +412,7 @@ class Docent:
         """
         url = f"{self._server_url}/{collection_id}/agent_run_ids"
         response = self._session.get(url)
-
+        self._handle_response_errors(response)
         return response.json()

     def recursively_ingest_inspect_logs(self, collection_id: str, fpath: str):

@@ -393,7 +477,7 @@ class Docent:
                 payload = {"agent_runs": [ar.model_dump(mode="json") for ar in batch_list]}

                 response = self._session.post(url, json=payload)
-
+                self._handle_response_errors(response)

                 runs_from_file += len(batch_list)
                 file_pbar.update(len(batch_list))

@@ -406,7 +490,7 @@ class Docent:
            logger.info("Computing embeddings for added runs...")
            url = f"{self._server_url}/{collection_id}/compute_embeddings"
            response = self._session.post(url)
-
+            self._handle_response_errors(response)

         logger.info(
             f"Successfully ingested {total_runs_added} total agent runs from {len(eval_files)} files"
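The SDK now exposes the label endpoints directly on the client. A hedged usage sketch (all IDs are placeholders, and passing the API key as an `api_key` constructor argument is an assumption based on the `self._login(api_key)` call shown above):

```python
# Hedged sketch of the new label methods; IDs are placeholders.
from docent.sdk.client import Docent
from docent.data_models.judge import JudgeRunLabel

client = Docent(api_key="dk-...")  # assumption: api_key constructor argument
label = JudgeRunLabel(
    agent_run_id="run-123",
    rubric_id="rubric-456",
    label={"correct": True},
)
client.add_label("collection-789", "rubric-456", label)
print(client.get_labels("collection-789", "rubric-456"))
```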
docent/trace.py

@@ -21,7 +21,7 @@ from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExport
 from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter as HTTPExporter
 from opentelemetry.instrumentation.threading import ThreadingInstrumentor
 from opentelemetry.sdk.resources import Resource
-from opentelemetry.sdk.trace import ReadableSpan, SpanProcessor, TracerProvider
+from opentelemetry.sdk.trace import ReadableSpan, SpanLimits, SpanProcessor, TracerProvider
 from opentelemetry.sdk.trace.export import (
     BatchSpanProcessor,
     ConsoleSpanExporter,

@@ -29,20 +29,13 @@ from opentelemetry.sdk.trace.export import (
 )
 from opentelemetry.trace import Span

-# Configure logging
 logger = logging.getLogger(__name__)
-logger.setLevel(logging.ERROR)

 # Default configuration
 DEFAULT_ENDPOINT = "https://api.docent.transluce.org/rest/telemetry"
 DEFAULT_COLLECTION_NAME = "default-collection-name"


-def _is_tracing_disabled() -> bool:
-    """Check if tracing is disabled via environment variable."""
-    return os.environ.get("DOCENT_DISABLE_TRACING", "").lower() == "true"
-
-
 class Instruments(Enum):
     """Enumeration of available instrument types."""

@@ -52,16 +45,10 @@ class Instruments(Enum):
     LANGCHAIN = "langchain"


-def _is_notebook() -> bool:
-    """Check if we're running in a Jupyter notebook."""
-    try:
-        return "ipykernel" in sys.modules
-    except Exception:
-        return False
-
-
 class DocentTracer:
-    """
+    """
+    Manages Docent tracing setup and provides tracing utilities.
+    """

     def __init__(
         self,

@@ -77,22 +64,6 @@ class DocentTracer:
         instruments: Optional[Set[Instruments]] = None,
         block_instruments: Optional[Set[Instruments]] = None,
     ):
-        """
-        Initialize Docent tracing manager.
-
-        Args:
-            collection_name: Name of the collection for resource attributes
-            collection_id: Optional collection ID (auto-generated if not provided)
-            agent_run_id: Optional agent_run_id to use for code outside of an agent run context (auto-generated if not provided)
-            endpoint: OTLP endpoint URL(s) - can be a single string or list of strings for multiple endpoints
-            headers: Optional headers for authentication
-            api_key: Optional API key for bearer token authentication (takes precedence over env var)
-            enable_console_export: Whether to export to console
-            enable_otlp_export: Whether to export to OTLP endpoint
-            disable_batch: Whether to disable batch processing (use SimpleSpanProcessor)
-            instruments: Set of instruments to enable (None = all instruments)
-            block_instruments: Set of instruments to explicitly disable
-        """
         self._initialized: bool = False
         # Check if tracing is disabled via environment variable
         if _is_tracing_disabled():

@@ -163,8 +134,12 @@ class DocentTracer:
         """
         Get the current agent run ID from context.

+        Retrieves the agent run ID that was set in the current execution context.
+        If no agent run context is active, returns the default agent run ID.
+
         Returns:
-            The current agent run ID if available,
+            The current agent run ID if available, or the default agent run ID
+            if no context is active.
         """
         try:
             return self._agent_run_id_var.get()

@@ -249,12 +224,23 @@ class DocentTracer:
             return

         try:
+
+            # Check for OTEL_SPAN_ATTRIBUTE_COUNT_LIMIT environment variable
+            default_attribute_limit = 1024
+            env_value = os.environ.get("OTEL_SPAN_ATTRIBUTE_COUNT_LIMIT", "0")
+            env_limit = int(env_value) if env_value.isdigit() else 0
+            attribute_limit = max(env_limit, default_attribute_limit)
+
+            span_limits = SpanLimits(
+                max_attributes=attribute_limit,
+            )
+
             # Create our own isolated tracer provider
             self._tracer_provider = TracerProvider(
-                resource=Resource.create({"service.name": self.collection_name})
+                resource=Resource.create({"service.name": self.collection_name}),
+                span_limits=span_limits,
             )

-            # Add custom span processor for agent_run_id and transcript_id
             class ContextSpanProcessor(SpanProcessor):
                 def __init__(self, manager: "DocentTracer"):
                     self.manager: "DocentTracer" = manager

@@ -312,11 +298,7 @@ class DocentTracer:
                     )

                 def on_end(self, span: ReadableSpan) -> None:
-
-                    span_attrs = span.attributes or {}
-                    logger.debug(
-                        f"Completed span: name='{span.name}', collection_id={span_attrs.get('collection_id')}, agent_run_id={span_attrs.get('agent_run_id')}, transcript_id={span_attrs.get('transcript_id')}, duration_ns={span.end_time - span.start_time if span.end_time and span.start_time else 'unknown'}"
-                    )
+                    pass

                 def shutdown(self) -> None:
                     pass

@@ -422,7 +404,17 @@ class DocentTracer:
             raise

     def cleanup(self):
-        """
+        """
+        Clean up Docent tracing resources.
+
+        Flushes all pending spans to exporters and shuts down the tracer provider.
+        This method is automatically called during application shutdown via atexit
+        handlers, but can also be called manually for explicit cleanup.
+
+        The cleanup process:
+        1. Flushes all span processors to ensure data is exported
+        2. Shuts down the tracer provider and releases resources
+        """
         if self._disabled:
             return

@@ -473,7 +465,7 @@ class DocentTracer:
         if disabled and self._initialized:
             self.cleanup()

-    def
+    def is_initialized(self) -> bool:
         """Verify if the manager is properly initialized."""
         return self._initialized

@@ -1063,8 +1055,9 @@ def initialize_tracing(
         collection_id: Optional collection ID (auto-generated if not provided)
         endpoint: OTLP endpoint URL(s) for span export - can be a single string or list of strings for multiple endpoints
         headers: Optional headers for authentication
-        api_key: Optional API key for bearer token authentication (takes precedence
-
+        api_key: Optional API key for bearer token authentication (takes precedence
+            over DOCENT_API_KEY environment variable)
+        enable_console_export: Whether to export spans to console for debugging
         enable_otlp_export: Whether to export spans to OTLP endpoint
         disable_batch: Whether to disable batch processing (use SimpleSpanProcessor)
         instruments: Set of instruments to enable (None = all instruments).

@@ -1074,7 +1067,6 @@ def initialize_tracing(
         The initialized Docent tracer

     Example:
-        # Basic setup
         initialize_tracing("my-collection")
     """

@@ -1137,17 +1129,17 @@ def close_tracing() -> None:
 def flush_tracing() -> None:
     """Force flush all spans to exporters."""
     if _global_tracer:
-        logger.debug("Flushing
+        logger.debug("Flushing Docent tracer")
         _global_tracer.flush()
     else:
         logger.debug("No global tracer available to flush")


-def
+def is_initialized() -> bool:
     """Verify if the global Docent tracer is properly initialized."""
     if _global_tracer is None:
         return False
-    return _global_tracer.
+    return _global_tracer.is_initialized()


 def is_disabled() -> bool:

@@ -1764,3 +1756,16 @@ def transcript_group_context(
     return TranscriptGroupContext(
         name, transcript_group_id, description, metadata, parent_transcript_group_id
     )
+
+
+def _is_tracing_disabled() -> bool:
+    """Check if tracing is disabled via environment variable."""
+    return os.environ.get("DOCENT_DISABLE_TRACING", "").lower() == "true"
+
+
+def _is_notebook() -> bool:
+    """Check if we're running in a Jupyter notebook."""
+    try:
+        return "ipykernel" in sys.modules
+    except Exception:
+        return False
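The tracer provider now gets an explicit span attribute limit, taking the larger of `OTEL_SPAN_ATTRIBUTE_COUNT_LIMIT` and a default of 1024. A standalone sketch of that logic (it mirrors the hunk above and needs only the opentelemetry-sdk package; the service name is illustrative):

```python
# Mirrors the new attribute-limit handling shown in the diff above.
import os
from opentelemetry.sdk.resources import Resource
from opentelemetry.sdk.trace import SpanLimits, TracerProvider

default_attribute_limit = 1024
env_value = os.environ.get("OTEL_SPAN_ATTRIBUTE_COUNT_LIMIT", "0")
env_limit = int(env_value) if env_value.isdigit() else 0
attribute_limit = max(env_limit, default_attribute_limit)

provider = TracerProvider(
    resource=Resource.create({"service.name": "my-collection"}),  # illustrative name
    span_limits=SpanLimits(max_attributes=attribute_limit),
)
print(attribute_limit)
```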
docent/data_models/metadata.py (deleted)

@@ -1,229 +0,0 @@
-# import traceback
-# from typing import Any, Optional
-
-# from pydantic import (
-#     BaseModel,
-#     ConfigDict,
-#     Field,
-#     PrivateAttr,
-#     SerializerFunctionWrapHandler,
-#     model_serializer,
-#     model_validator,
-# )
-
-# from docent._log_util import get_logger
-
-# logger = get_logger(__name__)
-
-# SINGLETONS = (int, float, str, bool)
-
-
-# class BaseMetadata(BaseModel):
-#     """Provides common functionality for accessing and validating metadata fields.
-#     All metadata classes should inherit from this class.
-
-#     Serialization Behavior:
-#     - Field descriptions are highly recommended and stored in serialized versions of the object.
-#     - When a subclass of BaseMetadata is uploaded to a server, all extra fields and their descriptions are retained.
-#     - To recover the original structure with proper typing upon download, use:
-#       `CustomMetadataClass.model_validate(obj.model_dump())`.
-
-#     Attributes:
-#         model_config: Pydantic configuration that allows extra fields.
-#         allow_fields_without_descriptions: Boolean indicating whether to allow fields without descriptions.
-#     """
-
-#     model_config = ConfigDict(extra="allow")
-#     allow_fields_without_descriptions: bool = True
-
-#     # Private attribute to store field descriptions
-#     _field_descriptions: dict[str, str | None] | None = PrivateAttr(default=None)
-#     _internal_basemetadata_fields: set[str] = PrivateAttr(
-#         default={
-#             "allow_fields_without_descriptions",
-#             "model_config",
-#             "_field_descriptions",
-#         }
-#     )
-
-#     @model_validator(mode="after")
-#     def _validate_field_types_and_descriptions(self):
-#         """Validates that all fields have descriptions and proper types.
-
-#         Returns:
-#             Self: The validated model instance.
-
-#         Raises:
-#             ValueError: If any field is missing a description or has an invalid type.
-#         """
-#         # Validate each field in the model
-#         for field_name, field_info in self.__class__.model_fields.items():
-#             if field_name in self._internal_basemetadata_fields:
-#                 continue
-
-#             # Check that field has a description
-#             if field_info.description is None:
-#                 if not self.allow_fields_without_descriptions:
-#                     raise ValueError(
-#                         f"Field `{field_name}` needs a description in the definition of `{self.__class__.__name__}`, like `{field_name}: T = Field(description=..., default=...)`. "
-#                         "To allow un-described fields, set `allow_fields_without_descriptions = True` on the instance or in your metadata class definition."
-#                     )
-
-#         # Validate that the metadata is JSON serializable
-#         try:
-#             self.model_dump_json()
-#         except Exception as e:
-#             raise ValueError(
-#                 f"Metadata is not JSON serializable: {e}. Traceback: {traceback.format_exc()}"
-#             )
-
-#         return self
-
-#     def model_post_init(self, __context: Any) -> None:
-#         """Initializes field descriptions from extra data after model initialization.
-
-#         Args:
-#             __context: The context provided by Pydantic's post-initialization hook.
-#         """
-#         fd = self.model_extra.pop("_field_descriptions", None) if self.model_extra else None
-#         if fd is not None:
-#             self._field_descriptions = fd
-
-#     @model_serializer(mode="wrap")
-#     def _serialize_model(self, handler: SerializerFunctionWrapHandler):
-#         # Call the default serializer
-#         data = handler(self)
-
-#         # Dump the field descriptions
-#         if self._field_descriptions is None:
-#             self._field_descriptions = self._compute_field_descriptions()
-#         data["_field_descriptions"] = self._field_descriptions
-
-#         return data
-
-#     def model_dump(
-#         self, *args: Any, strip_internal_fields: bool = False, **kwargs: Any
-#     ) -> dict[str, Any]:
-#         data = super().model_dump(*args, **kwargs)
-
-#         # Remove internal fields if requested
-#         if strip_internal_fields:
-#             for field in self._internal_basemetadata_fields:
-#                 if field in data:
-#                     data.pop(field)
-
-#         return data
-
-#     def get(self, key: str, default_value: Any = None) -> Any:
-#         """Gets a value from the metadata by key.
-
-#         Args:
-#             key: The key to look up in the metadata.
-#             default_value: Value to return if the key is not found. Defaults to None.
-
-#         Returns:
-#             Any: The value associated with the key, or the default value if not found.
-#         """
-#         # Check if the field exists in the model's fields
-#         if key in self.__class__.model_fields or (
-#             self.model_extra is not None and key in self.model_extra
-#         ):
-#             # Field exists, return its value (even if None)
-#             return getattr(self, key)
-
-#         logger.warning(f"Field '{key}' not found in {self.__class__.__name__}")
-#         return default_value
-
-#     def get_field_description(self, field_name: str) -> str | None:
-#         """Gets the description of a field defined in the model schema.
-
-#         Args:
-#             field_name: The name of the field.
-
-#         Returns:
-#             str or None: The description string if the field is defined in the model schema
-#             and has a description, otherwise None.
-#         """
-#         if self._field_descriptions is None:
-#             self._field_descriptions = self._compute_field_descriptions()
-
-#         if field_name in self._field_descriptions:
-#             return self._field_descriptions[field_name]
-
-#         logger.warning(
-#             f"Field description for '{field_name}' not found in {self.__class__.__name__}"
-#         )
-#         return None
-
-#     def get_all_field_descriptions(self) -> dict[str, str | None]:
-#         """Gets descriptions for all fields defined in the model schema.
-
-#         Returns:
-#             dict: A dictionary mapping field names to their descriptions.
-#             Only includes fields that have descriptions defined in the schema.
-#         """
-#         if self._field_descriptions is None:
-#             self._field_descriptions = self._compute_field_descriptions()
-#         return self._field_descriptions
-
-#     def _compute_field_descriptions(self) -> dict[str, str | None]:
-#         """Computes descriptions for all fields in the model.
-
-#         Returns:
-#             dict: A dictionary mapping field names to their descriptions.
-#         """
-#         field_descriptions: dict[str, Optional[str]] = {}
-#         for field_name, field_info in self.__class__.model_fields.items():
-#             if field_name not in self._internal_basemetadata_fields:
-#                 field_descriptions[field_name] = field_info.description
-#         return field_descriptions
-
-
-# class BaseAgentRunMetadata(BaseMetadata):
-#     """Extends BaseMetadata with fields specific to agent evaluation runs.
-
-#     Attributes:
-#         scores: Dictionary of evaluation metrics.
-#     """
-
-#     scores: dict[str, int | float | bool | None] = Field(
-#         description="A dict of score_key -> score_value. Use one key for each metric you're tracking."
-#     )
-
-
-# class InspectAgentRunMetadata(BaseAgentRunMetadata):
-#     """Extends BaseAgentRunMetadata with fields specific to Inspect runs.
-
-#     Attributes:
-#         task_id: The ID of the 'benchmark' or 'set of evals' that the transcript belongs to
-#         sample_id: The specific task inside of the `task_id` benchmark that the transcript was run on
-#         epoch_id: Each `sample_id` should be run multiple times due to stochasticity; `epoch_id` is the integer index of a specific run.
-#         model: The model that was used to generate the transcript
-#         scoring_metadata: Additional metadata about the scoring process
-#         additional_metadata: Additional metadata about the transcript
-#     """
-
-#     task_id: str = Field(
-#         description="The ID of the 'benchmark' or 'set of evals' that the transcript belongs to"
-#     )
-
-#     # Identification of this particular run
-#     sample_id: str = Field(
-#         description="The specific task inside of the `task_id` benchmark that the transcript was run on"
-#     )
-#     epoch_id: int = Field(
-#         description="Each `sample_id` should be run multiple times due to stochasticity; `epoch_id` is the integer index of a specific run."
-#     )
-
-#     # Parameters for the run
-#     model: str = Field(description="The model that was used to generate the transcript")
-
-#     # Scoring
-#     scoring_metadata: dict[str, Any] | None = Field(
-#         description="Additional metadata about the scoring process"
-#     )
-
-#     # Inspect metadata
-#     additional_metadata: dict[str, Any] | None = Field(
-#         description="Additional metadata about the transcript"
-#     )
docent/data_models/yaml_util.py (deleted)

@@ -1,12 +0,0 @@
-from typing import Any
-
-import yaml
-from pydantic_core import to_jsonable_python
-
-
-def yaml_dump_metadata(metadata: dict[str, Any]) -> str | None:
-    if not metadata:
-        return None
-    metadata_obj = to_jsonable_python(metadata)
-    yaml_text = yaml.dump(metadata_obj, width=float("inf"))
-    return yaml_text.strip()
The remaining files listed above with +0 -0 are unchanged between the two versions.