PyPI - docent-python - Versions diffs - 0.1.38a0__tar.gz → 0.1.40a0__tar.gz - Mend

docent-python 0.1.38a0__tar.gz → 0.1.40a0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (62) hide show

{docent_python-0.1.38a0 → docent_python-0.1.40a0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: docent-python
-Version: 0.1.38a0
+Version: 0.1.40a0
 Summary: Docent SDK
 Project-URL: Homepage, https://github.com/TransluceAI/docent
 Project-URL: Issues, https://github.com/TransluceAI/docent/issues

{docent_python-0.1.38a0 → docent_python-0.1.40a0}/docent/_llm_util/model_registry.py RENAMED Viewed

@@ -54,6 +54,10 @@ _REGISTRY: list[tuple[str, ModelInfo]] = [
         "claude-sonnet-4",
         ModelInfo(rate={"input": 3.0, "output": 15.0}, context_window=200_000),
     ),
+    (
+        "claude-sonnet-4-5",
+        ModelInfo(rate={"input": 3.0, "output": 15.0}, context_window=200_000),
+    ),
     (
         "claude-haiku-4-5",
         ModelInfo(rate={"input": 1.0, "output": 5.0}, context_window=200_000),

{docent_python-0.1.38a0 → docent_python-0.1.40a0}/docent/_llm_util/providers/preference_types.py RENAMED Viewed

@@ -95,7 +95,7 @@ class PublicProviderPreferences(BaseModel):
             ModelOption(provider="openai", model_name="gpt-5-mini", reasoning_effort="high"),
             ModelOption(
                 provider="anthropic",
-                model_name="claude-sonnet-4-20250514",
+                model_name="claude-sonnet-4-5",
                 reasoning_effort="medium",
             ),
         ]

{docent_python-0.1.38a0 → docent_python-0.1.40a0}/docent/data_models/agent_run.py RENAMED Viewed

@@ -1,7 +1,7 @@
 import sys
 import textwrap
+from collections import deque
 from datetime import datetime
-from queue import Queue
 from typing import Any, Literal, TypedDict, cast
 from uuid import uuid4
@@ -257,6 +257,13 @@ class AgentRun(BaseModel):
             self._transcript_group_dict = {tg.id: tg for tg in self.transcript_groups}
         return self._transcript_group_dict
+    def _invalidate_caches(self) -> None:
+        """Reset cached lookups after mutating transcripts or transcript groups."""
+        self._transcript_dict = None
+        self._transcript_group_dict = None
+        self._canonical_tree_cache.clear()
+        self._transcript_ids_ordered_cache.clear()
     def get_canonical_tree(
         self, full_tree: bool = False
     ) -> dict[str | None, list[tuple[Literal["t", "tg"], str]]]:
@@ -328,14 +335,11 @@ class AgentRun(BaseModel):
                 tg_tree.setdefault(t.transcript_group_id or "__global_root", set()).add(("t", t_id))
         else:
             # Initialize q with "important" tgs
-            q, seen = Queue[str](), set[str]()
-            for tg_id in tgs_to_transcripts.keys():
-                q.put(tg_id)
-                seen.add(tg_id)
+            q, seen = deque(tgs_to_transcripts.keys()), set(tgs_to_transcripts.keys())
             # Do an "upwards BFS" from leaves up to the root. Builds a tree of only relevant nodes.
-            while q.qsize() > 0:
-                u_id = q.get()
+            while q:
+                u_id = q.popleft()
                 u = tg_dict.get(u_id)  # None if __global_root
                 # Add the transcripts under this tg
@@ -349,7 +353,7 @@ class AgentRun(BaseModel):
                     tg_tree.setdefault(par_id, set()).add(("tg", u_id))
                     # If we haven't investigated the parent before, add to q
                     if par_id not in seen:
-                        q.put(par_id)
+                        q.append(par_id)
                         seen.add(par_id)
         # For each node, sort by created_at timestamp
@@ -384,6 +388,38 @@ class AgentRun(BaseModel):
         return c_tree, transcript_idx_map
+    def delete_transcript_group_subtree(self, transcript_group_id: str) -> None:
+        """Delete a transcript group and all descendant groups/transcripts using the canonical tree."""
+        if transcript_group_id == "__global_root":
+            raise ValueError("Cannot delete the global root sentinel")
+        if transcript_group_id not in self.transcript_group_dict:
+            raise ValueError(
+                f"Transcript group '{transcript_group_id}' does not exist on this run."
+            )
+        canonical_tree = self.get_canonical_tree(full_tree=True)
+        groups_to_delete: set[str] = set()
+        transcripts_to_delete: set[str] = set()
+        queue: deque[str] = deque([transcript_group_id])
+        while queue:
+            current_group = queue.popleft()
+            groups_to_delete.add(current_group)
+            for child_type, child_id in canonical_tree.get(current_group, []):
+                if child_type == "tg":
+                    queue.append(child_id)
+                else:
+                    transcripts_to_delete.add(child_id)
+        if groups_to_delete:
+            self.transcript_groups = [
+                tg for tg in self.transcript_groups if tg.id not in groups_to_delete
+            ]
+        if transcripts_to_delete:
+            self.transcripts = [t for t in self.transcripts if t.id not in transcripts_to_delete]
+        self._invalidate_caches()
     def to_text_new(
         self,
         agent_run_alias: int | str = 0,

{docent_python-0.1.38a0 → docent_python-0.1.40a0}/docent/data_models/chat/message.py RENAMED Viewed

@@ -1,7 +1,7 @@
 from logging import getLogger
 from typing import Annotated, Any, Literal
-from pydantic import BaseModel, Discriminator, Field
+from pydantic import BaseModel, Discriminator
 from docent.data_models.chat.content import Content
 from docent.data_models.chat.tool import ToolCall
@@ -23,7 +23,7 @@ class BaseChatMessage(BaseModel):
     id: str | None = None
     content: str | list[Content]
     role: Literal["system", "user", "assistant", "tool"]
-    metadata: dict[str, Any] = Field(default_factory=dict)
+    metadata: dict[str, Any] | None = None
     @property
     def text(self) -> str:

{docent_python-0.1.38a0 → docent_python-0.1.40a0}/docent/data_models/judge.py RENAMED Viewed

@@ -10,9 +10,7 @@ class Label(BaseModel):
     id: str = Field(default_factory=lambda: str(uuid4()))
     label_set_id: str
     label_value: dict[str, Any]
     agent_run_id: str

{docent_python-0.1.38a0 → docent_python-0.1.40a0}/docent/sdk/client.py RENAMED Viewed

@@ -1,11 +1,15 @@
+import gzip
 import itertools
+import json
 import os
+import time
 import webbrowser
 from pathlib import Path
-from typing import Any, Literal
+from typing import Any, Iterator, Literal
 import pandas as pd
 import requests
+from pydantic_core import to_jsonable_python
 from tqdm import tqdm
 from docent._log_util.logger import get_logger
@@ -16,6 +20,61 @@ from docent.judges.util.meta_schema import validate_judge_result_schema
 from docent.loaders import load_inspect
 from docent.sdk.llm_context import LLMContext, LLMContextItem
+MAX_AGENT_RUN_PAYLOAD_BYTES = 100 * 1024 * 1024  # 100MB backend limit
+_AGENT_RUNS_PAYLOAD_PREFIX = b'{"agent_runs":['
+_AGENT_RUNS_PAYLOAD_SUFFIX = b"]}"
+def _serialize_agent_run(agent_run: AgentRun) -> bytes:
+    """Serialize an AgentRun to compact JSON bytes."""
+    return json.dumps(to_jsonable_python(agent_run), separators=(",", ":")).encode("utf-8")
+def _build_agent_runs_payload(serialized_runs: list[bytes]) -> bytes:
+    """Wrap serialized individual runs into the API payload envelope."""
+    body = b",".join(serialized_runs)
+    return _AGENT_RUNS_PAYLOAD_PREFIX + body + _AGENT_RUNS_PAYLOAD_SUFFIX
+def _yield_agent_run_batches_by_size(
+    agent_runs: list[AgentRun], max_payload_bytes: int
+) -> Iterator[tuple[int, bytes]]:
+    """Yield batches of agent runs whose serialized payloads stay within max_payload_bytes."""
+    envelope_len = len(_AGENT_RUNS_PAYLOAD_PREFIX) + len(_AGENT_RUNS_PAYLOAD_SUFFIX)
+    comma_len = 1
+    current_serialized: list[bytes] = []
+    current_size = envelope_len
+    for agent_run in agent_runs:
+        serialized = _serialize_agent_run(agent_run)
+        serialized_len = len(serialized)
+        if envelope_len + serialized_len > max_payload_bytes:
+            raise ValueError(
+                f"A single agent run (id={agent_run.id}) exceeds the maximum payload size of "
+                f"{max_payload_bytes} bytes. Reduce the size of that run before uploading."
+            )
+        delimiter = 0 if not current_serialized else comma_len
+        projected_size = current_size + delimiter + serialized_len
+        # If adding the next run would exceed the max payload size, yield the current batch
+        if current_serialized and projected_size > max_payload_bytes:
+            yield len(current_serialized), _build_agent_runs_payload(current_serialized)
+            # Add the "next run" as the first run in the next batch
+            current_serialized = [serialized]
+            current_size = envelope_len + serialized_len
+        # Otherwise, add to the current batch and continue
+        else:
+            current_serialized.append(serialized)
+            current_size = projected_size
+    if current_serialized:
+        yield len(current_serialized), _build_agent_runs_payload(current_serialized)
 logger = get_logger(__name__)
@@ -37,6 +96,7 @@ class Docent:
         self,
         *,
         domain: str = "docent.transluce.org",
+        use_https: bool = True,
         api_key: str | None = None,
         # Deprecated
         server_url: str | None = None,  # Use domain instead
@@ -73,13 +133,14 @@ class Docent:
         self._domain = domain
         # Set server URL; server_url takes precedence over domain
-        server_url = (server_url or f"https://api.{domain}").rstrip("/")
+        prefix = "https://" if use_https else "http://"
+        server_url = (server_url or f"{prefix}api.{domain}").rstrip("/")
         if not server_url.endswith("/rest"):
             server_url = f"{server_url}/rest"
         self._server_url = server_url
         # Set web URL; web_url takes precedence over domain
-        self._web_url = (web_url or f"https://{domain}").rstrip("/")
+        self._web_url = (web_url or f"{prefix}{domain}").rstrip("/")
         # Use requests.Session for connection pooling and persistent headers
         self._session = requests.Session()
@@ -192,41 +253,199 @@ class Docent:
         logger.info(f"Successfully updated Collection '{collection_id}'")
     def add_agent_runs(
-        self, collection_id: str, agent_runs: list[AgentRun], batch_size: int = 1000
+        self,
+        collection_id: str,
+        agent_runs: list[AgentRun],
+        *,
+        compression: Literal["gzip", "none"] = "gzip",
+        wait: bool = True,
+        poll_interval: float = 1.0,
+        # Deprecated
+        batch_size: int | None = None,
     ) -> dict[str, Any]:
         """Adds agent runs to a Collection.
         Agent runs represent execution traces that can be visualized and analyzed.
-        This method batches the insertion in groups of 1,000 for better performance.
+        Requests are automatically chunked to stay under the backend's payload limit.
         Args:
             collection_id: ID of the Collection.
             agent_runs: List of AgentRun objects to add.
+            compression: Compression algorithm for request bodies. Defaults to gzip.
+                Set to "none" to retain legacy behavior.
+            wait: If True (default), wait for all ingestion jobs to complete before returning.
+                If False, return immediately after enqueuing jobs.
+            poll_interval: Seconds between status checks when wait=True. Defaults to 1.0.
         Returns:
-            dict: API response data.
+            dict: API response data containing:
+                - status: "success" if all jobs completed, "enqueued" if wait=False
+                - total_runs_added: Number of agent runs submitted
+                - job_ids: List of job IDs for tracking
         Raises:
+            ValueError: If any single agent run exceeds the maximum payload size.
             requests.exceptions.HTTPError: If the API request fails.
+            RuntimeError: If any job fails during processing (when wait=True).
         """
-        from tqdm import tqdm
+        if batch_size is not None:
+            logger.warning(
+                "The 'batch_size' parameter is deprecated and will be removed in a future version. "
+                "We have transitioned to a new batching strategy based on the size of the payload."
+            )
         url = f"{self._server_url}/{collection_id}/agent_runs"
         total_runs = len(agent_runs)
+        job_ids: list[str] = []
         # Process agent runs in batches
-        with tqdm(total=total_runs, desc="Adding agent runs", unit="runs") as pbar:
-            for i in range(0, total_runs, batch_size):
-                batch = agent_runs[i : i + batch_size]
-                payload = {"agent_runs": [ar.model_dump(mode="json") for ar in batch]}
-                response = self._session.post(url, json=payload)
+        desc = f"Uploading agent runs (compression={compression})"
+        with tqdm(total=total_runs, desc=desc, unit="runs") as pbar:
+            for batch_size, payload_bytes in _yield_agent_run_batches_by_size(
+                agent_runs, MAX_AGENT_RUN_PAYLOAD_BYTES
+            ):
+                request_kwargs: dict[str, Any] = {}
+                if compression == "none":
+                    request_kwargs["data"] = payload_bytes
+                    request_kwargs["headers"] = {"Content-Type": "application/json"}
+                elif compression == "gzip":
+                    request_kwargs["data"] = gzip.compress(payload_bytes)
+                    request_kwargs["headers"] = {
+                        "Content-Type": "application/json",
+                        "Content-Encoding": "gzip",
+                    }
+                else:
+                    raise ValueError(f"Unsupported compression '{compression}'")
+                response = self._session.post(url, **request_kwargs)
                 self._handle_response_errors(response)
-                pbar.update(len(batch))
+                # Server returns 202 with job_id for async processing
+                response_data = response.json()
+                job_id = response_data.get("job_id")
+                if job_id:
+                    job_ids.append(job_id)
+                pbar.update(batch_size)
+        if not wait:
+            logger.info(
+                f"Enqueued {total_runs} agent runs to Collection '{collection_id}' "
+                f"({len(job_ids)} job(s)). Use get_agent_run_job_status() to check progress."
+            )
+            return {
+                "status": "enqueued",
+                "total_runs_added": total_runs,
+                "job_ids": job_ids,
+            }
+        # Wait for all jobs to complete
+        if job_ids:
+            logger.info(
+                f"Uploaded {total_runs} agent runs in {len(job_ids)} batch(es). "
+                f"Waiting for server-side processing to complete... "
+                f"(set wait=False to skip waiting)"
+            )
+            self._wait_for_jobs(collection_id, job_ids, poll_interval)
+        logger.info(
+            f"Successfully added {total_runs} agent runs to Collection '{collection_id}'. "
+            f"All {len(job_ids)} job(s) completed."
+        )
+        return {"status": "success", "total_runs_added": total_runs, "job_ids": job_ids}
+    def _wait_for_jobs(
+        self,
+        collection_id: str,
+        job_ids: list[str],
+        poll_interval: float = 1.0,
+    ) -> None:
+        """Wait for all jobs to complete, showing progress.
+        Args:
+            collection_id: ID of the Collection.
+            job_ids: List of job IDs to wait for.
+            poll_interval: Seconds between status checks.
+        Raises:
+            RuntimeError: If any job fails or is canceled.
+        """
+        pending_jobs = set(job_ids)
+        failed_jobs: dict[str, str] = {}
+        with tqdm(total=len(job_ids), desc="Waiting for server processing", unit="jobs") as pbar:
+            while pending_jobs:
+                statuses = self.get_agent_run_job_statuses(collection_id, list(pending_jobs))
+                for job_status in statuses:
+                    job_id = job_status["job_id"]
+                    status = job_status["status"]
+                    if status == "completed":
+                        pending_jobs.discard(job_id)
+                        pbar.update(1)
+                    elif status == "canceled":
+                        pending_jobs.discard(job_id)
+                        failed_jobs[job_id] = "Job was canceled"
+                        pbar.update(1)
+                if pending_jobs:
+                    time.sleep(poll_interval)
+        if failed_jobs:
+            failed_msg = ", ".join(f"{k}: {v}" for k, v in failed_jobs.items())
+            raise RuntimeError(f"Some jobs failed: {failed_msg}")
+    def get_agent_run_job_statuses(
+        self, collection_id: str, job_ids: list[str]
+    ) -> list[dict[str, Any]]:
+        """Get the status of multiple agent run ingestion jobs.
+        Args:
+            collection_id: ID of the Collection.
+            job_ids: List of job IDs to check (max 100).
+        Returns:
+            list: List of job status dictionaries, each containing:
+                - job_id: The job ID
+                - status: One of "pending", "running", "completed", "canceled"
+                - type: The job type
+                - created_at: ISO timestamp of job creation
+        Raises:
+            ValueError: If more than 100 job IDs are provided.
+            requests.exceptions.HTTPError: If the API request fails.
+        """
+        if len(job_ids) > 100:
+            raise ValueError("Cannot request more than 100 job IDs at once")
+        url = f"{self._server_url}/{collection_id}/agent_runs/jobs/batch_status"
+        response = self._session.post(url, json={"job_ids": job_ids})
+        self._handle_response_errors(response)
+        return response.json()["jobs"]
+    def get_agent_run_job_status(self, collection_id: str, job_id: str) -> dict[str, Any]:
+        """Get the status of an agent run ingestion job.
+        Args:
+            collection_id: ID of the Collection.
+            job_id: The ID of the job to check.
-        logger.info(f"Successfully added {total_runs} agent runs to Collection '{collection_id}'")
-        return {"status": "success", "total_runs_added": total_runs}
+        Returns:
+            dict: Job status information including:
+                - job_id: The job ID
+                - status: One of "pending", "running", "completed", "canceled"
+                - type: The job type
+                - created_at: ISO timestamp of job creation
+        Raises:
+            requests.exceptions.HTTPError: If the API request fails.
+        """
+        url = f"{self._server_url}/{collection_id}/agent_runs/jobs/{job_id}"
+        response = self._session.get(url)
+        self._handle_response_errors(response)
+        return response.json()
     def list_collections(self) -> list[Collection]:
         """Lists all available Collections.
@@ -459,6 +678,43 @@ class Docent:
         self._handle_response_errors(response)
         return response.json()
+    def tag_transcript(self, collection_id: str, agent_run_id: str, value: str) -> None:
+        """Add a tag to an agent run transcript.
+        Args:
+            collection_id: ID of the Collection.
+            agent_run_id: The agent run to tag.
+            value: The tag value (max length enforced by the server).
+        Raises:
+            requests.exceptions.HTTPError: If the API request fails.
+        """
+        url = f"{self._server_url}/label/{collection_id}/tag"
+        payload = {"agent_run_id": agent_run_id, "value": value}
+        response = self._session.post(url, json=payload)
+        self._handle_response_errors(response)
+    def get_tags(self, collection_id: str, value: str | None = None) -> list[dict[str, Any]]:
+        """Get all tags in a collection, optionally filtered by value."""
+        url = f"{self._server_url}/label/{collection_id}/tags"
+        params = {"value": value} if value is not None else None
+        response = self._session.get(url, params=params)
+        self._handle_response_errors(response)
+        return response.json()
+    def get_tags_for_agent_run(self, collection_id: str, agent_run_id: str) -> list[dict[str, Any]]:
+        """Get all tags attached to a specific agent run."""
+        url = f"{self._server_url}/label/{collection_id}/agent_run/{agent_run_id}/tags"
+        response = self._session.get(url)
+        self._handle_response_errors(response)
+        return response.json()
+    def delete_tag(self, collection_id: str, tag_id: str) -> None:
+        """Delete a tag by ID."""
+        url = f"{self._server_url}/label/{collection_id}/tag/{tag_id}"
+        response = self._session.delete(url)
+        self._handle_response_errors(response)
     def get_agent_run(self, collection_id: str, agent_run_id: str) -> AgentRun | None:
         """Get a specific agent run by its ID.

{docent_python-0.1.38a0 → docent_python-0.1.40a0}/docent/sdk/llm_context.py RENAMED Viewed

@@ -365,7 +365,9 @@ def _get_text_for_citation_target(target: CitationTarget, context: LLMContext) -
             if transcript.id == item.transcript_id:
                 if 0 <= item.block_idx < len(transcript.messages):
                     message = transcript.messages[item.block_idx]
-                    metadata_value = message.metadata.get(item.metadata_key)
+                    metadata_value = (
+                        message.metadata.get(item.metadata_key) if message.metadata else None
+                    )
                     if metadata_value is not None:
                         return json.dumps(metadata_value)
         return None

{docent_python-0.1.38a0 → docent_python-0.1.40a0}/pyproject.toml RENAMED Viewed

@@ -1,7 +1,7 @@
 [project]
 name = "docent-python"
 description = "Docent SDK"
-version = "0.1.38-alpha"
+version = "0.1.40-alpha"
 authors = [
   { name="Transluce", email="info@transluce.org" },
 ]