docent-python 0.1.47a0__tar.gz → 0.1.49a0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/PKG-INFO +1 -1
  2. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/_llm_util/model_registry.py +8 -0
  3. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/_llm_util/providers/anthropic.py +36 -8
  4. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/_llm_util/providers/google.py +31 -8
  5. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/data_models/_tiktoken_util.py +6 -6
  6. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/data_models/agent_run.py +11 -1
  7. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/data_models/chat/response_format.py +1 -0
  8. docent_python-0.1.49a0/docent/data_models/metadata_util.py +32 -0
  9. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/sdk/client.py +23 -1
  10. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/pyproject.toml +1 -1
  11. docent_python-0.1.47a0/docent/data_models/metadata_util.py +0 -16
  12. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/.gitignore +0 -0
  13. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/LICENSE.md +0 -0
  14. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/README.md +0 -0
  15. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/__init__.py +0 -0
  16. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/_llm_util/__init__.py +0 -0
  17. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/_llm_util/data_models/__init__.py +0 -0
  18. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/_llm_util/data_models/exceptions.py +0 -0
  19. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/_llm_util/data_models/llm_output.py +0 -0
  20. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/_llm_util/llm_cache.py +0 -0
  21. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/_llm_util/llm_svc.py +0 -0
  22. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/_llm_util/providers/__init__.py +0 -0
  23. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/_llm_util/providers/common.py +0 -0
  24. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/_llm_util/providers/openai.py +0 -0
  25. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/_llm_util/providers/openrouter.py +0 -0
  26. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/_llm_util/providers/preference_types.py +0 -0
  27. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/_llm_util/providers/provider_registry.py +0 -0
  28. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/_log_util/__init__.py +0 -0
  29. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/_log_util/logger.py +0 -0
  30. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/data_models/__init__.py +0 -0
  31. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/data_models/chat/__init__.py +0 -0
  32. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/data_models/chat/content.py +0 -0
  33. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/data_models/chat/message.py +0 -0
  34. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/data_models/chat/tool.py +0 -0
  35. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/data_models/citation.py +0 -0
  36. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/data_models/formatted_objects.py +0 -0
  37. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/data_models/judge.py +0 -0
  38. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/data_models/regex.py +0 -0
  39. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/data_models/transcript.py +0 -0
  40. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/data_models/util.py +0 -0
  41. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/judges/__init__.py +0 -0
  42. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/judges/analysis.py +0 -0
  43. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/judges/impl.py +0 -0
  44. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/judges/runner.py +0 -0
  45. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/judges/stats.py +0 -0
  46. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/judges/types.py +0 -0
  47. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/judges/util/forgiving_json.py +0 -0
  48. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/judges/util/meta_schema.json +0 -0
  49. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/judges/util/meta_schema.py +0 -0
  50. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/judges/util/parse_output.py +0 -0
  51. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/judges/util/template_formatter.py +0 -0
  52. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/judges/util/voting.py +0 -0
  53. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/loaders/load_inspect.py +0 -0
  54. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/mcp/__init__.py +0 -0
  55. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/mcp/__main__.py +0 -0
  56. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/mcp/server.py +0 -0
  57. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/py.typed +0 -0
  58. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/samples/__init__.py +0 -0
  59. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/samples/load.py +0 -0
  60. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/samples/log.eval +0 -0
  61. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/samples/tb_airline.json +0 -0
  62. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/sdk/__init__.py +0 -0
  63. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/sdk/agent_run_writer.py +0 -0
  64. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/sdk/llm_context.py +0 -0
  65. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/sdk/llm_request.py +0 -0
  66. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/trace.py +0 -0
  67. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/docent/trace_temp.py +0 -0
  68. {docent_python-0.1.47a0 → docent_python-0.1.49a0}/uv.lock +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: docent-python
-Version: 0.1.47a0
+Version: 0.1.49a0
 Summary: Docent SDK
 Project-URL: Homepage, https://github.com/TransluceAI/docent
 Project-URL: Issues, https://github.com/TransluceAI/docent/issues
docent/_llm_util/model_registry.py
@@ -30,6 +30,10 @@ class ModelInfo:
 
 # Note: some providers charge extra for long prompts/outputs. We don't account for this yet.
 _REGISTRY: list[tuple[str, ModelInfo]] = [
+    (
+        "gpt-5-chat-latest",
+        ModelInfo(rate={"input": 1.25, "output": 10.0}, context_window=128_000),
+    ),
     (
         "gpt-5-nano",
         ModelInfo(rate={"input": 0.05, "output": 0.40}, context_window=400_000),
@@ -62,6 +66,10 @@ _REGISTRY: list[tuple[str, ModelInfo]] = [
         "claude-haiku-4-5",
         ModelInfo(rate={"input": 1.0, "output": 5.0}, context_window=200_000),
     ),
+    (
+        "claude-opus-4-5-20251101",
+        ModelInfo(rate={"input": 5.0, "output": 25.0}, context_window=200_000),
+    ),
     (
         "gemini-2.5-flash-lite",
         ModelInfo(
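Note: the rate values appear to be USD per million tokens (the new entries match the providers' published pricing). A minimal sketch of how a caller could turn a registry entry into a dollar estimate under that assumption; estimate_cost_usd is illustrative and not part of the SDK:

def estimate_cost_usd(rate: dict[str, float], input_tokens: int, output_tokens: int) -> float:
    # Assumes rate values are USD per 1M tokens, as the registry entries suggest.
    return (rate["input"] * input_tokens + rate["output"] * output_tokens) / 1_000_000

# claude-opus-4-5-20251101 with 10_000 input and 2_000 output tokens:
# 5.0 * 0.01 + 25.0 * 0.002 = $0.10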
docent/_llm_util/providers/anthropic.py
@@ -73,6 +73,7 @@ from docent.data_models.chat import (
 from docent.data_models.chat.response_format import ResponseFormat
 
 logger = get_logger(__name__)
+ANTHROPIC_STRUCTURED_OUTPUTS_BETA = "structured-outputs-2025-11-13"
 
 
 def _print_backoff_message(e: Details):
@@ -188,6 +189,25 @@ def _parse_tool_choice(tool_choice: Literal["auto", "required"] | None) -> ToolC
     return ToolChoiceAnyParam(type="any")
 
 
+def _build_output_format(response_format: ResponseFormat | None) -> dict[str, Any] | None:
+    if response_format is None:
+        return None
+    if response_format.strict is False:
+        raise NotImplementedError(
+            "Anthropic structured outputs do not support strict=False; "
+            "set ResponseFormat.strict=True."
+        )
+    if response_format.type != "json_schema":
+        raise ValueError(
+            f"Unsupported response format type: {response_format.type}. "
+            "Only 'json_schema' is currently supported."
+        )
+    return {
+        "type": "json_schema",
+        "schema": response_format.schema_,
+    }
+
+
 def _convert_anthropic_error(e: Exception):
     if isinstance(e, BadRequestError):
         if "context limit" in e.message.lower() or "prompt is too long" in e.message.lower():
@@ -220,10 +240,6 @@ async def get_anthropic_chat_completion_streaming_async(
     timeout: float = 5.0,
     response_format: ResponseFormat | None = None,
 ):
-    if response_format is not None:
-        raise NotImplementedError(
-            "Structured outputs (response_format) are not implemented for Anthropic yet."
-        )
     if logprobs or top_logprobs is not None:
         raise NotImplementedError(
             "We have not implemented logprobs or top_logprobs for Anthropic yet."
@@ -251,6 +267,14 @@ async def get_anthropic_chat_completion_streaming_async(
         create_kwargs["tool_choice"] = tool_choice_param
     if system is not None:
         create_kwargs["system"] = system
+    if response_format is not None:
+        output_format = _build_output_format(response_format)
+        extra_headers = dict(create_kwargs.get("extra_headers", {}))
+        extra_headers["anthropic-beta"] = ANTHROPIC_STRUCTURED_OUTPUTS_BETA
+        create_kwargs["extra_headers"] = extra_headers
+        extra_body = dict(create_kwargs.get("extra_body", {}))
+        extra_body["output_format"] = output_format
+        create_kwargs["extra_body"] = extra_body
 
     stream = cast(
         AsyncStream[RawMessageStreamEvent],
@@ -420,10 +444,6 @@ async def get_anthropic_chat_completion_async(
     We should actually implement this at some point, but it does not work.
     """
 
-    if response_format is not None:
-        raise NotImplementedError(
-            "Structured outputs (response_format) are not implemented for Anthropic yet."
-        )
     if logprobs or top_logprobs is not None:
         raise NotImplementedError(
             "We have not implemented logprobs or top_logprobs for Anthropic yet."
@@ -450,6 +470,14 @@ async def get_anthropic_chat_completion_async(
         create_kwargs["tool_choice"] = tool_choice_param
     if system is not None:
         create_kwargs["system"] = system
+    if response_format is not None:
+        output_format = _build_output_format(response_format)
+        extra_headers = dict(create_kwargs.get("extra_headers", {}))
+        extra_headers["anthropic-beta"] = ANTHROPIC_STRUCTURED_OUTPUTS_BETA
+        create_kwargs["extra_headers"] = extra_headers
+        extra_body = dict(create_kwargs.get("extra_body", {}))
+        extra_body["output_format"] = output_format
+        create_kwargs["extra_body"] = extra_body
 
     raw_output = cast(Message, await client.messages.create(**create_kwargs))
 
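Both Anthropic entry points now share the same wiring: _build_output_format validates the unified ResponseFormat, and the request gains a beta header plus an output_format body field. A rough sketch of the effect, with a hypothetical schema (the field names come from the diff; the exact ResponseFormat constructor call is an assumption):

# Hypothetical example, not from the package.
fmt = ResponseFormat(
    type="json_schema",
    schema_={"type": "object", "properties": {"answer": {"type": "string"}}},
)
assert _build_output_format(fmt) == {"type": "json_schema", "schema": fmt.schema_}
# The messages.create(...) call then carries:
#   extra_headers={"anthropic-beta": "structured-outputs-2025-11-13"}
#   extra_body={"output_format": {"type": "json_schema", "schema": ...}}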
docent/_llm_util/providers/google.py
@@ -63,6 +63,27 @@ def _is_retryable_error(exception: BaseException) -> bool:
     return False
 
 
+def _build_response_format_config(
+    response_format: ResponseFormat | None,
+    *,
+    model_name: str,
+) -> dict[str, Any]:
+    """Build Gemini structured output config from unified ResponseFormat."""
+    if response_format is None:
+        return {}
+
+    if response_format.type != "json_schema":
+        raise ValueError(
+            f"Unsupported response format type: {response_format.type} for model {model_name}. "
+            "Only 'json_schema' is currently supported."
+        )
+
+    return {
+        "response_mime_type": "application/json",
+        "response_json_schema": response_format.schema_,
+    }
+
+
 @backoff.on_exception(
     backoff.expo,
     exception=(Exception),
@@ -85,16 +106,16 @@ async def get_google_chat_completion_async(
     timeout: float = 5.0,
     response_format: ResponseFormat | None = None,
 ) -> LLMOutput:
-    if response_format is not None:
-        raise NotImplementedError(
-            "Structured outputs (response_format) are not implemented for Google yet."
-        )
     if logprobs or top_logprobs is not None:
         raise NotImplementedError(
             "We have not implemented logprobs or top_logprobs for Google yet."
         )
 
     system, input_messages = _parse_chat_messages(messages, tools_provided=bool(tools))
+    response_format_config = _build_response_format_config(
+        response_format,
+        model_name=model_name,
+    )
 
     async with async_timeout_ctx(timeout):
         thinking_cfg = None
@@ -118,6 +139,7 @@ async def get_google_chat_completion_async(
                 if tool_choice is not None
                 else None
             ),
+            **response_format_config,
         ),
     )
 
@@ -153,16 +175,16 @@ async def get_google_chat_completion_streaming_async(
     timeout: float = 5.0,
     response_format: ResponseFormat | None = None,
 ) -> LLMOutput:
-    if response_format is not None:
-        raise NotImplementedError(
-            "Structured outputs (response_format) are not implemented for Google yet."
-        )
     if logprobs or top_logprobs is not None:
         raise NotImplementedError(
             "We have not implemented logprobs or top_logprobs for Google yet."
         )
 
     system, input_messages = _parse_chat_messages(messages, tools_provided=bool(tools))
+    response_format_config = _build_response_format_config(
+        response_format,
+        model_name=model_name,
+    )
 
     try:
         async with async_timeout_ctx(timeout):
@@ -187,6 +209,7 @@ async def get_google_chat_completion_streaming_async(
                 if tool_choice is not None
                 else None
            ),
+            **response_format_config,
         ),
     )
 
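The Gemini path mirrors the Anthropic one, except the helper's kwargs are spread into the generation config instead of going through headers. A quick sketch of its output (the ResponseFormat construction is again an assumption):

# Hypothetical example, not from the package.
cfg = _build_response_format_config(
    ResponseFormat(type="json_schema", schema_={"type": "object"}),
    model_name="gemini-2.5-flash-lite",
)
# cfg == {"response_mime_type": "application/json",
#         "response_json_schema": {"type": "object"}}
# With response_format=None the helper returns {}, so the config is unchanged.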
docent/data_models/_tiktoken_util.py
@@ -9,15 +9,15 @@ def get_token_count(text: str, model: str = "gpt-4") -> int:
     return len(encoding.encode(text, disallowed_special=()))
 
 
-def truncate_to_token_limit(text: str, max_tokens: int, model: str = "gpt-4") -> str:
+def truncate_to_token_limit(
+    text: str, max_tokens: int, model: str = "gpt-4"
+) -> tuple[str, int, int]:
     """Truncate text to stay within the specified token limit."""
     encoding = tiktoken.encoding_for_model(model)
     tokens = encoding.encode(text, disallowed_special=())
-
-    if len(tokens) <= max_tokens:
-        return text
-
-    return encoding.decode(tokens[:max_tokens])
+    orig_num_tokens = len(tokens)
+    new_tokens = tokens[:max_tokens]
+    return encoding.decode(new_tokens), len(new_tokens), orig_num_tokens
 
 
 class MessageRange:
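This is a breaking signature change: truncate_to_token_limit now always returns a (text, kept_tokens, original_tokens) tuple, even when nothing is cut, so callers can detect truncation by comparing the two counts. An illustrative call pattern, not from the package:

text, kept, original = truncate_to_token_limit(long_text, max_tokens=1000)
if kept < original:
    # The text was actually truncated.
    print(f"Truncated from {original} to {kept} tokens")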
docent/data_models/agent_run.py
@@ -21,7 +21,7 @@ from docent.data_models.citation import (
     TranscriptBlockMetadataItem,
     TranscriptMetadataItem,
 )
-from docent.data_models.metadata_util import dump_metadata
+from docent.data_models.metadata_util import deep_merge_metadata, dump_metadata
 from docent.data_models.transcript import Transcript, TranscriptGroup, render_metadata_comments
 
 logger = get_logger(__name__)
@@ -100,6 +100,16 @@ class AgentRun(BaseModel):
         """Returns a dictionary mapping transcript group IDs to TranscriptGroup objects."""
         return {tg.id: tg for tg in self.transcript_groups}
 
+    def merge_metadata(self, metadata: dict[str, Any] | None) -> None:
+        """
+        Merge metadata into the agent run metadata in-place.
+
+        Uses a deep merge so nested dictionaries accumulate without losing existing keys.
+        """
+        if not metadata:
+            return
+        deep_merge_metadata(self.metadata, metadata)
+
     def to_text(
         self,
         children_text: str,
docent/data_models/chat/response_format.py
@@ -15,6 +15,7 @@ class ResponseFormat(BaseModel):
     - OpenAI: response_format parameter
     - Anthropic: output_format parameter (with beta header)
     - OpenRouter: response_format parameter (same as OpenAI)
+    - Google Gemini: response_mime_type + response_json_schema in GenerateContentConfig
 
     Attributes:
         type: The format type. Currently only "json_schema" is supported.
docent/data_models/metadata_util.py (new file in 0.1.49a0)
@@ -0,0 +1,32 @@
+import json
+from typing import Any, cast
+
+from pydantic_core import to_jsonable_python
+
+
+def dump_metadata(metadata: dict[str, Any]) -> str | None:
+    """
+    Dump metadata to a JSON string.
+    We used to use YAML to save tokens, but JSON makes it easier to find cited ranges on the frontend because the frontend uses JSON.
+    """
+    if not metadata:
+        return None
+    metadata_obj = to_jsonable_python(metadata)
+    text = json.dumps(metadata_obj, indent=2)
+    return text.strip()
+
+
+def deep_merge_metadata(destination: dict[str, Any], source: dict[str, Any]) -> dict[str, Any]:
+    """
+    Recursively merge metadata dictionaries in-place.
+
+    Nested dictionaries are merged to preserve existing keys while allowing
+    later values to override earlier ones.
+    """
+    for key, value in source.items():
+        dest_value = destination.get(key)
+        if isinstance(dest_value, dict) and isinstance(value, dict):
+            deep_merge_metadata(cast(dict[str, Any], dest_value), cast(dict[str, Any], value))
+        else:
+            destination[key] = value
+    return destination
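The merge semantics in a nutshell, with made-up metadata:

dest = {"scores": {"accuracy": 0.9}, "tag": "old"}
deep_merge_metadata(dest, {"scores": {"f1": 0.8}, "tag": "new"})
# dest == {"scores": {"accuracy": 0.9, "f1": 0.8}, "tag": "new"}
# Nested dicts merge key-by-key; non-dict values are overwritten.
# AgentRun.merge_metadata (above) delegates here after skipping falsy input.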
docent/sdk/client.py
@@ -253,6 +253,28 @@ class Docent:
 
         raise requests.HTTPError(f"HTTP {response.status_code}: {detail}", response=response)
 
+    def _post_with_retry(
+        self,
+        url: str,
+        max_retries: int = 3,
+        backoff_factor: float = 1.0,
+        **kwargs: Any,
+    ) -> requests.Response:
+        """POST with retries on 5xx errors."""
+        last_response: requests.Response | None = None
+        for attempt in range(max_retries + 1):
+            last_response = self._session.post(url, **kwargs)
+            if last_response.status_code < 500 or attempt == max_retries:
+                return last_response
+            wait = backoff_factor * (2**attempt)
+            self._logger.warning(
+                f"Server error {last_response.status_code} on POST {url}, "
+                f"retrying in {wait:.1f}s (attempt {attempt + 1}/{max_retries})"
+            )
+            time.sleep(wait)
+        assert last_response is not None
+        return last_response
+
 
     def _login(self, api_key: str):
         """Login with email/password to establish session."""
@@ -405,7 +427,7 @@ class Docent:
         else:
             raise ValueError(f"Unsupported compression '{compression}'")
 
-        response = self._session.post(url, **request_kwargs)
+        response = self._post_with_retry(url, **request_kwargs)
         self._handle_response_errors(response)
 
         # Server returns 202 with job_id for async processing
pyproject.toml
@@ -1,7 +1,7 @@
 [project]
 name = "docent-python"
 description = "Docent SDK"
-version = "0.1.47-alpha"
+version = "0.1.49-alpha"
 authors = [
     { name="Transluce", email="info@transluce.org" },
 ]
docent/data_models/metadata_util.py (removed from the 0.1.47a0 tree)
@@ -1,16 +0,0 @@
-import json
-from typing import Any
-
-from pydantic_core import to_jsonable_python
-
-
-def dump_metadata(metadata: dict[str, Any]) -> str | None:
-    """
-    Dump metadata to a JSON string.
-    We used to use YAML to save tokens, but JSON makes it easier to find cited ranges on the frontend because the frontend uses JSON.
-    """
-    if not metadata:
-        return None
-    metadata_obj = to_jsonable_python(metadata)
-    text = json.dumps(metadata_obj, indent=2)
-    return text.strip()