docent-python 0.1.55a0__tar.gz → 0.1.57a0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/.gitignore +3 -0
  2. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/PKG-INFO +2 -1
  3. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/__init__.py +2 -0
  4. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/_llm_util/llm_svc.py +7 -0
  5. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/_llm_util/model_registry.py +11 -1
  6. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/_llm_util/providers/anthropic.py +106 -105
  7. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/_llm_util/providers/common.py +32 -3
  8. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/_llm_util/providers/google.py +147 -141
  9. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/_llm_util/providers/openai.py +83 -79
  10. docent_python-0.1.57a0/docent/_llm_util/providers/openrouter.py +160 -0
  11. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/_llm_util/providers/preference_types.py +3 -1
  12. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/_llm_util/providers/provider_registry.py +5 -2
  13. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/data_models/agent_run.py +16 -0
  14. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/data_models/citation.py +7 -0
  15. docent_python-0.1.57a0/docent/data_models/feedback.py +410 -0
  16. docent_python-0.1.57a0/docent/data_models/reading.py +331 -0
  17. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/judges/runner.py +2 -2
  18. docent_python-0.1.57a0/docent/mcp/server.py +392 -0
  19. docent_python-0.1.57a0/docent/sdk/__init__.py +16 -0
  20. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/sdk/client.py +884 -52
  21. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/sdk/llm_context.py +234 -16
  22. docent_python-0.1.57a0/docent/sdk/reading.py +286 -0
  23. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/pyproject.toml +2 -1
  24. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/uv.lock +6 -4
  25. docent_python-0.1.55a0/docent/_llm_util/providers/openrouter.py +0 -428
  26. docent_python-0.1.55a0/docent/data_models/feedback.py +0 -458
  27. docent_python-0.1.55a0/docent/mcp/server.py +0 -138
  28. docent_python-0.1.55a0/docent/sdk/__init__.py +0 -5
  29. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/LICENSE.md +0 -0
  30. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/README.md +0 -0
  31. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/_llm_util/__init__.py +0 -0
  32. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/_llm_util/data_models/__init__.py +0 -0
  33. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/_llm_util/data_models/exceptions.py +0 -0
  34. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/_llm_util/data_models/llm_output.py +0 -0
  35. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/_llm_util/llm_cache.py +0 -0
  36. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/_llm_util/providers/__init__.py +0 -0
  37. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/_log_util/__init__.py +0 -0
  38. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/_log_util/logger.py +0 -0
  39. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/data_models/__init__.py +0 -0
  40. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/data_models/_tiktoken_util.py +0 -0
  41. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/data_models/chat/__init__.py +0 -0
  42. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/data_models/chat/content.py +0 -0
  43. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/data_models/chat/message.py +0 -0
  44. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/data_models/chat/response_format.py +0 -0
  45. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/data_models/chat/tool.py +0 -0
  46. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/data_models/formatted_objects.py +0 -0
  47. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/data_models/judge.py +0 -0
  48. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/data_models/metadata_util.py +0 -0
  49. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/data_models/regex.py +0 -0
  50. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/data_models/transcript.py +0 -0
  51. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/data_models/util.py +0 -0
  52. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/judges/__init__.py +0 -0
  53. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/judges/analysis.py +0 -0
  54. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/judges/impl.py +0 -0
  55. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/judges/stats.py +0 -0
  56. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/judges/types.py +0 -0
  57. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/judges/util/forgiving_json.py +0 -0
  58. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/judges/util/meta_schema.json +0 -0
  59. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/judges/util/meta_schema.py +0 -0
  60. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/judges/util/parse_output.py +0 -0
  61. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/judges/util/template_formatter.py +0 -0
  62. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/judges/util/voting.py +0 -0
  63. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/loaders/load_inspect.py +0 -0
  64. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/mcp/__init__.py +0 -0
  65. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/mcp/__main__.py +0 -0
  66. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/py.typed +0 -0
  67. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/samples/__init__.py +0 -0
  68. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/samples/load.py +0 -0
  69. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/samples/log.eval +0 -0
  70. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/samples/tb_airline.json +0 -0
  71. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/sdk/agent_run_writer.py +0 -0
  72. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/sdk/llm_request.py +0 -0
  73. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/trace.py +0 -0
  74. {docent_python-0.1.55a0 → docent_python-0.1.57a0}/docent/trace_temp.py +0 -0
@@ -145,6 +145,9 @@ ENV/
 env.bak/
 venv.bak/
 
+# Docent environment files
+docent.env*
+
 # Spyder project settings
 .spyderproject
 .spyproject
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: docent-python
-Version: 0.1.55a0
+Version: 0.1.57a0
 Summary: Docent SDK
 Project-URL: Homepage, https://github.com/TransluceAI/docent
 Project-URL: Issues, https://github.com/TransluceAI/docent/issues
@@ -30,6 +30,7 @@ Requires-Dist: opentelemetry-sdk>=1.34.1
 Requires-Dist: orjson>=3.11.6
 Requires-Dist: pandas>=2.3.3
 Requires-Dist: pydantic>=2.11.7
+Requires-Dist: pyjwt>=2.12.0
 Requires-Dist: python-dotenv>=1.0.0
 Requires-Dist: pyyaml>=6.0.2
 Requires-Dist: tiktoken>=0.7.0
@@ -4,6 +4,7 @@ __all__ = [
     "load_config_file",
     "AgentRunRef",
     "TranscriptRef",
+    "ReadingResultRef",
    "ResultRef",
     "Prompt",
 ]
@@ -13,6 +14,7 @@ from docent.sdk.client import Docent, load_config_file
 from docent.sdk.llm_context import (
     AgentRunRef,
     Prompt,
+    ReadingResultRef,
     ResultRef,
     TranscriptRef,
 )
@@ -89,6 +89,7 @@ async def _parallelize_calls(
     top_logprobs: int | None,
     timeout: float,
     semaphore: Semaphore,
+    max_retries: int,
     # use_tqdm: bool,
     cache: LLMCache | None = None,
     response_format: ResponseFormat | None = None,
@@ -106,6 +107,7 @@ async def _parallelize_calls(
         top_logprobs=top_logprobs,
         timeout=timeout,
         response_format=response_format,
+        max_retries=max_retries,
     )
 
     responses: list[LLMOutput | None] = [None for _ in inputs]
@@ -357,10 +359,14 @@ class BaseLLMService:
         completion_callback: AsyncLLMOutputStreamingCallback | None = None,
         use_cache: bool = False,
         response_format: ResponseFormat | None = None,
+        max_retries: int = 1,
         _api_key_overrides: dict[str, str] = dict(),
     ) -> list[LLMOutput]:
         """Request completions from a configured LLM provider."""
 
+        if max_retries < 0:
+            raise ValueError("max_retries must be non-negative")
+
         # We don't support logprobs for Anthropic yet
         if logprobs:
             for model_option in model_options:
@@ -429,6 +435,7 @@ class BaseLLMService:
             top_logprobs=top_logprobs,
             timeout=timeout,
             semaphore=self._semaphore,
+            max_retries=max_retries,
             cache=cache,
             response_format=response_format,
         )
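
The hunks above thread the new max_retries argument from the public BaseLLMService entry point through _parallelize_calls into every per-input provider call, validating it once at the service boundary. A minimal sketch of that flow, with invented _provider_call/_fan_out/main names standing in for code this diff does not show:

import asyncio

async def _provider_call(prompt: str, *, max_retries: int) -> str:
    # Stand-in for one provider request; the real code forwards max_retries
    # into the provider-specific completion function.
    return f"{prompt}: retry budget {max_retries}"

async def _fan_out(prompts: list[str], *, max_retries: int) -> list[str]:
    # Mirrors _parallelize_calls: every parallel task shares the same budget.
    return list(
        await asyncio.gather(*(_provider_call(p, max_retries=max_retries) for p in prompts))
    )

async def main() -> None:
    max_retries = 1
    if max_retries < 0:  # the service-level guard added in this diff
        raise ValueError("max_retries must be non-negative")
    print(await _fan_out(["input-0", "input-1"], max_retries=max_retries))

asyncio.run(main())

Validating once up front means a bad value fails fast instead of surfacing inside N parallel tasks.
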
@@ -62,6 +62,14 @@ _REGISTRY: list[tuple[str, ModelInfo]] = [
         "claude-sonnet-4-5",
         ModelInfo(rate={"input": 3.0, "output": 15.0}, context_window=200_000),
     ),
+    (
+        "claude-sonnet-4-6",
+        ModelInfo(rate={"input": 3.0, "output": 15.0}, context_window=1_000_000),
+    ),
+    (
+        "claude-opus-4-6",
+        ModelInfo(rate={"input": 5.0, "output": 25.0}, context_window=200_000),
+    ),
     (
         "claude-haiku-4-5",
         ModelInfo(rate={"input": 1.0, "output": 5.0}, context_window=200_000),
@@ -140,7 +148,9 @@ _REGISTRY: list[tuple[str, ModelInfo]] = [
 
 @lru_cache(maxsize=None)
 def get_model_info(model_name: str) -> Optional[ModelInfo]:
-    for registry_model_name, info in _REGISTRY:
+    for registry_model_name, info in sorted(
+        _REGISTRY, key=lambda entry: len(entry[0]), reverse=True
+    ):
         if registry_model_name in model_name:
             return info
     return None
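
get_model_info matches registry keys by substring, so iterating longest-key-first prevents a short key from shadowing a longer, more specific one. A standalone sketch of the idea (the shorter "claude-sonnet-4" key is hypothetical, included only to show the shadowing hazard):

_REGISTRY: list[tuple[str, int]] = [
    ("claude-sonnet-4", 200_000),     # hypothetical shorter key, for illustration
    ("claude-sonnet-4-6", 1_000_000),
]

def get_context_window(model_name: str) -> int | None:
    # Longest key first, so the most specific registry entry wins.
    for key, window in sorted(_REGISTRY, key=lambda entry: len(entry[0]), reverse=True):
        if key in model_name:
            return window
    return None

# Without the sort, list order would decide, and "claude-sonnet-4" could
# shadow the more specific entry when matching a dated snapshot name.
assert get_context_window("claude-sonnet-4-6-20260101") == 1_000_000
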
@@ -1,7 +1,5 @@
 from typing import Any, Literal, cast
 
-import backoff
-
 # all errors: https://docs.anthropic.com/en/api/errors
 from anthropic import (
     AsyncAnthropic,
@@ -57,8 +55,10 @@ from docent._llm_util.data_models.llm_output import (
     finalize_llm_output_partial,
 )
 from docent._llm_util.providers.common import (
+    ReasoningEffort,
     async_timeout_ctx,
     reasoning_budget,
+    retry_async,
 )
 from docent._log_util import get_logger
 from docent.data_models.chat import (
@@ -217,28 +217,22 @@ def _convert_anthropic_error(e: Exception):
     return None
 
 
-@backoff.on_exception(
-    backoff.expo,
-    exception=(Exception),
-    giveup=lambda e: not _is_retryable_error(e),
-    max_tries=5,
-    factor=3.0,
-    on_backoff=_print_backoff_message,
-)
 async def get_anthropic_chat_completion_streaming_async(
     client: AsyncAnthropic,
     streaming_callback: AsyncSingleLLMOutputStreamingCallback | None,
     messages: list[ChatMessage],
     model_name: str,
+    *,
     tools: list[ToolInfo] | None = None,
     tool_choice: Literal["auto", "required"] | None = None,
     max_new_tokens: int = 32,
     temperature: float = 1.0,
-    reasoning_effort: Literal["low", "medium", "high"] | None = None,
+    reasoning_effort: ReasoningEffort | None = None,
     logprobs: bool = False,
     top_logprobs: int | None = None,
     timeout: float = 5.0,
     response_format: ResponseFormat | None = None,
+    max_retries: int = 1,
 ):
     if logprobs or top_logprobs is not None:
         raise NotImplementedError(
@@ -247,58 +241,63 @@ async def get_anthropic_chat_completion_streaming_async(
 
     system, input_messages = parse_chat_messages(messages)
 
-    try:
-        async with async_timeout_ctx(timeout):
-            create_kwargs: dict[str, Any] = {
-                "model": model_name,
-                "messages": input_messages,
-                "max_tokens": max_new_tokens,
-                "temperature": temperature,
-                "stream": True,
-            }
-            if reasoning_effort:
-                create_kwargs["thinking"] = {
-                    "type": "enabled",
-                    "budget_tokens": reasoning_budget(max_new_tokens, reasoning_effort),
+    async def _call() -> LLMOutput:
+        try:
+            async with async_timeout_ctx(timeout):
+                create_kwargs: dict[str, Any] = {
+                    "model": model_name,
+                    "messages": input_messages,
+                    "max_tokens": max_new_tokens,
+                    "temperature": temperature,
+                    "stream": True,
                 }
-            if tools:
-                create_kwargs["tools"] = parse_tools(tools)
-            if tool_choice_param := _parse_tool_choice(tool_choice):
-                create_kwargs["tool_choice"] = tool_choice_param
-            if system is not None:
-                create_kwargs["system"] = system
-            if response_format is not None:
-                output_format = _build_output_format(response_format)
-                extra_headers = dict(create_kwargs.get("extra_headers", {}))
-                extra_headers["anthropic-beta"] = ANTHROPIC_STRUCTURED_OUTPUTS_BETA
-                create_kwargs["extra_headers"] = extra_headers
-                extra_body = dict(create_kwargs.get("extra_body", {}))
-                extra_body["output_format"] = output_format
-                create_kwargs["extra_body"] = extra_body
-
-            stream = cast(
-                AsyncStream[RawMessageStreamEvent],
-                await client.messages.create(**create_kwargs),
-            )
+                if reasoning_effort:
+                    create_kwargs["thinking"] = {
+                        "type": "enabled",
+                        "budget_tokens": reasoning_budget(max_new_tokens, reasoning_effort),
+                    }
+                if tools:
+                    create_kwargs["tools"] = parse_tools(tools)
+                if tool_choice_param := _parse_tool_choice(tool_choice):
+                    create_kwargs["tool_choice"] = tool_choice_param
+                if system is not None:
+                    create_kwargs["system"] = system
+                if response_format is not None:
+                    output_format = _build_output_format(response_format)
+                    extra_headers = dict(create_kwargs.get("extra_headers", {}))
+                    extra_headers["anthropic-beta"] = ANTHROPIC_STRUCTURED_OUTPUTS_BETA
+                    create_kwargs["extra_headers"] = extra_headers
+                    extra_body = dict(create_kwargs.get("extra_body", {}))
+                    extra_body["output_format"] = output_format
+                    create_kwargs["extra_body"] = extra_body
+
+                stream = cast(
+                    AsyncStream[RawMessageStreamEvent],
+                    await client.messages.create(**create_kwargs),
+                )
 
-        llm_output_partial = None
-        async for chunk in stream:
-            llm_output_partial = update_llm_output(llm_output_partial, chunk)
-            if streaming_callback:
-                await streaming_callback(finalize_llm_output_partial(llm_output_partial))
+            llm_output_partial = None
+            async for chunk in stream:
+                llm_output_partial = update_llm_output(llm_output_partial, chunk)
+                if streaming_callback:
+                    await streaming_callback(finalize_llm_output_partial(llm_output_partial))
 
-        # Fully parse the partial output
-        if llm_output_partial:
-            return finalize_llm_output_partial(llm_output_partial)
-        else:
-            # Streaming did not produce anything
+            if llm_output_partial:
+                return finalize_llm_output_partial(llm_output_partial)
             return LLMOutput(model=model_name, completions=[], errors=[NoResponseException()])
-    except (RateLimitError, BadRequestError) as e:
-        if e2 := _convert_anthropic_error(e):
-            raise e2 from e
-        else:
+        except (RateLimitError, BadRequestError) as e:
+            if e2 := _convert_anthropic_error(e):
+                raise e2 from e
             raise
 
+    return await retry_async(
+        _call,
+        max_retries=max_retries,
+        is_retryable_error=_is_retryable_error,
+        factor=3.0,
+        on_backoff=_print_backoff_message,
+    )
+
 
 FINISH_REASON_MAP: dict[str, FinishReasonType] = {
     "end_turn": "stop",
413
412
  )
414
413
 
415
414
 
416
- @backoff.on_exception(
417
- backoff.expo,
418
- exception=(Exception),
419
- giveup=lambda e: not _is_retryable_error(e),
420
- max_tries=5,
421
- factor=3.0,
422
- on_backoff=_print_backoff_message,
423
- )
424
415
  async def get_anthropic_chat_completion_async(
425
416
  client: AsyncAnthropic,
426
417
  messages: list[ChatMessage],
427
418
  model_name: str,
419
+ *,
428
420
  tools: list[ToolInfo] | None = None,
429
421
  tool_choice: Literal["auto", "required"] | None = None,
430
422
  max_new_tokens: int = 32,
431
423
  temperature: float = 1.0,
432
- reasoning_effort: Literal["low", "medium", "high"] | None = None,
424
+ reasoning_effort: ReasoningEffort | None = None,
433
425
  logprobs: bool = False,
434
426
  top_logprobs: int | None = None,
435
427
  timeout: float = 5.0,
436
428
  response_format: ResponseFormat | None = None,
429
+ max_retries: int = 1,
437
430
  ) -> LLMOutput:
438
431
  """
439
432
  Note from kevin 1/29/2025:
@@ -451,49 +444,57 @@ async def get_anthropic_chat_completion_async(
 
     system, input_messages = parse_chat_messages(messages)
 
-    try:
-        async with async_timeout_ctx(timeout):
-            create_kwargs: dict[str, Any] = {
-                "model": model_name,
-                "messages": input_messages,
-                "max_tokens": max_new_tokens,
-                "temperature": temperature,
-            }
-            if reasoning_effort:
-                create_kwargs["thinking"] = {
-                    "type": "enabled",
-                    "budget_tokens": reasoning_budget(max_new_tokens, reasoning_effort),
+    async def _call() -> LLMOutput:
+        try:
+            async with async_timeout_ctx(timeout):
+                create_kwargs: dict[str, Any] = {
+                    "model": model_name,
+                    "messages": input_messages,
+                    "max_tokens": max_new_tokens,
+                    "temperature": temperature,
                 }
-            if tools:
-                create_kwargs["tools"] = parse_tools(tools)
-            if tool_choice_param := _parse_tool_choice(tool_choice):
-                create_kwargs["tool_choice"] = tool_choice_param
-            if system is not None:
-                create_kwargs["system"] = system
-            if response_format is not None:
-                output_format = _build_output_format(response_format)
-                extra_headers = dict(create_kwargs.get("extra_headers", {}))
-                extra_headers["anthropic-beta"] = ANTHROPIC_STRUCTURED_OUTPUTS_BETA
-                create_kwargs["extra_headers"] = extra_headers
-                extra_body = dict(create_kwargs.get("extra_body", {}))
-                extra_body["output_format"] = output_format
-                create_kwargs["extra_body"] = extra_body
-
-            raw_output = cast(Message, await client.messages.create(**create_kwargs))
-
-            output = parse_anthropic_completion(raw_output, model_name)
-            if output.first and output.first.finish_reason == "length" and output.first.no_text:
-                raise CompletionTooLongException(
-                    "Completion empty due to truncation. Consider increasing max_new_tokens."
-                )
+                if reasoning_effort:
+                    create_kwargs["thinking"] = {
+                        "type": "enabled",
+                        "budget_tokens": reasoning_budget(max_new_tokens, reasoning_effort),
+                    }
+                if tools:
+                    create_kwargs["tools"] = parse_tools(tools)
+                if tool_choice_param := _parse_tool_choice(tool_choice):
+                    create_kwargs["tool_choice"] = tool_choice_param
+                if system is not None:
+                    create_kwargs["system"] = system
+                if response_format is not None:
+                    output_format = _build_output_format(response_format)
+                    extra_headers = dict(create_kwargs.get("extra_headers", {}))
+                    extra_headers["anthropic-beta"] = ANTHROPIC_STRUCTURED_OUTPUTS_BETA
+                    create_kwargs["extra_headers"] = extra_headers
+                    extra_body = dict(create_kwargs.get("extra_body", {}))
+                    extra_body["output_format"] = output_format
+                    create_kwargs["extra_body"] = extra_body
+
+                raw_output = cast(Message, await client.messages.create(**create_kwargs))
+
+                output = parse_anthropic_completion(raw_output, model_name)
+                if output.first and output.first.finish_reason == "length" and output.first.no_text:
+                    raise CompletionTooLongException(
+                        "Completion empty due to truncation. Consider increasing max_new_tokens."
+                    )
 
-        return output
-    except (RateLimitError, BadRequestError) as e:
-        if e2 := _convert_anthropic_error(e):
-            raise e2 from e
-        else:
+            return output
+        except (RateLimitError, BadRequestError) as e:
+            if e2 := _convert_anthropic_error(e):
+                raise e2 from e
             raise
 
+    return await retry_async(
+        _call,
+        max_retries=max_retries,
+        is_retryable_error=_is_retryable_error,
+        factor=3.0,
+        on_backoff=_print_backoff_message,
+    )
+
 
 def get_anthropic_client_async(api_key: str | None = None) -> AsyncAnthropic:
     return AsyncAnthropic(api_key=api_key) if api_key else AsyncAnthropic()
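
Both Anthropic entry points are now keyword-only after model_name (the new "*,"), take the shared ReasoningEffort alias instead of an inline Literal, and accept max_retries; retrying moved from a @backoff.on_exception decorator to a retry_async wrapper around an inner _call closure. A hedged usage sketch of the refactored non-streaming call (the empty message list and model name are illustrative only):

import asyncio

from docent._llm_util.providers.anthropic import (
    get_anthropic_chat_completion_async,
    get_anthropic_client_async,
)

async def main() -> None:
    client = get_anthropic_client_async()  # no api_key arg: falls back to ambient credentials
    messages = []  # in real use, a list[ChatMessage] from docent.data_models.chat
    output = await get_anthropic_chat_completion_async(
        client,
        messages,
        "claude-sonnet-4-6",     # illustrative name from the registry hunk above
        max_new_tokens=1024,
        reasoning_effort="low",  # ReasoningEffort; "minimal" is also accepted now
        max_retries=3,           # exponential backoff (factor=3.0) on retryable errors
    )
    print(output.first)

asyncio.run(main())

Because of the "*" marker, passing these options positionally now raises a TypeError; every option after model_name must be named.
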
@@ -1,7 +1,12 @@
 import asyncio
 import json
 from contextlib import asynccontextmanager
-from typing import Any, AsyncIterator, Literal, cast
+from typing import Any, AsyncIterator, Awaitable, Callable, Literal, cast
+
+import backoff
+from backoff.types import Details
+
+ReasoningEffort = Literal["minimal", "low", "medium", "high"]
 
 
 @asynccontextmanager
@@ -14,13 +19,15 @@ async def async_timeout_ctx(timeout: float | None) -> AsyncIterator[None]:
         yield
 
 
-def reasoning_budget(max_new_tokens: int, effort: Literal["low", "medium", "high"]) -> int:
+def reasoning_budget(max_new_tokens: int, effort: ReasoningEffort) -> int:
     if effort == "high":
         ratio = 0.75
     elif effort == "medium":
         ratio = 0.5
-    else:
+    elif effort == "low":
         ratio = 0.25
+    else:
+        ratio = 0.1
     return int(max_new_tokens * ratio)
 
 
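The budget mapping now covers a fourth "minimal" tier at a 0.1 ratio. A worked example of the resulting token budgets, restated as an equivalent table-driven function (the dict form is a reformulation for clarity, not the code in the diff):

from typing import Literal

ReasoningEffort = Literal["minimal", "low", "medium", "high"]

def reasoning_budget(max_new_tokens: int, effort: ReasoningEffort) -> int:
    # Equivalent table form of the if/elif chain above.
    ratio = {"high": 0.75, "medium": 0.5, "low": 0.25, "minimal": 0.1}[effort]
    return int(max_new_tokens * ratio)

assert reasoning_budget(32_000, "high") == 24_000
assert reasoning_budget(32_000, "medium") == 16_000
assert reasoning_budget(32_000, "low") == 8_000
assert reasoning_budget(32_000, "minimal") == 3_200
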
@@ -39,3 +46,25 @@ def coerce_tool_args(args: Any) -> dict[str, Any]:
         return {"__parse_error_raw_args": args}
     # Fallback: unknown structure
     return {"__parse_error_raw_args": str(args)}
+
+
+async def retry_async(
+    func: Callable[[], Awaitable[Any]],
+    *,
+    max_retries: int,
+    is_retryable_error: Callable[[BaseException], bool],
+    factor: float,
+    on_backoff: Callable[[Details], None] | None = None,
+) -> Any:
+    if max_retries < 0:
+        raise ValueError("max_retries must be non-negative")
+
+    decorated = backoff.on_exception(
+        backoff.expo,
+        exception=(Exception,),
+        giveup=lambda e: not is_retryable_error(e),
+        max_tries=max_retries + 1,
+        factor=factor,
+        on_backoff=on_backoff,
+    )(func)
+    return await decorated()
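
retry_async turns the old decorator configuration into a per-call helper. Note max_tries=max_retries + 1: max_retries counts retries, not total attempts, so max_retries=0 means a single un-retried call, and non-retryable errors propagate immediately through the giveup predicate. A hedged usage sketch with an invented transient error type (this example really sleeps between attempts, since backoff.expo with factor=3.0 waits ~3s, then ~6s):

import asyncio

from docent._llm_util.providers.common import retry_async

class _TransientError(Exception):
    pass

attempts = 0

async def flaky() -> str:
    # Fails twice with a retryable error, then succeeds.
    global attempts
    attempts += 1
    if attempts < 3:
        raise _TransientError("transient failure")
    return "ok"

async def main() -> None:
    # max_retries=2 permits up to 3 total attempts (max_tries = max_retries + 1).
    result = await retry_async(
        flaky,
        max_retries=2,
        is_retryable_error=lambda e: isinstance(e, _TransientError),
        factor=3.0,
    )
    assert result == "ok" and attempts == 3

asyncio.run(main())
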