docent-python 0.1.61a0__tar.gz → 0.1.62a0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/.gitignore +5 -1
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/PKG-INFO +1 -1
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/_llm_util/data_models/llm_output.py +0 -4
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/_llm_util/llm_svc.py +121 -163
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/_log_util/logger.py +3 -2
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/data_models/__init__.py +2 -0
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/data_models/agent_run.py +134 -162
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/data_models/citation.py +5 -4
- docent_python-0.1.62a0/docent/data_models/context_config.py +88 -0
- docent_python-0.1.62a0/docent/data_models/metadata_util.py +180 -0
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/data_models/reading.py +95 -33
- docent_python-0.1.62a0/docent/data_models/report.py +16 -0
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/data_models/transcript.py +68 -38
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/judges/impl.py +41 -44
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/judges/util/parse_output.py +1 -1
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/mcp/server.py +250 -9
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/sdk/_base.py +44 -2
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/sdk/_client_util.py +72 -4
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/sdk/_readings.py +235 -43
- docent_python-0.1.62a0/docent/sdk/_reports.py +281 -0
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/sdk/client.py +2 -0
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/sdk/llm_context.py +174 -83
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/sdk/reading.py +19 -7
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/pyproject.toml +1 -1
- docent_python-0.1.61a0/docent/_llm_util/llm_cache.py +0 -206
- docent_python-0.1.61a0/docent/data_models/metadata_util.py +0 -32
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/LICENSE.md +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/README.md +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/__init__.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/_llm_util/__init__.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/_llm_util/data_models/__init__.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/_llm_util/data_models/exceptions.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/_llm_util/model_registry.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/_llm_util/providers/__init__.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/_llm_util/providers/anthropic.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/_llm_util/providers/common.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/_llm_util/providers/google.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/_llm_util/providers/openai.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/_llm_util/providers/openrouter.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/_llm_util/providers/preference_types.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/_llm_util/providers/provider_registry.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/_log_util/__init__.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/data_models/_tiktoken_util.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/data_models/chat/__init__.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/data_models/chat/content.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/data_models/chat/message.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/data_models/chat/response_format.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/data_models/chat/tool.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/data_models/feedback.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/data_models/formatted_objects.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/data_models/judge.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/data_models/regex.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/data_models/util.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/judges/__init__.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/judges/analysis.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/judges/runner.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/judges/stats.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/judges/types.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/judges/util/forgiving_json.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/judges/util/meta_schema.json +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/judges/util/meta_schema.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/judges/util/template_formatter.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/judges/util/voting.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/loaders/load_inspect.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/mcp/__init__.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/mcp/__main__.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/py.typed +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/samples/__init__.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/samples/load.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/samples/log.eval +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/samples/tb_airline.json +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/sdk/__init__.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/sdk/_agent_runs.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/sdk/_collections.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/sdk/_dql.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/sdk/_feedback.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/sdk/_labels.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/sdk/_results.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/sdk/_rubrics.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/sdk/_sharing.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/sdk/agent_run_writer.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/sdk/integrations/__init__.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/sdk/integrations/harbor.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/sdk/integrations/inspect.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/sdk/integrations/nemogym.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/sdk/integrations/util.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/sdk/llm_request.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/sdk/util.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/trace.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/trace_temp.py +0 -0
- {docent_python-0.1.61a0 → docent_python-0.1.62a0}/uv.lock +0 -0
{docent_python-0.1.61a0 → docent_python-0.1.62a0}/.gitignore
RENAMED
```diff
@@ -145,8 +145,9 @@ ENV/
 env.bak/
 venv.bak/
 
-# Docent
+# Docent
 docent.env*
+docent_analyses/
 
 # Spyder project settings
 .spyderproject
@@ -204,3 +205,6 @@ data/cache
 
 # dont commit package lock, force use of bun lock
 package-lock.json
+
+# Claude Code worktrees
+.claude/worktrees/
```
{docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/_llm_util/data_models/llm_output.py
RENAMED
```diff
@@ -97,7 +97,6 @@ class LLMOutput:
     completions: list[LLMCompletion]
     errors: list[LLMException] = field(default_factory=lambda: [])
     usage: UsageMetrics = field(default_factory=UsageMetrics)
-    from_cache: bool = False
     duration: float | None = None
 
     @property
@@ -142,7 +141,6 @@ class LLMOutput:
             "completions": [comp.model_dump() for comp in self.completions],
             "errors": [e.error_type_id for e in self.errors],
             "usage": self.usage.to_dict(),
-            "from_cache": self.from_cache,
             "duration": self.duration,
         }
 
@@ -171,7 +169,6 @@ class LLMOutput:
             completions=completions,
             errors=errors,
             usage=UsageMetrics(**usage),
-            from_cache=bool(data.get("from_cache", False)),
             duration=data.get("duration"),
         )
 
@@ -275,7 +272,6 @@ def finalize_llm_output_partial(partial: LLMOutputPartial) -> LLMOutput:
             for c in partial.completions
         ],
         usage=partial.usage,
-        from_cache=False,
     )
 
     # If the completion is empty and was truncated (likely due to too much reasoning), raise an exception
```
{docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/_llm_util/llm_svc.py
RENAMED
```diff
@@ -28,7 +28,6 @@ from docent._llm_util.data_models.llm_output import (
     AsyncSingleLLMOutputStreamingCallback,
     LLMOutput,
 )
-from docent._llm_util.llm_cache import LLMCache
 from docent._llm_util.providers.preference_types import ModelOption
 from docent._llm_util.providers.provider_registry import (
     PROVIDERS,
@@ -37,6 +36,7 @@ from docent._llm_util.providers.provider_registry import (
 )
 from docent._log_util import get_logger
 from docent.data_models.chat import ChatMessage, ToolInfo, parse_chat_message
+from docent.data_models.chat.message import AssistantMessage, UserMessage
 from docent.data_models.chat.response_format import ResponseFormat
 
 logger = get_logger(__name__)
```
```diff
@@ -91,8 +91,8 @@ async def _parallelize_calls(
     semaphore: Semaphore,
     max_retries: int,
     # use_tqdm: bool,
-    cache: LLMCache | None = None,
     response_format: ResponseFormat | None = None,
+    retry_with_feedback: bool = False,
 ):
     base_func = partial(
         single_output_getter,
```
```diff
@@ -120,122 +120,127 @@ async def _parallelize_calls(
         else None
     )
 
-    # Save resolved messages to avoid multiple resolutions
-    resolved_messages: list[list[ChatMessage] | None] = [None] * len(inputs)
-
     # Not sure why the cast is necessary for the type checker
     cancelled_due_to_usage_limit: bool = cast(bool, False)
 
+    def _mark_usage_limit_responses() -> None:
+        for i, response in enumerate(responses):
+            if response is None:
+                responses[i] = LLMOutput(
+                    model=model_name,
+                    completions=[],
+                    errors=[DocentUsageLimitException()],
+                )
+            elif not response.completions and not response.errors:
+                response.errors.append(DocentUsageLimitException())
+
     async def _limited_task(i: int, cur_input: MessagesInput, tg: TaskGroup):
-        nonlocal responses, pbar,
+        nonlocal responses, pbar, cancelled_due_to_usage_limit
 
         async with semaphore:
            messages = _resolve_messages_input(cur_input)
-            resolved_messages[i] = messages
 
            retry_count = 0
            result = None
-            call_started_at
-
-
-
-
-
-
-
-
-
-
-
-
-                    response_format=response_format,
-                )
-                if cache is not None
-                else None
-            )
-            if cached_result is not None:
-                result = cached_result
-                if streaming_callback is not None:
-                    await streaming_callback(i, result)
-            else:
-                call_started_at = time.perf_counter()
-                while retry_count < MAX_VALIDATION_ATTEMPTS:
-                    try:
-                        if streaming_callback is None:
-                            result = await base_func(client=client, messages=messages)
-                        else:
-                            result = await base_func(
-                                client=client,
-                                streaming_callback=_get_single_streaming_callback(
-                                    i, streaming_callback
-                                ),
-                                messages=messages,
-                            )
-
-                        # Validate if validation callback provided and result is successful
-                        if validation_callback and not result.did_error:
-                            await validation_callback(i, result)
-
-                        break
-                    except ValidationFailedException as e:
-                        retry_count += 1
-                        logger.warning(
-                            f"Validation failed for {model_name} after {retry_count} attempts: {e}"
+            call_started_at = time.perf_counter()
+            current_messages = messages
+            while retry_count < MAX_VALIDATION_ATTEMPTS:
+                try:
+                    if streaming_callback is None:
+                        result = await base_func(client=client, messages=current_messages)
+                    else:
+                        result = await base_func(
+                            client=client,
+                            streaming_callback=_get_single_streaming_callback(
+                                i, streaming_callback
+                            ),
+                            messages=current_messages,
                        )
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+                    # Validate if validation callback provided and result is successful
+                    if validation_callback and not result.did_error:
+                        await validation_callback(i, result)
+
+                    break
+                except ValidationFailedException as e:
+                    retry_count += 1
+                    logger.warning(
+                        "Validation failed for %s after %d attempts: %s",
+                        model_name,
+                        retry_count,
+                        e,
+                        extra={"original_output": e.failed_output},
+                    )
+                    if retry_count >= MAX_VALIDATION_ATTEMPTS:
+                        logger.error(
+                            "Validation failed for %s after %d attempts: %s",
+                            model_name,
+                            retry_count,
+                            e,
+                            extra={"original_output": e.failed_output},
                        )
-                        cancelled_due_to_usage_limit = True
-                        tg.cancel_scope.cancel()
-                        break
-                    except asyncio.TimeoutError as e:
-                        timeout_exception = TimeoutException(str(e) or "Request timed out")
-                        timeout_exception.__cause__ = e
-                        logger.error(f"Call to {model_name} timed out")
                        result = LLMOutput(
                            model=model_name,
                            completions=[],
-                            errors=[
+                            errors=[e],
                        )
                        break
-                    except Exception as e:
-                        if not isinstance(e, LLMException):
-                            logger.error(
-                                f"LLM call raised an exception that is not an LLMException: {e}. Failure traceback:\n{traceback.format_exc()}"
-                            )
-                            llm_exception = LLMException(e)
-                            llm_exception.__cause__ = e
-                        else:
-                            llm_exception = e
-
-                        error_message = f"Call to {model_name} failed even with backoff: {e.__class__.__name__}."
-
-                        if not isinstance(e, RateLimitException):
-                            error_message += f" Failure traceback:\n{traceback.format_exc()}"
-                        logger.error(error_message)
 
-
-
-
-
+                    if retry_with_feedback:
+                        # Build a new message list with the failed output and
+                        # error feedback so the model can correct itself
+                        current_messages = [
+                            *messages,
+                            AssistantMessage(content=e.failed_output or ""),
+                            UserMessage(
+                                content=f"Your previous output failed validation: {e}\n\nPlease try again with a corrected output."
+                            ),
+                        ]
+                except DocentUsageLimitException as _:
+                    result = LLMOutput(
+                        model=model_name,
+                        completions=[],
+                        errors=[],  # Usage limit exceptions will be added to all results later if cancelled_due_to_usage_limit
+                    )
+                    cancelled_due_to_usage_limit = True
+                    tg.cancel_scope.cancel()
+                    break
+                except asyncio.TimeoutError as e:
+                    timeout_exception = TimeoutException(str(e) or "Request timed out")
+                    timeout_exception.__cause__ = e
+                    logger.error(f"Call to {model_name} timed out")
+                    result = LLMOutput(
+                        model=model_name,
+                        completions=[],
+                        errors=[timeout_exception],
+                    )
+                    break
+                except Exception as e:
+                    if not isinstance(e, LLMException):
+                        logger.error(
+                            f"LLM call raised an exception that is not an LLMException: {e}. Failure traceback:\n{traceback.format_exc()}"
                        )
-
+                        llm_exception = LLMException(e)
+                        llm_exception.__cause__ = e
+                    else:
+                        llm_exception = e
+
+                    error_message = (
+                        f"Call to {model_name} failed even with backoff: {e.__class__.__name__}."
+                    )
 
-
-
+                    if not isinstance(e, RateLimitException):
+                        error_message += f" Failure traceback:\n{traceback.format_exc()}"
+                    logger.error(error_message)
+
+                    result = LLMOutput(
+                        model=model_name,
+                        completions=[],
+                        errors=[llm_exception],
+                    )
+                    break
+
+            if result is not None:
                result.duration = time.perf_counter() - call_started_at
 
                # Always call completion callback with final result (success or error)
```
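The hunk above removes the per-call cache lookup and adds the feedback-retry path: when `validation_callback` rejects a completion and `retry_with_feedback` is set, the failed output is replayed as an `AssistantMessage` followed by a corrective `UserMessage` before the next attempt. A minimal self-contained sketch of that message rewriting, using stand-in dataclasses for the real `docent.data_models.chat.message` classes (their exact field sets are an assumption here):

```python
from dataclasses import dataclass


# Stand-ins for the real classes in docent.data_models.chat.message;
# assumed to carry a plain `content` string for illustration only.
@dataclass
class AssistantMessage:
    content: str


@dataclass
class UserMessage:
    content: str


def build_feedback_messages(
    messages: list, failed_output: str | None, error: str
) -> list:
    """Mirror the retry_with_feedback branch above: replay the failed
    completion, then ask the model to correct itself."""
    return [
        *messages,
        AssistantMessage(content=failed_output or ""),
        UserMessage(
            content=f"Your previous output failed validation: {error}\n\n"
            "Please try again with a corrected output."
        ),
    ]


# One failed round of a JSON-only task:
history = [UserMessage(content="Return a JSON object with a 'name' key.")]
retry = build_feedback_messages(history, "{'name': 'x'}", "not valid JSON")
assert len(retry) == 3  # original prompt + failed output + feedback
```

Because `current_messages` is rebuilt from the original `messages` each time, feedback from earlier failed attempts is not stacked; each retry sees only the most recent failure.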
```diff
@@ -244,44 +249,14 @@ async def _parallelize_calls(
                    await completion_callback(i, result)
                # LLMService uses this callback to record cost, and may throw an error if we just exceeded limit
                except DocentUsageLimitException as e:
-                    result.errors
+                    if not result.completions and not result.errors:
+                        result.errors.append(e)
                    cancelled_due_to_usage_limit = True
                    tg.cancel_scope.cancel()
 
            responses[i] = result
            if pbar is not None:
                pbar.update(1)
-            if pbar is None or pbar.n == pbar.total:
-                tg.cancel_scope.cancel()
-
-    def _cache_responses():
-        nonlocal responses, cache
-
-        if cache is not None:
-            indices = [
-                i
-                for i, response in enumerate(responses)
-                if resolved_messages[i] is not None
-                and response is not None
-                and not response.did_error
-            ]
-            cache.set_batch(
-                # We already checked that each index has a resolved messages list
-                [cast(list[ChatMessage], resolved_messages[i]) for i in indices],
-                model_name,
-                # We already checked that each index corresponds to an LLMOutput object
-                [cast(LLMOutput, responses[i]) for i in indices],
-                tools=tools,
-                tool_choice=tool_choice,
-                reasoning_effort=reasoning_effort,
-                temperature=temperature,
-                logprobs=logprobs,
-                top_logprobs=top_logprobs,
-                response_format=response_format,
-            )
-            return len(indices)
-        else:
-            return 0
 
     # Get all results concurrently
     try:
```
```diff
@@ -290,30 +265,14 @@ async def _parallelize_calls(
            for i, cur_input in enumerate(inputs):
                tg.start_soon(_limited_task, i, cur_input, tg)
 
-    # Cache what we have so far if something got cancelled
     except anyio.get_cancelled_exc_class():
-
-
-        logger.info(
-            f"Cancelled {len(inputs) - num_cached} unfinished LLM API calls, but cached {num_cached} completed responses"
-        )
-
-        # If the task was cancelled due to usage limit, set the response to a usage limit exception
-        if cancelled_due_to_usage_limit:
-            for i, response in enumerate(responses):
-                if response is None:
-                    responses[i] = LLMOutput(
-                        model=model_name,
-                        completions=[],
-                        errors=[DocentUsageLimitException()],
-                    )
-                else:
-                    response.errors.append(DocentUsageLimitException())
+        if not cancelled_due_to_usage_limit:
+            raise
 
-
-
-
-
+    # If we stopped the batch due to usage limits, make sure every input has a
+    # structured result instead of relying on AnyIO's cancellation propagation.
+    if cancelled_due_to_usage_limit:
+        _mark_usage_limit_responses()
 
     # At this point, all indices should have a result
     assert all(isinstance(r, LLMOutput) for r in responses), (
```
```diff
@@ -357,9 +316,9 @@ class BaseLLMService:
        streaming_callback: AsyncLLMOutputStreamingCallback | None = None,
        validation_callback: AsyncLLMOutputStreamingCallback | None = None,
        completion_callback: AsyncLLMOutputStreamingCallback | None = None,
-        use_cache: bool = False,
        response_format: ResponseFormat | None = None,
        max_retries: int = 1,
+        retry_with_feedback: bool = False,
        _api_key_overrides: dict[str, str] = dict(),
    ) -> list[LLMOutput]:
        """Request completions from a configured LLM provider."""
@@ -375,14 +334,6 @@ class BaseLLMService:
                f"Logprobs are not supported for Anthropic, so we can't use model {model_option.model_name}"
            )
 
-        # Instantiate cache
-        # TODO(mengk): make this more robust, possibly move to a NoSQL database or something
-        try:
-            cache = LLMCache() if use_cache else None
-        except ValueError as e:
-            logger.warning(f"Disabling LLM cache due to init error: {e}")
-            cache = None
-
        # Initialize pointer to which model we're using; used for model rotation after failures
        current_model_option_index = 0
 
@@ -413,7 +364,7 @@ class BaseLLMService:
            single_output_getter = PROVIDERS[provider]["single_output_getter"]
            single_streaming_output_getter = PROVIDERS[provider]["single_streaming_output_getter"]
 
-            # Get completions for
+            # Get completions for messages.
            outputs: list[LLMOutput] = await _parallelize_calls(
                (
                    single_output_getter
@@ -436,11 +387,18 @@ class BaseLLMService:
                timeout=timeout,
                semaphore=self._semaphore,
                max_retries=max_retries,
-                cache=cache,
                response_format=response_format,
+                retry_with_feedback=retry_with_feedback,
            )
            assert len(outputs) == len(inputs), "Number of outputs must match number of messages"
 
+            if any(
+                isinstance(e, DocentUsageLimitException)
+                for output in outputs
+                for e in output.errors
+            ):
+                break
+
            # Only count errors that should trigger model rotation (API errors, not validation/usage errors)
            num_rotation_errors = sum(
                1
```
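Taken together, the service-level changes swap the removed `use_cache` flag for `retry_with_feedback` and stop rotating models once any output in a batch carries a `DocentUsageLimitException`. A caller-side sketch under stated assumptions: the entry-point name `get_completions` and the exception's import path are not confirmed by this diff.

```python
# Hypothetical usage sketch; `get_completions` and the import path below are
# assumptions, not confirmed by this diff.
from docent._llm_util.data_models.exceptions import DocentUsageLimitException


async def run_batch(svc: "BaseLLMService", inputs: list, model_options: list):
    outputs = await svc.get_completions(  # assumed entry-point name
        inputs,
        model_options,
        retry_with_feedback=True,  # replaces the removed use_cache=... argument
        max_retries=1,
    )
    # Per the new early-break logic above, a usage-limit error anywhere in the
    # batch stops further model rotation; unfinished slots are filled with
    # structured usage-limit results by _mark_usage_limit_responses.
    hit_limit = any(
        isinstance(e, DocentUsageLimitException)
        for output in outputs
        for e in output.errors
    )
    return outputs, hit_limit
```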
{docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/_log_util/logger.py
RENAMED
```diff
@@ -1,4 +1,5 @@
 import logging
+import os
 import sys
 from dataclasses import dataclass
 from typing import IO, Any, Dict, Literal, MutableMapping, Optional, Tuple
@@ -135,8 +136,8 @@ def get_logger(namespace: str, stream: IO[str] | None = None) -> LoggerAdapter:
     handler.setFormatter(ColoredFormatter())
     logger.addHandler(handler)
 
-
-    logger.setLevel(logging.INFO)
+    level_name = os.environ.get("DOCENT_LOG_LEVEL", "INFO").upper()
+    logger.setLevel(getattr(logging, level_name, logging.INFO))
 
     # Wrap with adapter to support highlighting
     return LoggerAdapter(logger, {})
```
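The log level is no longer hard-coded to INFO: `get_logger` now reads `DOCENT_LOG_LEVEL` when the logger is created and falls back to INFO for unrecognized values. A small usage sketch:

```python
import os

# Set before the first get_logger() call for a namespace; the level is read
# once, at logger-creation time.
os.environ["DOCENT_LOG_LEVEL"] = "DEBUG"

from docent._log_util import get_logger

logger = get_logger("docent.example")
logger.debug("visible now; the old hard-coded INFO level would have hidden this")
# An unrecognized value such as "VERBOSE" falls back to logging.INFO
# via getattr(logging, level_name, logging.INFO).
```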
{docent_python-0.1.61a0 → docent_python-0.1.62a0}/docent/data_models/__init__.py
RENAMED
```diff
@@ -2,6 +2,7 @@ from docent.data_models.agent_run import AgentRun
 from docent.data_models.citation import InlineCitation
 from docent.data_models.judge import Label
 from docent.data_models.regex import RegexSnippet
+from docent.data_models.report import Report
 from docent.data_models.transcript import Transcript, TranscriptGroup
 
 __all__ = [
@@ -9,6 +10,7 @@ __all__ = [
     "InlineCitation",
     "Label",
     "RegexSnippet",
+    "Report",
     "Transcript",
     "TranscriptGroup",
 ]
```