docent-python 0.1.24a0__tar.gz → 0.1.26a0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of docent-python might be problematic.
- {docent_python-0.1.24a0 → docent_python-0.1.26a0}/PKG-INFO +1 -1
- {docent_python-0.1.24a0 → docent_python-0.1.26a0}/docent/_llm_util/data_models/llm_output.py +8 -0
- {docent_python-0.1.24a0 → docent_python-0.1.26a0}/docent/_llm_util/llm_svc.py +3 -3
- {docent_python-0.1.24a0 → docent_python-0.1.26a0}/docent/_llm_util/providers/openai.py +6 -1
- {docent_python-0.1.24a0 → docent_python-0.1.26a0}/docent/trace.py +108 -8
- {docent_python-0.1.24a0 → docent_python-0.1.26a0}/pyproject.toml +1 -1
- {docent_python-0.1.24a0 → docent_python-0.1.26a0}/.gitignore +0 -0
- {docent_python-0.1.24a0 → docent_python-0.1.26a0}/LICENSE.md +0 -0
- {docent_python-0.1.24a0 → docent_python-0.1.26a0}/README.md +0 -0
- {docent_python-0.1.24a0 → docent_python-0.1.26a0}/docent/__init__.py +0 -0
- {docent_python-0.1.24a0 → docent_python-0.1.26a0}/docent/_llm_util/__init__.py +0 -0
- {docent_python-0.1.24a0 → docent_python-0.1.26a0}/docent/_llm_util/data_models/__init__.py +0 -0
- {docent_python-0.1.24a0 → docent_python-0.1.26a0}/docent/_llm_util/data_models/exceptions.py +0 -0
- {docent_python-0.1.24a0 → docent_python-0.1.26a0}/docent/_llm_util/llm_cache.py +0 -0
- {docent_python-0.1.24a0 → docent_python-0.1.26a0}/docent/_llm_util/model_registry.py +0 -0
- {docent_python-0.1.24a0 → docent_python-0.1.26a0}/docent/_llm_util/providers/__init__.py +0 -0
- {docent_python-0.1.24a0 → docent_python-0.1.26a0}/docent/_llm_util/providers/anthropic.py +0 -0
- {docent_python-0.1.24a0 → docent_python-0.1.26a0}/docent/_llm_util/providers/common.py +0 -0
- {docent_python-0.1.24a0 → docent_python-0.1.26a0}/docent/_llm_util/providers/google.py +0 -0
- {docent_python-0.1.24a0 → docent_python-0.1.26a0}/docent/_llm_util/providers/openrouter.py +0 -0
- {docent_python-0.1.24a0 → docent_python-0.1.26a0}/docent/_llm_util/providers/preference_types.py +0 -0
- {docent_python-0.1.24a0 → docent_python-0.1.26a0}/docent/_llm_util/providers/provider_registry.py +0 -0
- {docent_python-0.1.24a0 → docent_python-0.1.26a0}/docent/_log_util/__init__.py +0 -0
- {docent_python-0.1.24a0 → docent_python-0.1.26a0}/docent/_log_util/logger.py +0 -0
- {docent_python-0.1.24a0 → docent_python-0.1.26a0}/docent/data_models/__init__.py +0 -0
- {docent_python-0.1.24a0 → docent_python-0.1.26a0}/docent/data_models/_tiktoken_util.py +0 -0
- {docent_python-0.1.24a0 → docent_python-0.1.26a0}/docent/data_models/agent_run.py +0 -0
- {docent_python-0.1.24a0 → docent_python-0.1.26a0}/docent/data_models/chat/__init__.py +0 -0
- {docent_python-0.1.24a0 → docent_python-0.1.26a0}/docent/data_models/chat/content.py +0 -0
- {docent_python-0.1.24a0 → docent_python-0.1.26a0}/docent/data_models/chat/message.py +0 -0
- {docent_python-0.1.24a0 → docent_python-0.1.26a0}/docent/data_models/chat/tool.py +0 -0
- {docent_python-0.1.24a0 → docent_python-0.1.26a0}/docent/data_models/citation.py +0 -0
- {docent_python-0.1.24a0 → docent_python-0.1.26a0}/docent/data_models/judge.py +0 -0
- {docent_python-0.1.24a0 → docent_python-0.1.26a0}/docent/data_models/metadata_util.py +0 -0
- {docent_python-0.1.24a0 → docent_python-0.1.26a0}/docent/data_models/regex.py +0 -0
- {docent_python-0.1.24a0 → docent_python-0.1.26a0}/docent/data_models/remove_invalid_citation_ranges.py +0 -0
- {docent_python-0.1.24a0 → docent_python-0.1.26a0}/docent/data_models/shared_types.py +0 -0
- {docent_python-0.1.24a0 → docent_python-0.1.26a0}/docent/data_models/transcript.py +0 -0
- {docent_python-0.1.24a0 → docent_python-0.1.26a0}/docent/data_models/util.py +0 -0
- {docent_python-0.1.24a0 → docent_python-0.1.26a0}/docent/judges/__init__.py +0 -0
- {docent_python-0.1.24a0 → docent_python-0.1.26a0}/docent/judges/analysis.py +0 -0
- {docent_python-0.1.24a0 → docent_python-0.1.26a0}/docent/judges/impl.py +0 -0
- {docent_python-0.1.24a0 → docent_python-0.1.26a0}/docent/judges/runner.py +0 -0
- {docent_python-0.1.24a0 → docent_python-0.1.26a0}/docent/judges/stats.py +0 -0
- {docent_python-0.1.24a0 → docent_python-0.1.26a0}/docent/judges/types.py +0 -0
- {docent_python-0.1.24a0 → docent_python-0.1.26a0}/docent/judges/util/forgiving_json.py +0 -0
- {docent_python-0.1.24a0 → docent_python-0.1.26a0}/docent/judges/util/meta_schema.json +0 -0
- {docent_python-0.1.24a0 → docent_python-0.1.26a0}/docent/judges/util/meta_schema.py +0 -0
- {docent_python-0.1.24a0 → docent_python-0.1.26a0}/docent/judges/util/parse_output.py +0 -0
- {docent_python-0.1.24a0 → docent_python-0.1.26a0}/docent/judges/util/voting.py +0 -0
- {docent_python-0.1.24a0 → docent_python-0.1.26a0}/docent/loaders/load_inspect.py +0 -0
- {docent_python-0.1.24a0 → docent_python-0.1.26a0}/docent/py.typed +0 -0
- {docent_python-0.1.24a0 → docent_python-0.1.26a0}/docent/samples/__init__.py +0 -0
- {docent_python-0.1.24a0 → docent_python-0.1.26a0}/docent/samples/load.py +0 -0
- {docent_python-0.1.24a0 → docent_python-0.1.26a0}/docent/samples/log.eval +0 -0
- {docent_python-0.1.24a0 → docent_python-0.1.26a0}/docent/samples/tb_airline.json +0 -0
- {docent_python-0.1.24a0 → docent_python-0.1.26a0}/docent/sdk/__init__.py +0 -0
- {docent_python-0.1.24a0 → docent_python-0.1.26a0}/docent/sdk/agent_run_writer.py +0 -0
- {docent_python-0.1.24a0 → docent_python-0.1.26a0}/docent/sdk/client.py +0 -0
- {docent_python-0.1.24a0 → docent_python-0.1.26a0}/docent/trace_temp.py +0 -0
- {docent_python-0.1.24a0 → docent_python-0.1.26a0}/uv.lock +0 -0
{docent_python-0.1.24a0 → docent_python-0.1.26a0}/docent/_llm_util/data_models/llm_output.py
RENAMED

@@ -8,6 +8,7 @@ from pydantic import BaseModel
 from docent._llm_util.data_models.exceptions import (
     LLM_ERROR_TYPES,
     CompletionTooLongException,
+    ContextWindowException,
     LLMException,
 )
 from docent._log_util import get_logger

@@ -148,6 +149,13 @@ class LLMOutput:
     def from_dict(cls, data: dict[str, Any]) -> "LLMOutput":
         error_type_map = {e.error_type_id: e for e in LLM_ERROR_TYPES}
         errors = data.get("errors", [])
+        error_types_to_not_log: list[str] = [
+            CompletionTooLongException.error_type_id,
+            ContextWindowException.error_type_id,
+        ]
+        errors_to_log = [e for e in errors if e not in error_types_to_not_log]
+        if errors_to_log:
+            logger.error(f"Loading LLM output with errors: {errors}")
         errors = [error_type_map.get(e, LLMException)() for e in errors]

         completions = data.get("completions", [])
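The new guard keeps expected truncation errors out of the log while still flagging anything unexpected. A minimal sketch of the same filtering pattern, with hypothetical error-type IDs standing in for the real `error_type_id` values (which are defined in `exceptions.py`, not shown in this diff):

```python
import logging

logger = logging.getLogger("docent.llm_output")

# Hypothetical IDs; the real values live on the exception classes in
# docent/_llm_util/data_models/exceptions.py.
EXPECTED_ERROR_IDS = {"completion_too_long", "context_window_exceeded"}

def load_errors(errors: list[str]) -> None:
    # Only unexpected error types produce an ERROR log entry;
    # routine truncation errors pass through silently.
    errors_to_log = [e for e in errors if e not in EXPECTED_ERROR_IDS]
    if errors_to_log:
        logger.error(f"Loading LLM output with errors: {errors}")

load_errors(["completion_too_long"])                   # silent
load_errors(["rate_limited", "completion_too_long"])   # logged once
```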
{docent_python-0.1.24a0 → docent_python-0.1.26a0}/docent/_llm_util/llm_svc.py
RENAMED

@@ -176,7 +176,7 @@ async def _parallelize_calls(
                 )
                 if retry_count >= MAX_VALIDATION_ATTEMPTS:
                     logger.error(
-                        f"Validation failed for {model_name} after {
+                        f"Validation failed for {model_name} after {retry_count} attempts. Original output: {e.failed_output}"
                     )
                     result = LLMOutput(
                         model=model_name,

@@ -195,8 +195,8 @@ async def _parallelize_calls(
                     break
                 except Exception as e:
                     if not isinstance(e, LLMException):
-                        logger.
-                            f"LLM call raised an exception that is not an LLMException: {e}"
+                        logger.error(
+                            f"LLM call raised an exception that is not an LLMException: {e}. Failure traceback:\n{traceback.format_exc()}"
                         )
                         llm_exception = LLMException(e)
                         llm_exception.__cause__ = e
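The second hunk completes the error log with the full traceback before the foreign exception is wrapped. A small sketch of the wrap-and-chain pattern used here, assuming a simplified stand-in for docent's `LLMException`:

```python
import logging
import traceback

logger = logging.getLogger("docent.llm_svc")

class LLMException(Exception):
    """Simplified stand-in for docent's LLMException."""

def call_with_wrapping(fn):
    try:
        return fn()
    except Exception as e:
        if not isinstance(e, LLMException):
            # Capture the original traceback now, before re-raising
            # replaces it with the wrapper's own stack.
            logger.error(
                f"LLM call raised an exception that is not an LLMException: {e}. "
                f"Failure traceback:\n{traceback.format_exc()}"
            )
            llm_exception = LLMException(e)
            llm_exception.__cause__ = e  # keep the chain for debuggers
            raise llm_exception
        raise

try:
    call_with_wrapping(lambda: 1 / 0)
except LLMException as wrapped:
    print(type(wrapped.__cause__).__name__)  # ZeroDivisionError
```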
{docent_python-0.1.24a0 → docent_python-0.1.26a0}/docent/_llm_util/providers/openai.py
RENAMED

@@ -18,8 +18,13 @@ from openai import (
     PermissionDeniedError,
     RateLimitError,
     UnprocessableEntityError,
-    omit,
 )
+try:
+    from openai import omit
+except ImportError:
+    from openai import Omit as _OpenAIOmit
+
+    omit = _OpenAIOmit()
 from openai.types.chat import (
     ChatCompletion,
     ChatCompletionAssistantMessageParam,
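This makes the module tolerant of openai-python versions that predate the top-level `omit` export by instantiating the `Omit` class instead. The guarded-import idiom generalizes to any optional symbol; here is the same pattern shown with a standard-library case (tomllib landed in Python 3.11, with tomli as the usual backport):

```python
# Prefer the modern module; fall back to the backport on older interpreters.
try:
    import tomllib  # Python 3.11+
except ImportError:
    import tomli as tomllib  # pip install tomli

config = tomllib.loads('answer = 42')
print(config["answer"])  # 42
```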
{docent_python-0.1.24a0 → docent_python-0.1.26a0}/docent/trace.py
RENAMED

@@ -1,12 +1,15 @@
+import asyncio
 import atexit
 import contextvars
 import itertools
+import json
 import logging
 import os
 import sys
 import threading
 import uuid
 from collections import defaultdict
+from concurrent.futures import Future, ThreadPoolExecutor
 from contextlib import asynccontextmanager, contextmanager
 from contextvars import ContextVar, Token
 from datetime import datetime, timezone
@@ -129,7 +132,13 @@ class DocentTracer:
             lambda: itertools.count(0)
         )
         self._transcript_counter_lock = threading.Lock()
+        self._transcript_group_states: dict[str, dict[str, Optional[str]]] = {}
+        self._transcript_group_state_lock = threading.Lock()
         self._flush_lock = threading.Lock()
+        self._http_executor: Optional[ThreadPoolExecutor] = None
+        self._http_executor_lock = threading.Lock()
+        self._pending_http_futures: Set[Future[Any]] = set()
+        self._pending_http_lock = threading.Lock()

     def get_current_agent_run_id(self) -> Optional[str]:
         """
@@ -439,6 +448,12 @@ class DocentTracer:
         try:
             self.flush()

+            if self._http_executor:
+                self._http_executor.shutdown(wait=True)
+                self._http_executor = None
+            with self._pending_http_lock:
+                self._pending_http_futures.clear()
+
             if self._tracer_provider:
                 self._tracer_provider.shutdown()
                 self._tracer_provider = None
@@ -469,6 +484,7 @@ class DocentTracer:
                 if hasattr(processor, "force_flush"):
                     logger.debug(f"Flushing span processor {i}")
                     processor.force_flush(timeout_millis=50)
+            self._wait_for_http_requests()
             logger.debug("Span flush completed")
         except Exception as e:
             logger.error(f"Error during flush: {e}")
@@ -615,7 +631,66 @@ class DocentTracer:
 
         return headers
 
-    def
+    def _get_http_executor(self) -> ThreadPoolExecutor:
+        with self._http_executor_lock:
+            if self._http_executor is None:
+                self._http_executor = ThreadPoolExecutor(
+                    max_workers=4, thread_name_prefix="docent-http"
+                )
+            return self._http_executor
+
+    def _should_run_http_in_background(self) -> bool:
+        try:
+            loop = asyncio.get_running_loop()
+        except RuntimeError:
+            return False
+        return loop.is_running()
+
+    def _on_http_future_done(self, future: Future[Any]) -> None:
+        with self._pending_http_lock:
+            self._pending_http_futures.discard(future)
+        try:
+            future.result()
+        except Exception as exc:  # pragma: no cover - defensive logging
+            logger.error(f"Background HTTP request failed: {exc}")
+
+    def _schedule_background_post(self, task: Callable[[], None]) -> None:
+        executor = self._get_http_executor()
+        future = executor.submit(task)
+        with self._pending_http_lock:
+            self._pending_http_futures.add(future)
+        future.add_done_callback(self._on_http_future_done)
+
+    def _wait_for_http_requests(self) -> None:
+        while True:
+            with self._pending_http_lock:
+                pending = list(self._pending_http_futures)
+            if not pending:
+                break
+            for future in pending:
+                try:
+                    future.result()
+                except Exception as exc:  # pragma: no cover - defensive logging
+                    logger.error(f"Background HTTP request failed: {exc}")
+
+    def _ensure_json_serializable_metadata(self, metadata: Dict[str, Any], context: str) -> None:
+        """
+        Validate that metadata can be serialized to JSON before sending it to the backend.
+        """
+        try:
+            json.dumps(metadata)
+        except (TypeError, ValueError) as exc:
+            raise TypeError(f"{context} metadata must be JSON serializable") from exc
+
+    def _post_json(
+        self, path: str, data: Dict[str, Any], *, allow_background: bool = False
+    ) -> None:
+        if allow_background and self._should_run_http_in_background():
+            self._schedule_background_post(lambda: self._post_json_sync(path, data))
+            return
+        self._post_json_sync(path, data)
+
+    def _post_json_sync(self, path: str, data: Dict[str, Any]) -> None:
         if not self._api_endpoint_base:
             raise RuntimeError("API endpoint base is not configured")
         url = f"{self._api_endpoint_base}{path}"
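The bulk of this change routes metadata POSTs through a small `ThreadPoolExecutor` whenever an asyncio event loop is running, so a blocking HTTP call never stalls the loop, while `flush()` can still drain in-flight requests deterministically. A self-contained sketch of the same offload-and-drain pattern (names such as `BackgroundPoster` and `post` are illustrative, not docent's API):

```python
import asyncio
import threading
from concurrent.futures import Future, ThreadPoolExecutor
from typing import Any, Callable

class BackgroundPoster:
    def __init__(self) -> None:
        self._executor = ThreadPoolExecutor(max_workers=4, thread_name_prefix="http")
        self._pending: set[Future[Any]] = set()
        self._lock = threading.Lock()

    def _in_event_loop(self) -> bool:
        # asyncio.get_running_loop() raises RuntimeError outside a loop.
        try:
            asyncio.get_running_loop()
            return True
        except RuntimeError:
            return False

    def _done(self, future: Future[Any]) -> None:
        with self._lock:
            self._pending.discard(future)

    def post(self, task: Callable[[], None]) -> None:
        if not self._in_event_loop():
            task()  # safe to block: there is no event loop to starve
            return
        future = self._executor.submit(task)
        with self._lock:
            self._pending.add(future)
        future.add_done_callback(self._done)

    def drain(self) -> None:
        # Wait for every in-flight request, e.g. before process exit.
        with self._lock:
            pending = list(self._pending)
        for future in pending:
            future.result()

poster = BackgroundPoster()

async def main() -> None:
    poster.post(lambda: print("sent from worker thread"))
    await asyncio.sleep(0)  # the loop stays responsive while the post runs

asyncio.run(main())
poster.drain()
```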
@@ -660,6 +735,8 @@ class DocentTracer:
         if self._disabled:
             return

+        self._ensure_json_serializable_metadata(metadata, "Agent run")
+
         collection_id = self.collection_id
         payload: Dict[str, Any] = {
             "collection_id": collection_id,
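Probing with `json.dumps` up front turns an obscure mid-request serialization failure into an immediate, well-labeled `TypeError` at the call site. A minimal demonstration of what callers now see (the free function mirrors the method added above):

```python
import json
from typing import Any, Dict

def ensure_json_serializable(metadata: Dict[str, Any], context: str) -> None:
    # json.dumps is used purely as a probe; the encoded string is discarded.
    try:
        json.dumps(metadata)
    except (TypeError, ValueError) as exc:
        raise TypeError(f"{context} metadata must be JSON serializable") from exc

ensure_json_serializable({"score": 0.9}, "Agent run")      # passes
ensure_json_serializable({"when": object()}, "Agent run")  # raises TypeError
```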
@@ -667,7 +744,7 @@
             "metadata": metadata,
             "timestamp": datetime.now(timezone.utc).isoformat(),
         }
-        self._post_json("/v1/agent-run-metadata", payload)
+        self._post_json("/v1/agent-run-metadata", payload, allow_background=True)

     def send_transcript_metadata(
         self,
@@ -705,6 +782,7 @@
         if transcript_group_id is not None:
             payload["transcript_group_id"] = transcript_group_id
         if metadata is not None:
+            self._ensure_json_serializable_metadata(metadata, "Transcript")
             payload["metadata"] = metadata

         self._post_json("/v1/transcript-metadata", payload)
@@ -888,6 +966,27 @@
             )
             return

+        with self._transcript_group_state_lock:
+            state: dict[str, Optional[str]] = self._transcript_group_states.setdefault(
+                transcript_group_id, {}
+            )
+            final_name: Optional[str] = name if name is not None else state.get("name")
+            final_description: Optional[str] = (
+                description if description is not None else state.get("description")
+            )
+            final_parent_transcript_group_id: Optional[str] = (
+                parent_transcript_group_id
+                if parent_transcript_group_id is not None
+                else state.get("parent_transcript_group_id")
+            )
+
+            if final_name is not None:
+                state["name"] = final_name
+            if final_description is not None:
+                state["description"] = final_description
+            if final_parent_transcript_group_id is not None:
+                state["parent_transcript_group_id"] = final_parent_transcript_group_id
+
         payload: Dict[str, Any] = {
             "collection_id": collection_id,
             "transcript_group_id": transcript_group_id,
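The tracer now remembers the last non-None name, description, and parent for each transcript group, so a partial update no longer wipes fields that an earlier call already set. A reduced sketch of this merge-then-remember behavior (module-level state stands in for the tracer's instance attributes):

```python
import threading
from typing import Dict, Optional

_states: Dict[str, Dict[str, Optional[str]]] = {}
_lock = threading.Lock()

def merged_update(group_id: str, name: Optional[str], description: Optional[str]) -> dict:
    with _lock:
        state = _states.setdefault(group_id, {})
        # Prefer the caller's value; otherwise fall back to remembered state.
        final_name = name if name is not None else state.get("name")
        final_description = description if description is not None else state.get("description")
        if final_name is not None:
            state["name"] = final_name
        if final_description is not None:
            state["description"] = final_description
    payload: dict = {"transcript_group_id": group_id}
    if final_name is not None:
        payload["name"] = final_name
    if final_description is not None:
        payload["description"] = final_description
    return payload

print(merged_update("tg1", "run A", None))       # name set, no description
print(merged_update("tg1", None, "second try"))  # name "run A" is preserved
```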
@@ -895,13 +994,14 @@
             "timestamp": datetime.now(timezone.utc).isoformat(),
         }

-        if
-            payload["name"] =
-        if
-            payload["description"] =
-        if
-            payload["parent_transcript_group_id"] =
+        if final_name is not None:
+            payload["name"] = final_name
+        if final_description is not None:
+            payload["description"] = final_description
+        if final_parent_transcript_group_id is not None:
+            payload["parent_transcript_group_id"] = final_parent_transcript_group_id
         if metadata is not None:
+            self._ensure_json_serializable_metadata(metadata, "Transcript group")
             payload["metadata"] = metadata

         self._post_json("/v1/transcript-group-metadata", payload)