PyPI - docent-python - Versions diffs - 0.1.57a0__tar.gz → 0.1.59a0__tar.gz - Mend

docent-python 0.1.57a0__tar.gz → 0.1.59a0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (77) hide show

{docent_python-0.1.57a0 → docent_python-0.1.59a0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: docent-python
-Version: 0.1.57a0
+Version: 0.1.59a0
 Summary: Docent SDK
 Project-URL: Homepage, https://github.com/TransluceAI/docent
 Project-URL: Issues, https://github.com/TransluceAI/docent/issues

{docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/_llm_util/model_registry.py RENAMED Viewed

@@ -1,5 +1,6 @@
 from __future__ import annotations
+import re
 from dataclasses import dataclass
 from functools import lru_cache
 from typing import Optional
@@ -9,6 +10,8 @@ from docent._log_util import get_logger
 logger = get_logger(__name__)
+_CLAUDE_VERSION_PATTERN = re.compile(r"(claude-(?:haiku|sonnet|opus)-4)[.-](\d+)\b")
 """
 Values are USD per million tokens
@@ -34,6 +37,14 @@ _REGISTRY: list[tuple[str, ModelInfo]] = [
         "gpt-5-chat-latest",
         ModelInfo(rate={"input": 1.25, "output": 10.0}, context_window=128_000),
     ),
+    (
+        "gpt-5.4-mini",
+        ModelInfo(rate={"input": 0.75, "output": 4.50}, context_window=400_000),
+    ),
+    (
+        "gpt-5.4",
+        ModelInfo(rate={"input": 2.50, "output": 15.0}, context_window=1_050_000),
+    ),
     (
         "gpt-5-nano",
         ModelInfo(rate={"input": 0.05, "output": 0.40}, context_window=400_000),
@@ -62,18 +73,9 @@ _REGISTRY: list[tuple[str, ModelInfo]] = [
         "claude-sonnet-4-5",
         ModelInfo(rate={"input": 3.0, "output": 15.0}, context_window=200_000),
     ),
-    (
-        "claude-sonnet-4-6",
-        ModelInfo(rate={"input": 3.0, "output": 15.0}, context_window=1_000_000),
-    ),
-    (
-        "claude-opus-4-6",
-        ModelInfo(rate={"input": 5.0, "output": 25.0}, context_window=200_000),
-    ),
-    (
-        "claude-haiku-4-5",
-        ModelInfo(rate={"input": 1.0, "output": 5.0}, context_window=200_000),
-    ),
+    ("claude-sonnet-4-6", ModelInfo(rate={"input": 3.0, "output": 15.0}, context_window=1_000_000)),
+    ("claude-opus-4-6", ModelInfo(rate={"input": 5.0, "output": 25.0}, context_window=1_000_000)),
+    ("claude-haiku-4-5", ModelInfo(rate={"input": 1.0, "output": 5.0}, context_window=200_000)),
     (
         "claude-opus-4-5-20251101",
         ModelInfo(rate={"input": 5.0, "output": 25.0}, context_window=200_000),
@@ -108,6 +110,13 @@ _REGISTRY: list[tuple[str, ModelInfo]] = [
             context_window=1_048_576,
         ),
     ),
+    (
+        "gemini-3.1-pro-preview",
+        ModelInfo(
+            rate={"input": 2.00, "output": 12.00},
+            context_window=1_048_576,
+        ),
+    ),
     (
         "gemini-3-flash-preview",
         ModelInfo(
@@ -146,12 +155,27 @@ _REGISTRY: list[tuple[str, ModelInfo]] = [
 ]
+def normalize_model_name(model_name: str) -> str:
+    """Normalize provider-specific naming differences before registry lookup."""
+    return _CLAUDE_VERSION_PATTERN.sub(r"\1-\2", model_name)
+def model_names_match(expected: str, actual: str) -> bool:
+    """Match a configured model name against a provider-reported model string."""
+    normalized_expected = normalize_model_name(expected)
+    normalized_actual = normalize_model_name(actual)
+    return normalized_expected in normalized_actual
 @lru_cache(maxsize=None)
 def get_model_info(model_name: str) -> Optional[ModelInfo]:
+    normalized_model_name = normalize_model_name(model_name)
     for registry_model_name, info in sorted(
         _REGISTRY, key=lambda entry: len(entry[0]), reverse=True
     ):
-        if registry_model_name in model_name:
+        if registry_model_name in normalized_model_name:
             return info
     return None

docent_python-0.1.59a0/docent/_llm_util/providers/preference_types.py ADDED Viewed

@@ -0,0 +1,268 @@
+"""Provides preferences of which LLM models to use for different Docent functions."""
+from functools import cached_property
+from typing import Literal
+from pydantic import BaseModel, ConfigDict
+from docent._llm_util.model_registry import get_context_window
+from docent._log_util import get_logger
+logger = get_logger(__name__)
+class ModelOption(BaseModel):
+    """Configuration for a specific model from a provider. Not to be confused with ModelInfo.
+    Attributes:
+        provider: The name of the LLM provider (e.g., "openai", "anthropic").
+        model_name: The specific model to use from the provider.
+        reasoning_effort: Optional indication of computational effort to use.
+    """
+    model_config = ConfigDict(extra="ignore")
+    provider: str
+    model_name: str
+    reasoning_effort: Literal["minimal", "low", "medium", "high"] | None = None
+class ModelOptionWithContext(BaseModel):
+    """Enhanced model option that includes context window information for frontend use.
+    Not to be confused with ModelInfo or ModelOption.
+    Attributes:
+        provider: The name of the LLM provider (e.g., "openai", "anthropic").
+        model_name: The specific model to use from the provider.
+        reasoning_effort: Optional indication of computational effort to use.
+        context_window: The context window size in tokens.
+        uses_byok: Whether this model would use the user's own API key.
+    """
+    provider: str
+    model_name: str
+    reasoning_effort: Literal["minimal", "low", "medium", "high"] | None = None
+    context_window: int
+    uses_byok: bool
+    @classmethod
+    def from_model_option(
+        cls, model_option: ModelOption, uses_byok: bool = False
+    ) -> "ModelOptionWithContext":
+        """Create a ModelOptionWithContext from a ModelOption.
+        Args:
+            model_option: The base model option
+            uses_byok: Whether this model requires bring-your-own-key
+        Returns:
+            ModelOptionWithContext with context window looked up from global mapping
+        """
+        context_window = get_context_window(model_option.model_name)
+        return cls(
+            provider=model_option.provider,
+            model_name=model_option.model_name,
+            reasoning_effort=model_option.reasoning_effort,
+            context_window=context_window,
+            uses_byok=uses_byok,
+        )
+def merge_models_with_byok(
+    defaults: list[ModelOption],
+    byok: list[ModelOption],
+    api_keys: dict[str, str] | None,
+) -> list[ModelOptionWithContext]:
+    user_keys = api_keys or {}
+    merged: list[ModelOption] = list(defaults)
+    if user_keys:
+        merged.extend([m for m in byok if m.provider in user_keys])
+    return [ModelOptionWithContext.from_model_option(m, m.provider in user_keys) for m in merged]
+class PublicProviderPreferences(BaseModel):
+    @cached_property
+    def default_judge_models(self) -> list[ModelOption]:
+        """Judge models that any user can access without providing their own API key"""
+        return [
+            ModelOption(provider="openai", model_name="gpt-5.4-mini", reasoning_effort="low"),
+            ModelOption(provider="openai", model_name="gpt-5.4-mini", reasoning_effort="medium"),
+            ModelOption(provider="openai", model_name="gpt-5.4-mini", reasoning_effort="high"),
+            ModelOption(provider="openai", model_name="gpt-5.4", reasoning_effort="low"),
+            ModelOption(provider="openai", model_name="gpt-5.4", reasoning_effort="medium"),
+            ModelOption(provider="openai", model_name="gpt-5.4", reasoning_effort="high"),
+            ModelOption(
+                provider="anthropic",
+                model_name="claude-haiku-4-5",
+                reasoning_effort="low",
+            ),
+            ModelOption(
+                provider="anthropic",
+                model_name="claude-haiku-4-5",
+                reasoning_effort="medium",
+            ),
+            ModelOption(
+                provider="anthropic",
+                model_name="claude-haiku-4-5",
+                reasoning_effort="high",
+            ),
+            ModelOption(
+                provider="anthropic",
+                model_name="claude-opus-4-6",
+                reasoning_effort="low",
+            ),
+            ModelOption(
+                provider="anthropic",
+                model_name="claude-opus-4-6",
+                reasoning_effort="medium",
+            ),
+            ModelOption(
+                provider="anthropic",
+                model_name="claude-opus-4-6",
+                reasoning_effort="high",
+            ),
+            ModelOption(
+                provider="anthropic",
+                model_name="claude-sonnet-4-6",
+                reasoning_effort="low",
+            ),
+            ModelOption(
+                provider="anthropic",
+                model_name="claude-sonnet-4-6",
+                reasoning_effort="medium",
+            ),
+            ModelOption(
+                provider="anthropic",
+                model_name="claude-sonnet-4-6",
+                reasoning_effort="high",
+            ),
+            ModelOption(
+                provider="google",
+                model_name="gemini-3-flash-preview",
+                reasoning_effort="low",
+            ),
+            ModelOption(
+                provider="google",
+                model_name="gemini-3-flash-preview",
+                reasoning_effort="medium",
+            ),
+            ModelOption(
+                provider="google",
+                model_name="gemini-3-flash-preview",
+                reasoning_effort="high",
+            ),
+            ModelOption(
+                provider="google",
+                model_name="gemini-3.1-pro-preview",
+                reasoning_effort="low",
+            ),
+            ModelOption(
+                provider="google",
+                model_name="gemini-3.1-pro-preview",
+                reasoning_effort="medium",
+            ),
+            ModelOption(
+                provider="google",
+                model_name="gemini-3.1-pro-preview",
+                reasoning_effort="high",
+            ),
+            # Open Router equivalents
+            ModelOption(
+                provider="openrouter", model_name="openai/gpt-5.4-mini", reasoning_effort="low"
+            ),
+            ModelOption(
+                provider="openrouter", model_name="openai/gpt-5.4-mini", reasoning_effort="medium"
+            ),
+            ModelOption(
+                provider="openrouter", model_name="openai/gpt-5.4-mini", reasoning_effort="high"
+            ),
+            ModelOption(provider="openrouter", model_name="openai/gpt-5.4", reasoning_effort="low"),
+            ModelOption(
+                provider="openrouter", model_name="openai/gpt-5.4", reasoning_effort="medium"
+            ),
+            ModelOption(
+                provider="openrouter", model_name="openai/gpt-5.4", reasoning_effort="high"
+            ),
+            ModelOption(
+                provider="openrouter",
+                model_name="anthropic/claude-haiku-4.5",
+                reasoning_effort="low",
+            ),
+            ModelOption(
+                provider="openrouter",
+                model_name="anthropic/claude-haiku-4.5",
+                reasoning_effort="medium",
+            ),
+            ModelOption(
+                provider="openrouter",
+                model_name="anthropic/claude-haiku-4.5",
+                reasoning_effort="high",
+            ),
+            ModelOption(
+                provider="openrouter",
+                model_name="anthropic/claude-opus-4.6",
+                reasoning_effort="low",
+            ),
+            ModelOption(
+                provider="openrouter",
+                model_name="anthropic/claude-opus-4.6",
+                reasoning_effort="medium",
+            ),
+            ModelOption(
+                provider="openrouter",
+                model_name="anthropic/claude-opus-4.6",
+                reasoning_effort="high",
+            ),
+            ModelOption(
+                provider="openrouter",
+                model_name="anthropic/claude-sonnet-4.6",
+                reasoning_effort="low",
+            ),
+            ModelOption(
+                provider="openrouter",
+                model_name="anthropic/claude-sonnet-4.6",
+                reasoning_effort="medium",
+            ),
+            ModelOption(
+                provider="openrouter",
+                model_name="anthropic/claude-sonnet-4.6",
+                reasoning_effort="high",
+            ),
+            ModelOption(
+                provider="openrouter",
+                model_name="google/gemini-3-flash-preview",
+                reasoning_effort="low",
+            ),
+            ModelOption(
+                provider="openrouter",
+                model_name="google/gemini-3-flash-preview",
+                reasoning_effort="medium",
+            ),
+            ModelOption(
+                provider="openrouter",
+                model_name="google/gemini-3-flash-preview",
+                reasoning_effort="high",
+            ),
+            ModelOption(
+                provider="openrouter",
+                model_name="google/gemini-3.1-pro-preview",
+                reasoning_effort="low",
+            ),
+            ModelOption(
+                provider="openrouter",
+                model_name="google/gemini-3.1-pro-preview",
+                reasoning_effort="medium",
+            ),
+            ModelOption(
+                provider="openrouter",
+                model_name="google/gemini-3.1-pro-preview",
+                reasoning_effort="high",
+            ),
+        ]
+PUBLIC_PROVIDER_PREFERENCES = PublicProviderPreferences()

{docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/data_models/reading.py RENAMED Viewed

@@ -185,6 +185,25 @@ class ReadingStep(BaseModel):
     approved_at: datetime | None = None
     submitted_at: datetime | None = None
+    def to_submission(self, *, dql_query: str | None = None) -> "ReadingStepSubmission":
+        """Convert to a ReadingStepSubmission for resolve_reading_entry.
+        Optionally overrides dql_query (e.g. after alias substitution).
+        """
+        return ReadingStepSubmission(
+            alias=self.alias,
+            name=self.name,
+            model=self.model,
+            output_schema=self.output_schema,
+            max_new_tokens=self.max_new_tokens,
+            user_metadata=self.user_metadata,
+            prompt_template_segments=self.prompt_template_segments,
+            context_config=self.context_config,
+            dql_query=dql_query if dql_query is not None else self.dql_query,
+            source_reading_preset_id=self.source_reading_preset_id,
+            cache_mode=self.cache_mode,
+        )
 PlanStep: TypeAlias = BeginGroupStep | EndGroupStep | DqlOnlyStep | ReadingStep
@@ -285,7 +304,6 @@ class PlanSubmissionRequest(BaseModel):
     plan_name: str | None = None
     source_script: str | None = None
     entries: list[PlanStepSubmission]
-    upsert_by_name: bool = False
 class PlanStepSubmissionStatus(BaseModel):

{docent_python-0.1.57a0 → docent_python-0.1.59a0}/docent/sdk/client.py RENAMED Viewed

@@ -5,10 +5,9 @@ import os
 import sys
 import time
 import webbrowser
-from itertools import islice
 from pathlib import Path
 from textwrap import dedent
-from typing import IO, TYPE_CHECKING, Any, Iterable, Iterator, Literal, TypeVar, cast
+from typing import IO, TYPE_CHECKING, Any, Iterator, Literal, cast
 from urllib.parse import urlsplit
 if TYPE_CHECKING:
@@ -46,7 +45,7 @@ from docent.data_models.reading import (
     StepGroupSubmission,
 )
 from docent.judges.util.meta_schema import validate_judge_result_schema
-from docent.loaders import load_inspect
+from docent.sdk.integrations.inspect import ingest_inspect_directory
 from docent.sdk.llm_context import ContextItemRef, LLMContext, LLMContextItem, Prompt
 from docent.sdk.llm_request import ExternalAnalysisResult, LLMRequest
 from docent.sdk.reading import (
@@ -63,25 +62,17 @@ from docent.sdk.reading import (
     _PendingReading,  # pyright: ignore[reportPrivateUsage]
     _PendingStepGroup,  # pyright: ignore[reportPrivateUsage]
 )
+from docent.sdk.util import batched as _batched
 MAX_AGENT_RUN_PAYLOAD_BYTES = 100 * 1024 * 1024  # 100MB backend limit
 _AGENT_RUNS_PAYLOAD_PREFIX = b'{"agent_runs":['
 _AGENT_RUNS_PAYLOAD_SUFFIX = b"]}"
+batched = _batched
-_T = TypeVar("_T")
 _LOCAL_DOMAINS = {"localhost", "127.0.0.1", "0.0.0.0", "::1"}
-def batched(iterable: Iterable[_T], n: int) -> Iterator[tuple[_T, ...]]:
-    """Backport of itertools.batched for Python <3.12."""
-    if n < 1:
-        raise ValueError("n must be at least one")
-    it = iter(iterable)
-    while batch := tuple(islice(it, n)):
-        yield batch
 def _domain_host(domain: str) -> str:
     """Extract normalized host from a domain string, handling optional port and IPv6 brackets."""
     normalized = domain.strip().lower()
@@ -373,7 +364,6 @@ class Docent:
         self._plan_name_sent: bool = False
         self._is_notebook: bool = False
         self._notebook_hook_registered: bool = False
-        self._flushed_names: set[str] = set()
         self._register_notebook_hook()
@@ -2057,67 +2047,20 @@ class Docent:
             ValueError: If the path doesn't exist or isn't a directory.
             requests.exceptions.HTTPError: If any API requests fail.
         """
-        root_path = Path(fpath)
-        if not root_path.exists():
-            raise ValueError(f"Path does not exist: {fpath}")
-        if not root_path.is_dir():
-            raise ValueError(f"Path is not a directory: {fpath}")
-        # Find all .eval files recursively
-        eval_files = list(root_path.rglob("*.eval"))
-        if not eval_files:
-            self._logger.info(f"No .eval files found in {fpath}")
-            return
-        self._logger.info(f"Found {len(eval_files)} .eval files in {fpath}")
-        total_runs_added = 0
-        batch_size = 100
-        # Process each .eval file
-        for eval_file in tqdm(eval_files, desc="Processing .eval files", unit="files"):
-            # Get total samples for progress tracking
-            total_samples = load_inspect.get_total_samples(eval_file, format="eval")
-            if total_samples == 0:
-                self._logger.info(f"No samples found in {eval_file}")
-                continue
-            # Load runs from file
-            with open(eval_file, "rb") as f:
-                _, runs_generator = load_inspect.runs_from_file(f, format="eval")
-                # Process runs in batches
-                runs_from_file = 0
-                batches = batched(runs_generator, batch_size)
-                with tqdm(
-                    total=total_samples,
-                    desc=f"Processing {eval_file.name}",
-                    unit="runs",
-                    leave=False,
-                ) as file_pbar:
-                    for batch in batches:
-                        batch_list = list(batch)  # Convert generator batch to list
-                        if not batch_list:
-                            break
-                        # Add batch to collection
-                        url = f"{self._api_url}/{collection_id}/agent_runs"
-                        payload = {"agent_runs": [ar.model_dump(mode="json") for ar in batch_list]}
-                        response = self._session.post(url, json=payload)
-                        self._handle_response_errors(response)
-                        runs_from_file += len(batch_list)
-                        file_pbar.update(len(batch_list))
-            total_runs_added += runs_from_file
-            self._logger.info(f"Added {runs_from_file} runs from {eval_file}")
-        self._logger.info(
-            f"Successfully ingested {total_runs_added} total agent runs from {len(eval_files)} files"
+        def _upload_agent_run_batch(agent_runs: list[AgentRun]) -> None:
+            url = f"{self._api_url}/{collection_id}/agent_runs"
+            payload = {
+                "agent_runs": [agent_run.model_dump(mode="json") for agent_run in agent_runs]
+            }
+            response = self._session.post(url, json=payload)
+            self._handle_response_errors(response)
+        ingest_inspect_directory(
+            collection_id,
+            fpath,
+            upload_agent_run_batch=_upload_agent_run_batch,
+            logger=self._logger,
         )
     def start_chat(
@@ -2279,7 +2222,10 @@ class Docent:
         output_schema: dict[str, Any] | None = None,
         max_concurrency: int | None = None,
     ) -> dict[str, Any]:
-        """Submit LLM requests for processing.
+        """
+        Deprecated - use readings instead.
+        Submit LLM requests for processing.
         Creates a result set and submits requests for background LLM processing.
         Prints the result set URL and returns submission details.
@@ -2380,7 +2326,10 @@ class Docent:
         result_set_name: str | None = None,
         exists_ok: bool = False,
     ) -> dict[str, Any]:
-        """Submit pre-computed results directly.
+        """
+        Deprecated.
+        Submit pre-computed results directly.
         For use when you've run analysis locally (e.g., with a local LLM)
         and want to upload the results to Docent for viewing.
@@ -2431,7 +2380,10 @@ class Docent:
         collection_id: str,
         name_or_id: str,
     ) -> dict[str, Any]:
-        """Get a result set by name or ID.
+        """
+        Deprecated - use readings instead.
+        Get a result set by name or ID.
         Args:
             collection_id: ID of the Collection.
@@ -2456,7 +2408,10 @@ class Docent:
         with_auto_joins: bool = False,
         include_incomplete: bool = False,
     ) -> "pd.DataFrame":
-        """Get result set contents as a pandas DataFrame.
+        """
+        Deprecated - use readings instead.
+        Get result set contents as a pandas DataFrame.
         Args:
             collection_id: ID of the Collection.
@@ -2563,7 +2518,10 @@ class Docent:
         collection_id: str,
         name_or_id: str,
     ) -> str:
-        """Open a result set in the browser.
+        """
+        Deprecated - use readings instead.
+        Open a result set in the browser.
         Args:
             collection_id: ID of the Collection.
@@ -2665,14 +2623,6 @@ class Docent:
         return alias
     def _enqueue_pending(self, entry: PendingEntry) -> None:
-        """Add a pending entry, replacing any existing entry with the same name."""
-        name: str | None = getattr(entry, "name", None)
-        if name is not None:
-            for i, existing in enumerate(self._pending):
-                existing_name: str | None = getattr(existing, "name", None)
-                if existing_name == name:
-                    self._pending[i] = entry
-                    return
         self._pending.append(entry)
     def _register_atexit(self) -> None:
@@ -2900,11 +2850,13 @@ class Docent:
                 param_name = seg.column_name
                 param_type = seg.type_annotation or "unknown"
+                # Unknown type means "defer to server-side inference"; false here does not
+                # mean the caller explicitly declared a scalar placeholder.
                 segments.append(
                     {
                         "param_name": param_name,
                         "param_type": param_type,
-                        "is_list": False,
+                        "is_list": seg.is_list_annotation if seg.type_annotation else False,
                     }
                 )
                 if context_config and param_name not in param_configs:
@@ -3119,14 +3071,11 @@ class Docent:
             else None
         )
-        upsert_by_name = bool(self._flushed_names)
         request_body = PlanSubmissionRequest(
             plan_id=self._plan_id,
             plan_name=plan_name,
             source_script=source_script,
             entries=entries,
-            upsert_by_name=upsert_by_name,
         )
         is_first_flush_for_plan = self._plan_id is None
@@ -3141,10 +3090,6 @@ class Docent:
         self._flushed_collection_id = collection_id
         self._plan_name_sent = True
-        for p in self._pending:
-            entry_name: str | None = getattr(p, "name", None)
-            if entry_name is not None:
-                self._flushed_names.add(entry_name)
         self._pending.clear()
         for status_entry in result.get("entry_statuses", []):

docent_python-0.1.59a0/docent/sdk/integrations/__init__.py ADDED Viewed

@@ -0,0 +1,27 @@
+"""Integrations for converting and ingesting external trace formats."""
+from docent.sdk.integrations.harbor import (
+    convert_atif_to_agent_run,
+    convert_harbor_directory_to_agent_runs,
+    convert_harbor_trial_to_agent_run,
+)
+from docent.sdk.integrations.inspect import (
+    convert_inspect_directory_to_agent_runs,
+    convert_inspect_eval_file_to_agent_runs,
+    ingest_inspect_directory,
+)
+from docent.sdk.integrations.nemogym import (
+    convert_nemogym_jsonl_file_to_agent_runs,
+    convert_nemogym_rollout_to_agent_run,
+)
+__all__ = [
+    "convert_atif_to_agent_run",
+    "convert_harbor_directory_to_agent_runs",
+    "convert_harbor_trial_to_agent_run",
+    "convert_inspect_directory_to_agent_runs",
+    "convert_inspect_eval_file_to_agent_runs",
+    "convert_nemogym_jsonl_file_to_agent_runs",
+    "convert_nemogym_rollout_to_agent_run",
+    "ingest_inspect_directory",
+]

docent-python 0.1.57a0__tar.gz → 0.1.59a0__tar.gz

docent-python 0.1.57a0__tar.gz → 0.1.59a0__tar.gz