docent-python 0.1.35a0__tar.gz
This diff shows the contents of publicly available package versions as released to a supported public registry. It is provided for informational purposes only.
- docent_python-0.1.35a0/.gitignore +200 -0
- docent_python-0.1.35a0/LICENSE.md +13 -0
- docent_python-0.1.35a0/PKG-INFO +33 -0
- docent_python-0.1.35a0/README.md +21 -0
- docent_python-0.1.35a0/docent/__init__.py +4 -0
- docent_python-0.1.35a0/docent/_llm_util/__init__.py +0 -0
- docent_python-0.1.35a0/docent/_llm_util/data_models/__init__.py +0 -0
- docent_python-0.1.35a0/docent/_llm_util/data_models/exceptions.py +48 -0
- docent_python-0.1.35a0/docent/_llm_util/data_models/llm_output.py +331 -0
- docent_python-0.1.35a0/docent/_llm_util/llm_cache.py +193 -0
- docent_python-0.1.35a0/docent/_llm_util/llm_svc.py +472 -0
- docent_python-0.1.35a0/docent/_llm_util/model_registry.py +130 -0
- docent_python-0.1.35a0/docent/_llm_util/providers/__init__.py +0 -0
- docent_python-0.1.35a0/docent/_llm_util/providers/anthropic.py +537 -0
- docent_python-0.1.35a0/docent/_llm_util/providers/common.py +41 -0
- docent_python-0.1.35a0/docent/_llm_util/providers/google.py +530 -0
- docent_python-0.1.35a0/docent/_llm_util/providers/openai.py +745 -0
- docent_python-0.1.35a0/docent/_llm_util/providers/openrouter.py +375 -0
- docent_python-0.1.35a0/docent/_llm_util/providers/preference_types.py +104 -0
- docent_python-0.1.35a0/docent/_llm_util/providers/provider_registry.py +164 -0
- docent_python-0.1.35a0/docent/_log_util/__init__.py +3 -0
- docent_python-0.1.35a0/docent/_log_util/logger.py +141 -0
- docent_python-0.1.35a0/docent/data_models/__init__.py +16 -0
- docent_python-0.1.35a0/docent/data_models/_tiktoken_util.py +91 -0
- docent_python-0.1.35a0/docent/data_models/agent_run.py +456 -0
- docent_python-0.1.35a0/docent/data_models/chat/__init__.py +31 -0
- docent_python-0.1.35a0/docent/data_models/chat/content.py +56 -0
- docent_python-0.1.35a0/docent/data_models/chat/message.py +132 -0
- docent_python-0.1.35a0/docent/data_models/chat/tool.py +109 -0
- docent_python-0.1.35a0/docent/data_models/citation.py +233 -0
- docent_python-0.1.35a0/docent/data_models/collection.py +23 -0
- docent_python-0.1.35a0/docent/data_models/judge.py +19 -0
- docent_python-0.1.35a0/docent/data_models/metadata_util.py +16 -0
- docent_python-0.1.35a0/docent/data_models/regex.py +56 -0
- docent_python-0.1.35a0/docent/data_models/remove_invalid_citation_ranges.py +176 -0
- docent_python-0.1.35a0/docent/data_models/shared_types.py +10 -0
- docent_python-0.1.35a0/docent/data_models/transcript.py +465 -0
- docent_python-0.1.35a0/docent/data_models/util.py +170 -0
- docent_python-0.1.35a0/docent/judges/__init__.py +23 -0
- docent_python-0.1.35a0/docent/judges/analysis.py +77 -0
- docent_python-0.1.35a0/docent/judges/impl.py +587 -0
- docent_python-0.1.35a0/docent/judges/runner.py +129 -0
- docent_python-0.1.35a0/docent/judges/stats.py +205 -0
- docent_python-0.1.35a0/docent/judges/types.py +311 -0
- docent_python-0.1.35a0/docent/judges/util/forgiving_json.py +108 -0
- docent_python-0.1.35a0/docent/judges/util/meta_schema.json +86 -0
- docent_python-0.1.35a0/docent/judges/util/meta_schema.py +29 -0
- docent_python-0.1.35a0/docent/judges/util/parse_output.py +87 -0
- docent_python-0.1.35a0/docent/judges/util/voting.py +139 -0
- docent_python-0.1.35a0/docent/loaders/load_inspect.py +215 -0
- docent_python-0.1.35a0/docent/py.typed +0 -0
- docent_python-0.1.35a0/docent/samples/__init__.py +3 -0
- docent_python-0.1.35a0/docent/samples/load.py +9 -0
- docent_python-0.1.35a0/docent/samples/log.eval +0 -0
- docent_python-0.1.35a0/docent/samples/tb_airline.json +1 -0
- docent_python-0.1.35a0/docent/sdk/__init__.py +0 -0
- docent_python-0.1.35a0/docent/sdk/agent_run_writer.py +317 -0
- docent_python-0.1.35a0/docent/sdk/client.py +751 -0
- docent_python-0.1.35a0/docent/trace.py +2752 -0
- docent_python-0.1.35a0/docent/trace_temp.py +1086 -0
- docent_python-0.1.35a0/pyproject.toml +54 -0
- docent_python-0.1.35a0/uv.lock +2541 -0

docent_python-0.1.35a0/.gitignore
@@ -0,0 +1,200 @@
+**/*_gitignore.*
+**/*_gitignore/
+*.db
+.stignore
+*syncthing*
+.DS_Store
+# *.sql (neil: disabled for ursid)
+*.gz
+*.tgz
+
+*.tfstate
+*.tfstate.backup
+*/.terraform/
+*/*.terraform.*
+
+.idea/
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+.python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+# in version control.
+# https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.env.*
+!.env.template
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+# and can be added to the global gitignore or merged into this file. For a more nuclear
+# option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+
+# wandb
+**/wandb/
+
+# Marimo notebook outputs
+**/__marimo__/
+
+# yarn
+**/.yarn/
+**/.pnp.*
+
+# data
+*.npy
+*.csv
+*.pkl
+
+# personal
+inspect_evals
+
+*.swp
+
+# test data cache
+data/cache

docent_python-0.1.35a0/LICENSE.md
@@ -0,0 +1,13 @@
+Copyright 2025 Clarity AI Research Inc., dba Transluce
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.

docent_python-0.1.35a0/PKG-INFO
@@ -0,0 +1,33 @@
+Metadata-Version: 2.4
+Name: docent-python
+Version: 0.1.35a0
+Summary: Docent SDK
+Project-URL: Homepage, https://github.com/TransluceAI/docent
+Project-URL: Issues, https://github.com/TransluceAI/docent/issues
+Project-URL: Docs, https://transluce-docent.readthedocs-hosted.com/en/latest
+Author-email: Transluce <info@transluce.org>
+License-Expression: Apache-2.0
+License-File: LICENSE.md
+Requires-Python: >=3.11
+Requires-Dist: anthropic>=0.47.0
+Requires-Dist: backoff>=2.2.1
+Requires-Dist: google-genai>=1.16.1
+Requires-Dist: inspect-ai>=0.3.132
+Requires-Dist: jsonschema>=4.24.0
+Requires-Dist: openai>=1.68.0
+Requires-Dist: opentelemetry-api>=1.34.1
+Requires-Dist: opentelemetry-exporter-otlp-proto-grpc>=1.34.1
+Requires-Dist: opentelemetry-exporter-otlp-proto-http>=1.34.1
+Requires-Dist: opentelemetry-instrumentation-anthropic>=0.40.14
+Requires-Dist: opentelemetry-instrumentation-bedrock>=0.40.14
+Requires-Dist: opentelemetry-instrumentation-google-generativeai>=0.40.14
+Requires-Dist: opentelemetry-instrumentation-langchain>=0.40.14
+Requires-Dist: opentelemetry-instrumentation-openai>=0.40.14
+Requires-Dist: opentelemetry-instrumentation-threading>=0.55b1
+Requires-Dist: opentelemetry-sdk>=1.34.1
+Requires-Dist: orjson>=3.11.3
+Requires-Dist: pandas>=2.3.3
+Requires-Dist: pydantic>=2.11.7
+Requires-Dist: pyyaml>=6.0.2
+Requires-Dist: tiktoken>=0.7.0
+Requires-Dist: tqdm>=4.67.1

docent_python-0.1.35a0/README.md
@@ -0,0 +1,21 @@
+!!! note
+    Docent remains in alpha. The API is subject to change.
+
+# Docent Python SDK
+
+The official Python SDK for [Docent](https://github.com/TransluceAI/docent), a platform for analyzing and visualizing AI agent execution traces.
+
+## Overview
+
+Docent helps you understand AI agent behavior by providing tools to collect, analyze, and visualize agent execution data. This SDK allows you to programmatically interact with the Docent platform to:
+
+- Create and manage collections of agent runs
+- Upload agent execution traces and transcripts
+- Define custom dimensions and filters
+- Perform searches and analyses on agent behavior
+
+## Installation
+
+```bash
+pip install docent-python
+```
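
The README stops at installation; the sketch below illustrates the workflow it lists (create a collection, then upload runs). It is a hypothetical example: the `Docent` client name and the `create_collection`/`add_agent_runs` signatures are assumptions based on `docent/sdk/client.py` and the data models shipped in this package, not a verified API reference.

```python
# Hypothetical quickstart for the workflow the README describes.
# All names and signatures here are assumptions; consult
# docent/sdk/client.py and the hosted docs for the actual API.
from docent import Docent
from docent.data_models import AgentRun, Transcript
from docent.data_models.chat import AssistantMessage, UserMessage

client = Docent(api_key="...")  # assumed constructor argument

# Create a collection to hold agent runs (assumed method name).
collection_id = client.create_collection(name="demo-runs")

# Build a single run from one transcript of chat messages.
run = AgentRun(
    transcripts=[
        Transcript(
            messages=[
                UserMessage(content="List the files in /tmp."),
                AssistantMessage(content="a.txt, b.txt"),
            ]
        )
    ],
    metadata={"task": "demo"},
)

# Upload the trace (assumed method name).
client.add_agent_runs(collection_id, [run])
```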

docent_python-0.1.35a0/docent/_llm_util/__init__.py: file without changes
docent_python-0.1.35a0/docent/_llm_util/data_models/__init__.py: file without changes

docent_python-0.1.35a0/docent/_llm_util/data_models/exceptions.py
@@ -0,0 +1,48 @@
+class LLMException(Exception):
+    error_type_id = "other"
+    user_message = "The model failed to respond. Please try again later."
+
+
+class CompletionTooLongException(LLMException):
+    error_type_id = "completion_too_long"
+    user_message = "Completion too long."
+
+
+class RateLimitException(LLMException):
+    error_type_id = "rate_limit"
+    user_message = "Rate limited by the model provider. Please wait and try again."
+
+
+class ContextWindowException(LLMException):
+    error_type_id = "context_window"
+    user_message = "Context window exceeded."
+
+
+class NoResponseException(LLMException):
+    error_type_id = "no_response"
+    user_message = "The model returned an empty response. Please try again later."
+
+
+class DocentUsageLimitException(LLMException):
+    error_type_id = "docent_usage_limit"
+    user_message = "Free daily usage limit reached. Add your own API key in settings or contact us for increased limits."
+
+
+class ValidationFailedException(LLMException):
+    error_type_id = "validation_failed"
+    user_message = "The model returned invalid output that failed validation."
+
+    def __init__(self, message: str = "", failed_output: str | None = None):
+        super().__init__(message)
+        self.failed_output = failed_output
+
+
+LLM_ERROR_TYPES: list[type[LLMException]] = [
+    LLMException,
+    CompletionTooLongException,
+    RateLimitException,
+    ContextWindowException,
+    NoResponseException,
+    DocentUsageLimitException,
+    ValidationFailedException,
+]
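
Each exception type pairs a stable `error_type_id` with a human-readable `user_message`, and `LLM_ERROR_TYPES` acts as a registry for turning serialized ids back into exception instances; this is the same lookup that `LLMOutput.from_dict` in `llm_output.py` below performs. A minimal sketch of that pattern, where the `revive` helper is hypothetical:

```python
# Sketch of the id -> exception registry pattern used by this module.
# `revive` is a hypothetical helper mirroring the lookup in
# LLMOutput.from_dict; only the imported names come from the package.
from docent._llm_util.data_models.exceptions import (
    LLM_ERROR_TYPES,
    LLMException,
    RateLimitException,
)

ERROR_TYPE_MAP = {exc.error_type_id: exc for exc in LLM_ERROR_TYPES}

def revive(error_type_id: str) -> LLMException:
    # Unknown ids fall back to the generic LLMException ("other").
    return ERROR_TYPE_MAP.get(error_type_id, LLMException)()

err = revive("rate_limit")
assert isinstance(err, RateLimitException)
print(err.user_message)  # "Rate limited by the model provider. ..."
```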

docent_python-0.1.35a0/docent/_llm_util/data_models/llm_output.py
@@ -0,0 +1,331 @@
+import json
+from dataclasses import dataclass, field
+from typing import Any, Literal, Protocol, cast
+
+from openai.types.chat.chat_completion_token_logprob import TopLogprob
+from pydantic import BaseModel
+
+from docent._llm_util.data_models.exceptions import (
+    LLM_ERROR_TYPES,
+    CompletionTooLongException,
+    ContextWindowException,
+    LLMException,
+)
+from docent._log_util import get_logger
+from docent.data_models.chat import ToolCall
+
+logger = get_logger(__name__)
+
+FinishReasonType = Literal[
+    "error",
+    "stop",
+    "length",
+    "tool_calls",
+    "content_filter",
+    "function_call",
+    "streaming",
+    "refusal",
+]
+"""Possible reasons for an LLM completion to finish."""
+
+
+TokenType = Literal["input", "output", "cache_read", "cache_write"]
+
+
+class UsageMetrics:
+    _usage: dict[TokenType, int]
+
+    def __init__(self, **kwargs: int | None):
+        filtered_kwargs = {k: v for k, v in kwargs.items() if v is not None}
+        self._usage = cast(dict[TokenType, int], filtered_kwargs)
+
+    def __getitem__(self, key: TokenType) -> int:
+        return self._usage.get(key, 0)
+
+    def __setitem__(self, key: TokenType, value: int):
+        self._usage[key] = value
+
+    def to_dict(self) -> dict[TokenType, int]:
+        # Filter out 0 values to avoid cluttering the database
+        return {k: v for k, v in self._usage.items() if v != 0}
+
+    @property
+    def total_tokens(self) -> int:
+        return self["input"] + self["output"]
+
+
+class LLMCompletion(BaseModel):
+    """A single completion from an LLM.
+
+    Attributes:
+        text: The generated text content.
+        tool_calls: List of tool calls made during the completion.
+        finish_reason: Reason why the completion finished.
+        top_logprobs: Probability distribution for top token choices.
+    """
+
+    text: str | None = None
+    tool_calls: list[ToolCall] | None = None
+    finish_reason: FinishReasonType | None = None
+    top_logprobs: list[list[TopLogprob]] | None = None
+    reasoning_tokens: str | None = None
+
+    @property
+    def no_text(self) -> bool:
+        """Check if the completion has no text.
+
+        Returns:
+            bool: True if text is None or empty, False otherwise.
+        """
+        return self.text is None or len(self.text) == 0
+
+
+@dataclass
+class LLMOutput:
+    """Container for LLM output, potentially with multiple completions.
+
+    Aggregates completions from an LLM along with metadata and error information.
+
+    Attributes:
+        model: The name/identifier of the model used.
+        completions: List of individual completions.
+        errors: List of error types encountered during generation.
+    """
+
+    model: str
+    completions: list[LLMCompletion]
+    errors: list[LLMException] = field(default_factory=list)
+    usage: UsageMetrics = field(default_factory=UsageMetrics)
+    from_cache: bool = False
+    duration: float | None = None
+
+    @property
+    def non_empty(self) -> bool:
+        """Check if there are any completions.
+
+        Returns:
+            bool: True if there's at least one completion, False otherwise.
+        """
+        return len(self.completions) > 0
+
+    @property
+    def first(self) -> LLMCompletion | None:
+        """Get the first completion if available.
+
+        Returns:
+            LLMCompletion | None: The first completion or None if no completions exist.
+        """
+        return self.completions[0] if self.non_empty else None
+
+    @property
+    def first_text(self) -> str | None:
+        """Get the text of the first completion if available.
+
+        Returns:
+            str | None: The text of the first completion or None if no completion exists.
+        """
+        return self.first.text if self.first else None
+
+    @property
+    def did_error(self) -> bool:
+        """Check if any errors occurred during generation.
+
+        Returns:
+            bool: True if there were errors, False otherwise.
+        """
+        return bool(self.errors)
+
+    def to_dict(self) -> dict[str, Any]:
+        return {
+            "model": self.model,
+            "completions": [comp.model_dump() for comp in self.completions],
+            "errors": [e.error_type_id for e in self.errors],
+            "usage": self.usage.to_dict(),
+            "from_cache": self.from_cache,
+            "duration": self.duration,
+        }
+
+    @classmethod
+    def from_dict(cls, data: dict[str, Any]) -> "LLMOutput":
+        error_type_map = {e.error_type_id: e for e in LLM_ERROR_TYPES}
+        errors = data.get("errors", [])
+        error_types_to_not_log: list[str] = [
+            CompletionTooLongException.error_type_id,
+            ContextWindowException.error_type_id,
+        ]
+        errors_to_log = [e for e in errors if e not in error_types_to_not_log]
+        if errors_to_log:
+            logger.error(f"Loading LLM output with errors: {errors}")
+        errors = [error_type_map.get(e, LLMException)() for e in errors]
+
+        completions = data.get("completions", [])
+        completions = [LLMCompletion.model_validate(comp) for comp in completions]
+
+        usage: dict[TokenType, int] = {}
+        if data_usage := data.get("usage"):
+            usage = cast(dict[TokenType, int], data_usage)
+
+        return cls(
+            model=data["model"],
+            completions=completions,
+            errors=errors,
+            usage=UsageMetrics(**usage),
+            from_cache=bool(data.get("from_cache", False)),
+            duration=data.get("duration"),
+        )
+
+
+@dataclass
+class ToolCallPartial:
+    """Partial representation of a tool call before full processing.
+
+    Used as an intermediate format before finalizing into a complete ToolCall.
+
+    Args:
+        id: The identifier for the tool call.
+        function: The name of the function to call.
+        arguments_raw: Raw JSON string of arguments for the function.
+        type: The type of the tool call, always "function".
+    """
+
+    id: str | None
+    function: str | None
+    arguments_raw: str | None
+    type: Literal["function"]
+
+
+class LLMCompletionPartial(LLMCompletion):
+    """Partial representation of an LLM completion before finalization.
+
+    Extends LLMCompletion but with tool_calls being a list of ToolCallPartial.
+    This is used during the processing stage before tool calls are fully parsed.
+
+    Attributes:
+        tool_calls: List of partial tool call representations.
+    """
+
+    tool_calls: list[ToolCallPartial | None] | None = None  # type: ignore
+
+
+class LLMOutputPartial(LLMOutput):
+    """Partial representation of LLM output before finalization.
+
+    Extends LLMOutput but with completions being a list of LLMCompletionPartial.
+    Used as an intermediate format during processing.
+
+    Attributes:
+        completions: List of partial completions.
+    """
+
+    completions: list[LLMCompletionPartial]  # type: ignore
+
+
+def finalize_llm_output_partial(partial: LLMOutputPartial) -> LLMOutput:
+    """Convert a partial LLM output into a finalized LLM output.
+
+    Processes tool calls by parsing their arguments from raw JSON strings,
+    handles errors in JSON parsing, and provides warnings for truncated completions.
+
+    Args:
+        partial: The partial LLM output to finalize.
+
+    Returns:
+        LLMOutput: The finalized LLM output with processed tool calls.
+
+    Raises:
+        CompletionTooLongException: If the completion was truncated due to length
+            and resulted in empty text.
+        ValueError: If tool call ID or function is missing in the partial data.
+    """
+
+    def _parse_tool_call(tc_partial: ToolCallPartial):
+        if tc_partial.id is None:
+            raise ValueError("Tool call ID not found in partial; check for parsing errors")
+        if tc_partial.function is None:
+            raise ValueError("Tool call function not found in partial; check for parsing errors")
+
+        arguments: dict[str, Any] = {}
+        # Attempt to load arguments into JSON
+        try:
+            arguments = json.loads(tc_partial.arguments_raw or "{}")
+            parse_error = None
+        # If the tool call arguments are not valid JSON, return an empty dict with the error
+        except Exception as e:
+            arguments = {"__parse_error_raw_args": tc_partial.arguments_raw}
+            parse_error = f"Couldn't parse tool call arguments as JSON: {e}. Original input: {tc_partial.arguments_raw}"
+
+        return ToolCall(
+            id=tc_partial.id,
+            function=tc_partial.function,
+            arguments=arguments,
+            parse_error=parse_error,
+            type=tc_partial.type,
+        )
+
+    output = LLMOutput(
+        model=partial.model,
+        completions=[
+            LLMCompletion(
+                text=c.text,
+                tool_calls=[_parse_tool_call(tc) for tc in (c.tool_calls or []) if tc is not None],
+                finish_reason=c.finish_reason,
+                reasoning_tokens=c.reasoning_tokens,
+            )
+            for c in partial.completions
+        ],
+        usage=partial.usage,
+        from_cache=False,
+    )
+
+    # If the completion is empty and was truncated (likely due to too much reasoning), raise an exception
+    if output.first and output.first.finish_reason == "length" and output.first.no_text:
+        raise CompletionTooLongException(
+            "Completion empty due to truncation. Consider increasing max_new_tokens."
+        )
+    for c in output.completions:
+        if c.finish_reason == "length":
+            logger.warning(
+                "Completion truncated due to length; consider increasing max_new_tokens."
+            )
+
+    return output
+
+
+class AsyncLLMOutputStreamingCallback(Protocol):
+    """Protocol for asynchronous streaming callbacks with batch index.
+
+    Defines the expected signature for callbacks that handle streaming output
+    with a batch index.
+
+    Args:
+        batch_index: The index of the current batch.
+        llm_output: The LLM output for the current batch.
+    """
+
+    async def __call__(
+        self,
+        batch_index: int,
+        llm_output: LLMOutput,
+    ) -> None: ...
+
+
+class AsyncSingleLLMOutputStreamingCallback(Protocol):
+    """Protocol for asynchronous streaming callbacks without batch indexing.
+
+    Defines the expected signature for callbacks that handle streaming output
+    without batch indexing.
+
+    Args:
+        llm_output: The LLM output to process.
+    """
+
+    async def __call__(
+        self,
+        llm_output: LLMOutput,
+    ) -> None: ...
+
+
+class AsyncEmbeddingStreamingCallback(Protocol):
+    """Protocol for sending progress updates for embedding generation."""
+
+    async def __call__(self, progress: int) -> None: ...