PyPI - docent-python - Versions diffs - 0.1.0a8__tar.gz → 0.1.0a9__tar.gz - Mend

docent-python 0.1.0a8.tar.gz → 0.1.0a9.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29) hide show

{docent_python-0.1.0a8 → docent_python-0.1.0a9}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: docent-python
-Version: 0.1.0a8
+Version: 0.1.0a9
 Summary: Docent SDK
 Project-URL: Homepage, https://github.com/TransluceAI/docent
 Project-URL: Issues, https://github.com/TransluceAI/docent/issues

{docent_python-0.1.0a8 → docent_python-0.1.0a9}/docent/data_models/__init__.py RENAMED Viewed

@@ -1,6 +1,10 @@
 from docent.data_models.agent_run import AgentRun
 from docent.data_models.citation import Citation
-from docent.data_models.metadata import BaseAgentRunMetadata, BaseMetadata
+from docent.data_models.metadata import (
+    BaseAgentRunMetadata,
+    BaseMetadata,
+    InspectAgentRunMetadata,
+)
 from docent.data_models.regex import RegexSnippet
 from docent.data_models.transcript import Transcript
@@ -10,5 +14,6 @@ __all__ = [
     "RegexSnippet",
     "BaseAgentRunMetadata",
     "BaseMetadata",
+    "InspectAgentRunMetadata",
     "Transcript",
 ]

{docent_python-0.1.0a8 → docent_python-0.1.0a9}/docent/data_models/metadata.py RENAMED Viewed

@@ -218,7 +218,7 @@ class InspectAgentRunMetadata(BaseAgentRunMetadata):
     # Parameters for the run
     model: str = Field(description="The model that was used to generate the transcript")
-    # Outcome
+    # Scoring
     scoring_metadata: dict[str, Any] | None = Field(
         description="Additional metadata about the scoring process"
     )

docent_python-0.1.0a9/docent/loaders/load_inspect.py ADDED Viewed

@@ -0,0 +1,76 @@
+from inspect_ai.log import EvalLog
+from inspect_ai.scorer import CORRECT, INCORRECT, NOANSWER, PARTIAL, Score
+from docent.data_models import AgentRun, InspectAgentRunMetadata, Transcript
+from docent.data_models.chat import parse_chat_message
+def _normalize_inspect_score(score: Score) -> float | None:
+    """
+    Normalize an inspect score to a float. This implements the same logic as inspect_ai.scorer._metric.value_to_float, but fails more conspicuously.
+    Args:
+        score: The inspect score to normalize.
+    Returns:
+        The normalized score as a float, or None if the score is not a valid value.
+    """
+    if isinstance(score.value, int | float | bool):
+        return float(score.value)
+    elif score.value == CORRECT:
+        return 1.0
+    elif score.value == PARTIAL:
+        return 0.5
+    elif score.value == INCORRECT or score.value == NOANSWER:
+        return 0
+    elif isinstance(score.value, str):
+        value = score.value.lower()
+        if value in ["yes", "true"]:
+            return 1.0
+        elif value in ["no", "false"]:
+            return 0.0
+        elif value.replace(".", "").isnumeric():
+            return float(value)
+    raise ValueError(f"Unknown score value: {score.value}")
+def load_inspect_log(log: EvalLog) -> list[AgentRun]:
+    if log.samples is None:
+        return []
+    agent_runs: list[AgentRun] = []
+    for s in log.samples:
+        sample_id = s.id
+        epoch_id = s.epoch
+        if s.scores is None:
+            sample_scores = {}
+        else:
+            sample_scores = {k: _normalize_inspect_score(v) for k, v in s.scores.items()}
+        metadata = InspectAgentRunMetadata(
+            task_id=log.eval.task,
+            sample_id=str(sample_id),
+            epoch_id=epoch_id,
+            model=log.eval.model,
+            additional_metadata=s.metadata,
+            scores=sample_scores,
+            # Scores could have answers, explanations, and other metadata besides the values we extract
+            scoring_metadata=s.scores,
+        )
+        agent_runs.append(
+            AgentRun(
+                transcripts={
+                    "main": Transcript(
+                        messages=[parse_chat_message(m.model_dump()) for m in s.messages]
+                    )
+                },
+                metadata=metadata,
+            )
+        )
+    return agent_runs

{docent_python-0.1.0a8 → docent_python-0.1.0a9}/docent/sdk/client.py RENAMED Viewed

@@ -4,7 +4,7 @@ from typing import Any
 import requests
 from docent._log_util.logger import get_logger
-from docent.data_models.agent_run import AgentRun
+from docent.data_models.agent_run import AgentRun, AgentRunWithoutMetadataValidator
 logger = get_logger(__name__)
@@ -268,3 +268,26 @@ class Docent:
         response = self._session.post(url, json={"centroid": centroid})
         response.raise_for_status()
         return response.json()
+    def get_agent_run(self, collection_id: str, agent_run_id: str) -> AgentRun | None:
+        """Get a specific agent run by its ID.
+        Args:
+            collection_id: ID of the Collection.
+            agent_run_id: The ID of the agent run to retrieve.
+        Returns:
+            dict: Dictionary containing the agent run information.
+        Raises:
+            requests.exceptions.HTTPError: If the API request fails.
+        """
+        url = f"{self._server_url}/{collection_id}/agent_run"
+        response = self._session.get(url, params={"agent_run_id": agent_run_id})
+        response.raise_for_status()
+        if response.json() is None:
+            return None
+        else:
+            # We do this to avoid metadata validation failing
+            # TODO(mengk): kinda hacky
+            return AgentRunWithoutMetadataValidator.model_validate(response.json())

{docent_python-0.1.0a8 → docent_python-0.1.0a9}/pyproject.toml RENAMED Viewed

@@ -1,7 +1,7 @@
 [project]
 name = "docent-python"
 description = "Docent SDK"
-version = "0.1.0-alpha.8"
+version = "0.1.0-alpha.9"
 authors = [
   { name="Transluce", email="info@transluce.org" },
 ]