PyPI - docent-python - Versions diffs - 0.1.5a0__tar.gz → 0.1.7a0__tar.gz - Mend

docent-python 0.1.5a0__tar.gz → 0.1.7a0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32) hide show

{docent_python-0.1.5a0 → docent_python-0.1.7a0}/.gitignore RENAMED Viewed

@@ -6,6 +6,7 @@
 .DS_Store
 # *.sql  (neil: disabled for ursid)
 *.gz
+*.tgz
 *.tfstate
 *.tfstate.backup

{docent_python-0.1.5a0 → docent_python-0.1.7a0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: docent-python
-Version: 0.1.5a0
+Version: 0.1.7a0
 Summary: Docent SDK
 Project-URL: Homepage, https://github.com/TransluceAI/docent
 Project-URL: Issues, https://github.com/TransluceAI/docent/issues

{docent_python-0.1.5a0 → docent_python-0.1.7a0}/docent/data_models/agent_run.py RENAMED Viewed

@@ -15,6 +15,7 @@ from pydantic import (
 from docent.data_models._tiktoken_util import get_token_count, group_messages_into_ranges
 from docent.data_models.transcript import (
     Transcript,
+    TranscriptGroup,
     TranscriptWithoutMetadataValidator,
     fake_model_dump,
 )
@@ -36,6 +37,7 @@ class AgentRun(BaseModel):
         name: Optional human-readable name for the agent run.
         description: Optional description of the agent run.
         transcripts: Dict mapping transcript IDs to Transcript objects.
+        transcript_groups: Dict mapping transcript group IDs to TranscriptGroup objects.
         metadata: Additional structured metadata about the agent run as a JSON-serializable dictionary.
     """
@@ -44,6 +46,7 @@ class AgentRun(BaseModel):
     description: str | None = None
     transcripts: dict[str, Transcript]
+    transcript_groups: dict[str, TranscriptGroup] = Field(default_factory=dict)
     metadata: dict[str, Any] = Field(default_factory=dict)
     @field_serializer("metadata")

{docent_python-0.1.5a0 → docent_python-0.1.7a0}/docent/data_models/transcript.py RENAMED Viewed

@@ -1,4 +1,5 @@
 import sys
+from datetime import datetime
 from typing import Any
 from uuid import uuid4
@@ -73,6 +74,8 @@ class TranscriptGroup(BaseModel):
         id: Unique identifier for the transcript group, auto-generated by default.
         name: Optional human-readable name for the transcript group.
         description: Optional description of the transcript group.
+        collection_id: ID of the collection this transcript group belongs to.
+        agent_run_id: ID of the agent run this transcript group belongs to.
         parent_transcript_group_id: Optional ID of the parent transcript group.
         metadata: Additional structured metadata about the transcript group.
     """
@@ -80,7 +83,10 @@ class TranscriptGroup(BaseModel):
     id: str = Field(default_factory=lambda: str(uuid4()))
     name: str | None = None
     description: str | None = None
+    collection_id: str
+    agent_run_id: str
     parent_transcript_group_id: str | None = None
+    created_at: datetime | None = None
     metadata: dict[str, Any] = Field(default_factory=dict)
     @field_serializer("metadata")
@@ -129,6 +135,7 @@ class Transcript(BaseModel):
     name: str | None = None
     description: str | None = None
     transcript_group_id: str | None = None
+    created_at: datetime | None = None
     messages: list[ChatMessage]
     metadata: dict[str, Any] = Field(default_factory=dict)

docent_python-0.1.7a0/docent/loaders/load_inspect.py ADDED Viewed

@@ -0,0 +1,210 @@
+import json
+from pathlib import Path
+from typing import Any, BinaryIO, Generator, Tuple
+from zipfile import ZipFile
+from inspect_ai.log import EvalLog
+from inspect_ai.scorer import CORRECT, INCORRECT, NOANSWER, PARTIAL, Score
+from docent.data_models import AgentRun, Transcript
+from docent.data_models.chat import parse_chat_message
+def _normalize_inspect_score(score: Score | dict[str, Any]) -> Any:
+    """
+    Normalize an inspect score to a float. Logic mirrors inspect_ai.scorer._metric.value_to_float.
+    Leaf values are converted one by one; list and dict values are normalized element-wise
+    and keep their container shape.
+    Args:
+        score: The inspect score to normalize, either a Score object or a dict with a "value" key.
+    Returns:
+        The normalized score as a float (or a list/dict of normalized leaves), None if the
+        value is None, or the lowercased string when it cannot be read as a number.
+    """
+    def _leaf_normalize(value: Any) -> Any:
+        if value is None:
+            return None
+        if isinstance(value, int | float | bool):
+            return float(value)
+        if value == CORRECT:
+            return 1.0
+        if value == PARTIAL:
+            return 0.5
+        if value in [INCORRECT, NOANSWER]:
+            # Return a float like every other numeric branch (was the int 0)
+            return 0.0
+        value = str(value).lower()
+        if value in ["yes", "true"]:
+            return 1.0
+        if value in ["no", "false"]:
+            return 0.0
+        if value.replace(".", "").isnumeric():
+            try:
+                return float(value)
+            except ValueError:
+                # Strings such as "1.2.3" pass the guard above but are not valid floats;
+                # fall back to the string instead of raising
+                return value
+        return value
+    value = score["value"] if isinstance(score, dict) else score.value
+    if value is None:
+        # Report a missing value as None, as documented, instead of tripping the assert below
+        return None
+    if isinstance(value, int | float | bool | str):
+        return _leaf_normalize(value)
+    if isinstance(value, list):
+        return [_leaf_normalize(v) for v in value]  # type: ignore
+    assert isinstance(value, dict), "Inspect score must be leaf value, list, or dict"
+    return {k: _leaf_normalize(v) for k, v in value.items()}  # type: ignore
+def load_inspect_log(log: EvalLog) -> list[AgentRun]:
+    """
+    Convert an in-memory inspect EvalLog into AgentRun objects, one per sample.
+    Each run carries a single transcript under the key "main" and a metadata dict built
+    from the log's eval info and the sample's id, epoch, metadata and scores.
+    Returns an empty list when the log has no samples.
+    """
+    if log.samples is None:
+        return []
+    # TODO(vincent): fix this
+    runs: list[AgentRun] = []
+    for sample in log.samples:
+        normalized_scores = (
+            {}
+            if sample.scores is None
+            else {name: _normalize_inspect_score(raw) for name, raw in sample.scores.items()}
+        )
+        run_metadata = {
+            "task_id": log.eval.task,
+            "sample_id": str(sample.id),
+            "epoch_id": sample.epoch,
+            "model": log.eval.model,
+            "additional_metadata": sample.metadata,
+            "scores": normalized_scores,
+            # The raw scores are kept too: they may carry answers, explanations and other
+            # metadata beyond the values extracted above
+            "scoring_metadata": sample.scores,
+        }
+        main_transcript = Transcript(
+            messages=[parse_chat_message(msg.model_dump()) for msg in sample.messages],
+            metadata={},
+        )
+        runs.append(AgentRun(transcripts={"main": main_transcript}, metadata=run_metadata))
+    return runs
+def _read_sample_as_run(
+    data: dict[str, Any], header_metadata: dict[str, Any] | None = None
+) -> AgentRun:
+    """
+    Build an AgentRun from one JSON-decoded inspect sample.
+    Args:
+        data: The sample as a dict. "messages" is required; "id", "epoch", "target",
+            "scores" and "metadata" are read when present.
+        header_metadata: Run-level metadata derived from the log header, merged into the
+            run's metadata. Defaults to no header metadata.
+    Returns:
+        An AgentRun with a single transcript stored under the key "main".
+    """
+    # Tolerate "scores"/"metadata" being absent or null by treating both as empty
+    raw_scores = data.get("scores") or {}
+    sample_metadata = data.get("metadata") or {}
+    normalized_scores = {k: _normalize_inspect_score(v) for k, v in raw_scores.items()}
+    run_metadata: dict[str, Any] = {
+        "sample_id": data.get("id"),
+        "epoch": data.get("epoch"),
+        "target": data.get("target"),
+        # Scores could have answers, explanations, and other metadata besides the values we extract
+        "scoring_metadata": data.get("scores"),
+        "scores": normalized_scores,
+        # Later entries win: header metadata overrides the keys above, and sample
+        # metadata overrides both header metadata and the keys above
+        **(header_metadata or {}),
+        **sample_metadata,
+    }
+    run = AgentRun(
+        transcripts={
+            "main": Transcript(
+                messages=[parse_chat_message(m) for m in data["messages"]], metadata={}
+            ),
+        },
+        metadata=run_metadata,
+    )
+    return run
+def _run_metadata_from_header(header: dict[str, Any]) -> dict[str, Any]:
+    """
+    Pick the few header fields worth attaching to every run.
+    Inspect log headers carry a lot of metadata; only the eval's task and model are kept
+    to avoid clutter. Returns an empty dict when the header has no (truthy) "eval" entry.
+    """
+    eval_section = header.get("eval")
+    if not eval_section:
+        return {}
+    return {"task": eval_section["task"], "model": eval_section["model"]}
+def get_total_samples(file_path: Path, format: str = "json") -> int:
+    """
+    Return the total number of samples in the provided file.
+    Args:
+        file_path: Path to the log file.
+        format: "json" for a single JSON log, or "eval" for a zip archive in which each
+            sample is a "samples/*.json" member.
+    Returns:
+        The number of samples; 0 when a JSON log has no "samples" entry or it is null.
+    Raises:
+        ValueError: If format is neither "json" nor "eval".
+    """
+    with open(file_path, "rb") as f:
+        if format == "json":
+            data = json.load(f)
+            # "samples" may be missing or null; count both as zero instead of failing in len()
+            return len(data.get("samples") or [])
+        elif format == "eval":
+            with ZipFile(f, mode="r") as z:
+                return sum(
+                    1
+                    for name in z.namelist()
+                    if name.startswith("samples/") and name.endswith(".json")
+                )
+        else:
+            raise ValueError(f"Format must be 'json' or 'eval': {format}")
+def _runs_from_eval_file(
+    file: BinaryIO,
+) -> Tuple[dict[str, Any], Generator[AgentRun, None, None]]:
+    """
+    Read an inspect ".eval" zip archive from an open binary file.
+    Args:
+        file: Binary file object positioned at the start of the archive.
+    Returns:
+        A tuple (header_metadata, runs). header_metadata is derived from "header.json";
+        runs is a generator that lazily yields one AgentRun per "samples/*.json" member.
+        Once iteration has started, the archive is closed when the generator is exhausted
+        or closed.
+    """
+    archive = ZipFile(file, mode="r")
+    try:
+        # Close the header member explicitly rather than relying on garbage collection
+        with archive.open("header.json", "r") as header_file:
+            header: dict[str, Any] = json.load(header_file)
+        header_metadata = _run_metadata_from_header(header)
+    except BaseException:
+        # Do not leak the archive when the header is missing or malformed
+        archive.close()
+        raise
+    def _iter_runs() -> Generator[AgentRun, None, None]:
+        try:
+            for sample_file in archive.namelist():
+                if not (sample_file.startswith("samples/") and sample_file.endswith(".json")):
+                    continue
+                with archive.open(sample_file, "r") as f:
+                    data = json.load(f)
+                run: AgentRun = _read_sample_as_run(data, header_metadata)
+                yield run
+        finally:
+            archive.close()
+    return header_metadata, _iter_runs()
+def _runs_from_json_file(
+    file: BinaryIO,
+) -> Tuple[dict[str, Any], Generator[AgentRun, None, None]]:
+    """
+    Read a JSON inspect log from an open binary file.
+    The whole document is parsed up front; runs are then built lazily from it.
+    Args:
+        file: Binary file object containing the JSON log.
+    Returns:
+        A tuple (header_metadata, runs). runs is a generator yielding one AgentRun per
+        entry of the log's "samples" list, and nothing when that list is missing or null.
+    """
+    data = json.load(file)
+    header_metadata = _run_metadata_from_header(data)
+    def _iter_runs() -> Generator[AgentRun, None, None]:
+        # A log without samples yields no runs, matching get_total_samples reporting 0
+        for sample in data.get("samples") or []:
+            run: AgentRun = _read_sample_as_run(sample, header_metadata)
+            yield run
+    return header_metadata, _iter_runs()
+def runs_from_file(
+    file: BinaryIO, format: str = "json"
+) -> Tuple[dict[str, Any], Generator[AgentRun, None, None]]:
+    """
+    Dispatch to the loader matching the given log format.
+    Args:
+        file: Open binary file containing the log.
+        format: "json" or "eval".
+    Returns:
+        The (header_metadata, runs generator) tuple produced by the selected loader.
+    Raises:
+        ValueError: If format is neither "json" nor "eval".
+    """
+    if format == "json":
+        return _runs_from_json_file(file)
+    if format == "eval":
+        return _runs_from_eval_file(file)
+    raise ValueError(f"Format must be 'json' or 'eval': {format}")

{docent_python-0.1.5a0 → docent_python-0.1.7a0}/docent/trace.py RENAMED Viewed

@@ -12,6 +12,7 @@ from contextlib import asynccontextmanager, contextmanager
 from contextvars import ContextVar, Token
 from datetime import datetime, timezone
 from enum import Enum
+from importlib.metadata import Distribution, distributions
 from typing import Any, AsyncIterator, Callable, Dict, Iterator, List, Optional, Set, Union
 import requests
@@ -19,10 +20,6 @@ from opentelemetry import trace
 from opentelemetry.context import Context
 from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter as GRPCExporter
 from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter as HTTPExporter
-from opentelemetry.instrumentation.anthropic import AnthropicInstrumentor
-from opentelemetry.instrumentation.bedrock import BedrockInstrumentor
-from opentelemetry.instrumentation.langchain import LangchainInstrumentor
-from opentelemetry.instrumentation.openai import OpenAIInstrumentor
 from opentelemetry.instrumentation.threading import ThreadingInstrumentor
 from opentelemetry.sdk.resources import Resource
 from opentelemetry.sdk.trace import ReadableSpan, SpanProcessor, TracerProvider
@@ -34,15 +31,19 @@ from opentelemetry.sdk.trace.export import (
 from opentelemetry.trace import Span
 # Configure logging
-logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
-logger.disabled = True
+logger.setLevel(logging.ERROR)
 # Default configuration
 DEFAULT_ENDPOINT = "https://api.docent.transluce.org/rest/telemetry"
 DEFAULT_COLLECTION_NAME = "default-collection-name"
+def _is_tracing_disabled() -> bool:
+    """Report whether DOCENT_DISABLE_TRACING is set to "true" (case-insensitive)."""
+    flag = os.environ.get("DOCENT_DISABLE_TRACING", "")
+    return flag.lower() == "true"
 class Instruments(Enum):
     """Enumeration of available instrument types."""
@@ -93,6 +94,13 @@ class DocentTracer:
             instruments: Set of instruments to enable (None = all instruments)
             block_instruments: Set of instruments to explicitly disable
         """
+        self._initialized: bool = False
+        # Check if tracing is disabled via environment variable
+        if _is_tracing_disabled():
+            self._disabled = True
+            logger.info("Docent tracing disabled via DOCENT_DISABLE_TRACING environment variable")
+            return
         self.collection_name: str = collection_name
         self.collection_id: str = collection_id if collection_id else str(uuid.uuid4())
         self.default_agent_run_id: str = agent_run_id if agent_run_id else str(uuid.uuid4())
@@ -127,7 +135,6 @@ class DocentTracer:
         self._tracer_provider: Optional[TracerProvider] = None
         self._root_context: Optional[Context] = Context()
         self._tracer: Optional[trace.Tracer] = None
-        self._initialized: bool = False
         self._cleanup_registered: bool = False
         self._disabled: bool = False
         self._spans_processors: List[Union[BatchSpanProcessor, SimpleSpanProcessor]] = []
@@ -223,7 +230,7 @@ class DocentTracer:
                 exporters.append(exporter)
                 logger.info(f"Initialized exporter for endpoint: {endpoint}")
             else:
-                logger.warning(f"Failed to initialize exporter for endpoint: {endpoint}")
+                logger.critical(f"Failed to initialize exporter for endpoint: {endpoint}")
         return exporters
@@ -240,7 +247,12 @@ class DocentTracer:
     def initialize(self):
         """Initialize Docent tracing setup."""
-        if self._initialized or self._disabled:
+        if self._initialized:
+            return
+        # If tracing is disabled, mark as initialized but don't set up anything
+        if self._disabled:
+            self._initialized = True
             return
         try:
@@ -326,8 +338,6 @@ class DocentTracer:
                     logger.info(
                         f"Added {len(otlp_exporters)} OTLP exporters for {len(self.endpoints)} endpoints"
                     )
-                else:
-                    logger.warning("Failed to initialize OTLP exporter")
             if self.enable_console_export:
                 console_exporter: ConsoleSpanExporter = ConsoleSpanExporter()
@@ -355,32 +365,44 @@ class DocentTracer:
             # Instrument OpenAI with our isolated tracer provider
             if Instruments.OPENAI in enabled_instruments:
                 try:
-                    OpenAIInstrumentor().instrument(tracer_provider=self._tracer_provider)
-                    logger.info("Instrumented OpenAI")
+                    if is_package_installed("openai"):
+                        from opentelemetry.instrumentation.openai import OpenAIInstrumentor
+                        OpenAIInstrumentor().instrument(tracer_provider=self._tracer_provider)
+                        logger.info("Instrumented OpenAI")
                 except Exception as e:
                     logger.warning(f"Failed to instrument OpenAI: {e}")
             # Instrument Anthropic with our isolated tracer provider
             if Instruments.ANTHROPIC in enabled_instruments:
                 try:
-                    AnthropicInstrumentor().instrument(tracer_provider=self._tracer_provider)
-                    logger.info("Instrumented Anthropic")
+                    if is_package_installed("anthropic"):
+                        from opentelemetry.instrumentation.anthropic import AnthropicInstrumentor
+                        AnthropicInstrumentor().instrument(tracer_provider=self._tracer_provider)
+                        logger.info("Instrumented Anthropic")
                 except Exception as e:
                     logger.warning(f"Failed to instrument Anthropic: {e}")
             # Instrument Bedrock with our isolated tracer provider
             if Instruments.BEDROCK in enabled_instruments:
                 try:
-                    BedrockInstrumentor().instrument(tracer_provider=self._tracer_provider)
-                    logger.info("Instrumented Bedrock")
+                    if is_package_installed("boto3"):
+                        from opentelemetry.instrumentation.bedrock import BedrockInstrumentor
+                        BedrockInstrumentor().instrument(tracer_provider=self._tracer_provider)
+                        logger.info("Instrumented Bedrock")
                 except Exception as e:
                     logger.warning(f"Failed to instrument Bedrock: {e}")
             # Instrument LangChain with our isolated tracer provider
             if Instruments.LANGCHAIN in enabled_instruments:
                 try:
-                    LangchainInstrumentor().instrument(tracer_provider=self._tracer_provider)
-                    logger.info("Instrumented LangChain")
+                    if is_package_installed("langchain") or is_package_installed("langgraph"):
+                        from opentelemetry.instrumentation.langchain import LangchainInstrumentor
+                        LangchainInstrumentor().instrument(tracer_provider=self._tracer_provider)
+                        logger.info("Instrumented LangChain")
                 except Exception as e:
                     logger.warning(f"Failed to instrument LangChain: {e}")
@@ -397,6 +419,9 @@ class DocentTracer:
     def cleanup(self):
         """Clean up Docent tracing resources and signal trace completion to backend."""
+        if self._disabled:
+            return
         try:
             # Notify backend that trace is done (no span creation)
             try:
@@ -415,6 +440,9 @@ class DocentTracer:
     def close(self):
         """Explicitly close the Docent tracing manager."""
+        if self._disabled:
+            return
         try:
             self.cleanup()
             if self._cleanup_registered:
@@ -425,6 +453,9 @@ class DocentTracer:
     def flush(self) -> None:
         """Force flush all spans to exporters."""
+        if self._disabled:
+            return
         try:
             for processor in self._spans_processors:
                 if hasattr(processor, "force_flush"):
@@ -440,8 +471,6 @@ class DocentTracer:
     def verify_initialized(self) -> bool:
         """Verify if the manager is properly initialized."""
-        if self._disabled:
-            return False
         return self._initialized
     def __enter__(self) -> "DocentTracer":
@@ -487,6 +516,15 @@ class DocentTracer:
         Yields:
             Tuple of (agent_run_id, transcript_id)
         """
+        if self._disabled:
+            # Return dummy IDs when tracing is disabled
+            if agent_run_id is None:
+                agent_run_id = str(uuid.uuid4())
+            if transcript_id is None:
+                transcript_id = str(uuid.uuid4())
+            yield agent_run_id, transcript_id
+            return
         if not self._initialized:
             self.initialize()
@@ -535,6 +573,15 @@ class DocentTracer:
         Yields:
             Tuple of (agent_run_id, transcript_id)
         """
+        if self._disabled:
+            # Return dummy IDs when tracing is disabled
+            if agent_run_id is None:
+                agent_run_id = str(uuid.uuid4())
+            if transcript_id is None:
+                transcript_id = str(uuid.uuid4())
+            yield agent_run_id, transcript_id
+            return
         if not self._initialized:
             self.initialize()
@@ -600,6 +647,9 @@ class DocentTracer:
             score: Numeric score value
             attributes: Optional additional attributes
         """
+        if self._disabled:
+            return
         collection_id = self.collection_id
         payload: Dict[str, Any] = {
             "collection_id": collection_id,
@@ -613,6 +663,9 @@ class DocentTracer:
         self._post_json("/v1/scores", payload)
     def send_agent_run_metadata(self, agent_run_id: str, metadata: Dict[str, Any]) -> None:
+        if self._disabled:
+            return
         collection_id = self.collection_id
         payload: Dict[str, Any] = {
             "collection_id": collection_id,
@@ -640,6 +693,9 @@ class DocentTracer:
             transcript_group_id: Optional transcript group ID
             metadata: Optional metadata to send
         """
+        if self._disabled:
+            return
         collection_id = self.collection_id
         payload: Dict[str, Any] = {
             "collection_id": collection_id,
@@ -705,6 +761,13 @@ class DocentTracer:
         Yields:
             The transcript ID
         """
+        if self._disabled:
+            # Return dummy ID when tracing is disabled
+            if transcript_id is None:
+                transcript_id = str(uuid.uuid4())
+            yield transcript_id
+            return
         if not self._initialized:
             raise RuntimeError(
                 "Tracer is not initialized. Call initialize_tracing() before using transcript context."
@@ -760,6 +823,13 @@ class DocentTracer:
         Yields:
             The transcript ID
         """
+        if self._disabled:
+            # Return dummy ID when tracing is disabled
+            if transcript_id is None:
+                transcript_id = str(uuid.uuid4())
+            yield transcript_id
+            return
         if not self._initialized:
             raise RuntimeError(
                 "Tracer is not initialized. Call initialize_tracing() before using transcript context."
@@ -811,10 +881,23 @@ class DocentTracer:
             parent_transcript_group_id: Optional parent transcript group ID
             metadata: Optional metadata to send
         """
+        if self._disabled:
+            return
         collection_id = self.collection_id
+        # Get agent_run_id from current context
+        agent_run_id = self.get_current_agent_run_id()
+        if not agent_run_id:
+            logger.error(
+                f"Cannot send transcript group metadata for {transcript_group_id} - no agent_run_id in context"
+            )
+            return
         payload: Dict[str, Any] = {
             "collection_id": collection_id,
             "transcript_group_id": transcript_group_id,
+            "agent_run_id": agent_run_id,
             "timestamp": datetime.now(timezone.utc).isoformat(),
         }
@@ -851,6 +934,13 @@ class DocentTracer:
         Yields:
             The transcript group ID
         """
+        if self._disabled:
+            # Return dummy ID when tracing is disabled
+            if transcript_group_id is None:
+                transcript_group_id = str(uuid.uuid4())
+            yield transcript_group_id
+            return
         if not self._initialized:
             raise RuntimeError(
                 "Tracer is not initialized. Call initialize_tracing() before using transcript group context."
@@ -908,6 +998,13 @@ class DocentTracer:
         Yields:
             The transcript group ID
         """
+        if self._disabled:
+            # Return dummy ID when tracing is disabled
+            if transcript_group_id is None:
+                transcript_group_id = str(uuid.uuid4())
+            yield transcript_group_id
+            return
         if not self._initialized:
             raise RuntimeError(
                 "Tracer is not initialized. Call initialize_tracing() before using transcript group context."
@@ -944,6 +1041,9 @@ class DocentTracer:
             self._transcript_group_id_var.reset(transcript_group_id_token)
     def _send_trace_done(self) -> None:
+        if self._disabled:
+            return
         collection_id = self.collection_id
         payload: Dict[str, Any] = {
             "collection_id": collection_id,
@@ -1019,6 +1119,22 @@ def initialize_tracing(
     return _global_tracer
+def _get_package_name(dist: Distribution) -> str | None:
+    """Return the distribution's name in lowercase, or None if it cannot be read."""
+    try:
+        lowered = dist.name.lower()
+    except (KeyError, AttributeError):
+        lowered = None
+    return lowered
+# Lowercased names of every distribution visible when this module is imported
+installed_packages = {
+    pkg for pkg in map(_get_package_name, distributions()) if pkg is not None
+}
+def is_package_installed(package_name: str) -> bool:
+    """Check, case-insensitively, whether a distribution with this name was found at import time."""
+    wanted = package_name.lower()
+    return wanted in installed_packages
 def get_tracer() -> DocentTracer:
     """Get the global Docent tracer."""
     if _global_tracer is None:

{docent_python-0.1.5a0 → docent_python-0.1.7a0}/pyproject.toml RENAMED Viewed

@@ -1,7 +1,7 @@
 [project]
 name = "docent-python"
 description = "Docent SDK"
-version = "0.1.5-alpha"
+version = "0.1.7-alpha"
 authors = [
   { name="Transluce", email="info@transluce.org" },
 ]

docent_python-0.1.5a0/docent/loaders/load_inspect.py DELETED Viewed

@@ -1,88 +0,0 @@
-from typing import Any
-from inspect_ai.log import EvalLog
-from inspect_ai.scorer import CORRECT, INCORRECT, NOANSWER, PARTIAL, Score
-from docent.data_models import AgentRun, Transcript
-from docent.data_models.chat import parse_chat_message
-def _normalize_inspect_score(score: Score) -> Any:
-    """
-    Normalize an inspect score to a float. This implements the same logic as inspect_ai.scorer._metric.value_to_float, but fails more conspicuously.
-    Args:
-        score: The inspect score to normalize.
-    Returns:
-        The normalized score as a float, or None if the score is not a valid value.
-    """
-    def _leaf_normalize(value: int | float | bool | str | None) -> float | str | None:
-        if value is None:
-            return None
-        if isinstance(value, int | float | bool):
-            return float(value)
-        if value == CORRECT:
-            return 1.0
-        if value == PARTIAL:
-            return 0.5
-        if value in [INCORRECT, NOANSWER]:
-            return 0
-        value = str(value).lower()
-        if value in ["yes", "true"]:
-            return 1.0
-        if value in ["no", "false"]:
-            return 0.0
-        if value.replace(".", "").isnumeric():
-            return float(value)
-        return value
-    if isinstance(score.value, int | float | bool | str):
-        return _leaf_normalize(score.value)
-    if isinstance(score.value, list):
-        return [_leaf_normalize(v) for v in score.value]
-    assert isinstance(score.value, dict), "Inspect score must be leaf value, list, or dict"
-    return {k: _leaf_normalize(v) for k, v in score.value.items()}
-def load_inspect_log(log: EvalLog) -> list[AgentRun]:
-    if log.samples is None:
-        return []
-    # TODO(vincent): fix this
-    agent_runs: list[AgentRun] = []
-    for s in log.samples:
-        sample_id = s.id
-        epoch_id = s.epoch
-        if s.scores is None:
-            sample_scores = {}
-        else:
-            sample_scores = {k: _normalize_inspect_score(v) for k, v in s.scores.items()}
-        metadata = {
-            "task_id": log.eval.task,
-            "sample_id": str(sample_id),
-            "epoch_id": epoch_id,
-            "model": log.eval.model,
-            "additional_metadata": s.metadata,
-            "scores": sample_scores,
-            # Scores could have answers, explanations, and other metadata besides the values we extract
-            "scoring_metadata": s.scores,
-        }
-        agent_runs.append(
-            AgentRun(
-                transcripts={
-                    "main": Transcript(
-                        messages=[parse_chat_message(m.model_dump()) for m in s.messages],
-                        metadata={},
-                    )
-                },
-                metadata=metadata,
-            )
-        )
-    return agent_runs