PyPI - docent-python - Versions diffs - 0.1.14a0__py3-none-any.whl → 0.1.28a0__py3-none-any.whl - Mend

docent-python 0.1.14a0__py3-none-any.whl → 0.1.28a0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of docent-python might be problematic. Click here for more details.

Files changed (46)

docent/_llm_util/__init__.py +0 -0
docent/_llm_util/data_models/__init__.py +0 -0
docent/_llm_util/data_models/exceptions.py +48 -0
docent/_llm_util/data_models/llm_output.py +331 -0
docent/_llm_util/llm_cache.py +193 -0
docent/_llm_util/llm_svc.py +472 -0
docent/_llm_util/model_registry.py +130 -0
docent/_llm_util/providers/__init__.py +0 -0
docent/_llm_util/providers/anthropic.py +537 -0
docent/_llm_util/providers/common.py +41 -0
docent/_llm_util/providers/google.py +530 -0
docent/_llm_util/providers/openai.py +745 -0
docent/_llm_util/providers/openrouter.py +375 -0
docent/_llm_util/providers/preference_types.py +104 -0
docent/_llm_util/providers/provider_registry.py +164 -0
docent/data_models/__init__.py +2 -0
docent/data_models/agent_run.py +17 -29
docent/data_models/chat/__init__.py +6 -1
docent/data_models/chat/message.py +3 -1
docent/data_models/citation.py +103 -22
docent/data_models/judge.py +19 -0
docent/data_models/metadata_util.py +16 -0
docent/data_models/remove_invalid_citation_ranges.py +23 -10
docent/data_models/transcript.py +25 -80
docent/data_models/util.py +170 -0
docent/judges/__init__.py +23 -0
docent/judges/analysis.py +77 -0
docent/judges/impl.py +587 -0
docent/judges/runner.py +129 -0
docent/judges/stats.py +205 -0
docent/judges/types.py +311 -0
docent/judges/util/forgiving_json.py +108 -0
docent/judges/util/meta_schema.json +86 -0
docent/judges/util/meta_schema.py +29 -0
docent/judges/util/parse_output.py +87 -0
docent/judges/util/voting.py +139 -0
docent/sdk/agent_run_writer.py +72 -21
docent/sdk/client.py +276 -23
docent/trace.py +413 -90
{docent_python-0.1.14a0.dist-info → docent_python-0.1.28a0.dist-info}/METADATA +13 -5
docent_python-0.1.28a0.dist-info/RECORD +59 -0
docent/data_models/metadata.py +0 -229
docent/data_models/yaml_util.py +0 -12
docent_python-0.1.14a0.dist-info/RECORD +0 -32
{docent_python-0.1.14a0.dist-info → docent_python-0.1.28a0.dist-info}/WHEEL +0 -0
{docent_python-0.1.14a0.dist-info → docent_python-0.1.28a0.dist-info}/licenses/LICENSE.md +0 -0

docent/data_models/agent_run.py CHANGED Viewed

@@ -17,8 +17,8 @@ from pydantic_core import to_jsonable_python
 from docent._log_util import get_logger
 from docent.data_models._tiktoken_util import get_token_count, group_messages_into_ranges
+from docent.data_models.metadata_util import dump_metadata
 from docent.data_models.transcript import Transcript, TranscriptGroup
-from docent.data_models.yaml_util import yaml_dump_metadata
 logger = get_logger(__name__)
@@ -125,6 +125,7 @@ class AgentRun(BaseModel):
         #     )
         # Append the text field
+        result.append({"name": "agent_run_id", "type": "str"})
         result.append({"name": "text", "type": "str"})
         return result
@@ -147,18 +148,12 @@ class AgentRun(BaseModel):
         # Generate transcript strings using appropriate method
         transcript_strs: list[str] = []
         for i, t in enumerate(self.transcripts):
-            if use_blocks:
-                transcript_content = t.to_str_blocks_with_token_limit(
-                    token_limit=sys.maxsize,
-                    transcript_idx=i,
-                    agent_run_idx=None,
-                )[0]
-            else:
-                transcript_content = t.to_str_with_token_limit(
-                    token_limit=sys.maxsize,
-                    transcript_idx=i,
-                    agent_run_idx=None,
-                )[0]
+            transcript_content = t.to_str(
+                token_limit=sys.maxsize,
+                transcript_idx=i,
+                agent_run_idx=None,
+                use_action_units=not use_blocks,
+            )[0]
             transcript_strs.append(f"<transcript>\n{transcript_content}\n</transcript>")
         transcripts_str = "\n\n".join(transcript_strs)
@@ -207,23 +202,16 @@ class AgentRun(BaseModel):
                     ), "Ranges without metadata should be a single message"
                     t = self.transcripts[msg_range.start]
                     if msg_range.num_tokens < token_limit - 50:
-                        if use_blocks:
-                            transcript = f"<transcript>\n{t.to_str_blocks_with_token_limit(token_limit=sys.maxsize)[0]}\n</transcript>"
-                        else:
-                            transcript = f"<transcript>\n{t.to_str_with_token_limit(token_limit=sys.maxsize)[0]}\n</transcript>"
+                        transcript = f"<transcript>\n{t.to_str(token_limit=sys.maxsize, use_action_units=not use_blocks)[0]}\n</transcript>"
                         result = (
                             f"Here is a partial agent run for analysis purposes only:\n{transcript}"
                         )
                         results.append(result)
                     else:
-                        if use_blocks:
-                            transcript_fragments = t.to_str_blocks_with_token_limit(
-                                token_limit=token_limit - 50,
-                            )
-                        else:
-                            transcript_fragments = t.to_str_with_token_limit(
-                                token_limit=token_limit - 50,
-                            )
+                        transcript_fragments = t.to_str(
+                            token_limit=token_limit - 50,
+                            use_action_units=not use_blocks,
+                        )
                         for fragment in transcript_fragments:
                             result = f"<transcript>\n{fragment}\n</transcript>"
                             result = (
@@ -459,10 +447,10 @@ class AgentRun(BaseModel):
         text = _recurse("__global_root")
         # Append agent run metadata below the full content
-        yaml_text = yaml_dump_metadata(self.metadata)
-        if yaml_text is not None:
+        metadata_text = dump_metadata(self.metadata)
+        if metadata_text is not None:
             if indent > 0:
-                yaml_text = textwrap.indent(yaml_text, " " * indent)
-            text += f"\n<|agent run metadata|>\n{yaml_text}\n</|agent run metadata|>"
+                metadata_text = textwrap.indent(metadata_text, " " * indent)
+            text += f"\n<|agent run metadata|>\n{metadata_text}\n</|agent run metadata|>"
         return text

docent/data_models/chat/__init__.py CHANGED Viewed

@@ -7,7 +7,12 @@ from docent.data_models.chat.message import (
     UserMessage,
     parse_chat_message,
 )
-from docent.data_models.chat.tool import ToolCall, ToolCallContent, ToolInfo, ToolParams
+from docent.data_models.chat.tool import (
+    ToolCall,
+    ToolCallContent,
+    ToolInfo,
+    ToolParams,
+)
 __all__ = [
     "ChatMessage",

docent/data_models/chat/message.py CHANGED Viewed

@@ -1,7 +1,7 @@
 from logging import getLogger
 from typing import Annotated, Any, Literal
-from pydantic import BaseModel, Discriminator
+from pydantic import BaseModel, Discriminator, Field
 from docent.data_models.chat.content import Content
 from docent.data_models.chat.tool import ToolCall
@@ -17,11 +17,13 @@ class BaseChatMessage(BaseModel):
         id: Optional unique identifier for the message.
         content: The message content, either as a string or list of Content objects.
         role: The role of the message sender (system, user, assistant, tool).
+        metadata: Additional structured metadata about the message.
     """
     id: str | None = None
     content: str | list[Content]
     role: Literal["system", "user", "assistant", "tool"]
+    metadata: dict[str, Any] = Field(default_factory=dict)
     @property
     def text(self) -> str:

docent/data_models/citation.py CHANGED Viewed

@@ -1,15 +1,27 @@
 import re
+from dataclasses import dataclass
 from pydantic import BaseModel
+@dataclass
+class ParsedCitation:
+    """Represents a parsed citation before conversion to full Citation object."""
+    transcript_idx: int | None
+    block_idx: int | None
+    metadata_key: str | None = None
+    start_pattern: str | None = None
 class Citation(BaseModel):
     start_idx: int
     end_idx: int
     agent_run_idx: int | None = None
     transcript_idx: int | None = None
-    block_idx: int
+    block_idx: int | None = None
     action_unit_idx: int | None = None
+    metadata_key: str | None = None
     start_pattern: str | None = None
@@ -17,6 +29,9 @@ RANGE_BEGIN = "<RANGE>"
 RANGE_END = "</RANGE>"
 _SINGLE_RE = re.compile(r"T(\d+)B(\d+)")
+_METADATA_RE = re.compile(r"^M\.([^:]+)$")  # [M.key]
+_TRANSCRIPT_METADATA_RE = re.compile(r"^T(\d+)M\.([^:]+)$")  # [T0M.key]
+_MESSAGE_METADATA_RE = re.compile(r"^T(\d+)B(\d+)M\.([^:]+)$")  # [T0B1M.key]
 _RANGE_CONTENT_RE = re.compile(r":\s*" + re.escape(RANGE_BEGIN) + r".*?" + re.escape(RANGE_END))
@@ -70,41 +85,93 @@ def scan_brackets(text: str) -> list[tuple[int, int, str]]:
     return matches
-def parse_single_citation(part: str) -> tuple[int, int, str | None] | None:
+def parse_single_citation(part: str) -> ParsedCitation | None:
     """
     Parse a single citation token inside a bracket and return its components.
-    Returns (transcript_idx, block_idx, start_pattern) or None if invalid.
+    Returns ParsedCitation or None if invalid.
+    For metadata citations, transcript_idx may be None (for agent run metadata).
+    Supports optional text range for all valid citation kinds.
     """
     token = part.strip()
     if not token:
         return None
+    # Extract optional range part
+    start_pattern: str | None = None
+    citation_part = token
     if ":" in token:
-        citation_part, range_part = token.split(":", 1)
-        single_match = _SINGLE_RE.match(citation_part.strip())
-        if not single_match:
+        left, right = token.split(":", 1)
+        citation_part = left.strip()
+        start_pattern = _extract_range_pattern(right)
+    # Try matches in order of specificity
+    # 1) Message metadata [T0B0M.key]
+    m = _MESSAGE_METADATA_RE.match(citation_part)
+    if m:
+        transcript_idx = int(m.group(1))
+        block_idx = int(m.group(2))
+        metadata_key = m.group(3)
+        # Disallow nested keys like status.code per instruction
+        if "." in metadata_key:
             return None
-        transcript_idx = int(single_match.group(1))
-        block_idx = int(single_match.group(2))
-        start_pattern = _extract_range_pattern(range_part)
-        return transcript_idx, block_idx, start_pattern
-    else:
-        single_match = _SINGLE_RE.match(token)
-        if not single_match:
+        return ParsedCitation(
+            transcript_idx=transcript_idx,
+            block_idx=block_idx,
+            metadata_key=metadata_key,
+            start_pattern=start_pattern,
+        )
+    # 2) Transcript metadata [T0M.key]
+    m = _TRANSCRIPT_METADATA_RE.match(citation_part)
+    if m:
+        transcript_idx = int(m.group(1))
+        metadata_key = m.group(2)
+        if "." in metadata_key:
             return None
-        transcript_idx = int(single_match.group(1))
-        block_idx = int(single_match.group(2))
-        return transcript_idx, block_idx, None
+        return ParsedCitation(
+            transcript_idx=transcript_idx,
+            block_idx=None,
+            metadata_key=metadata_key,
+            start_pattern=start_pattern,
+        )
+    # 3) Agent run metadata [M.key]
+    m = _METADATA_RE.match(citation_part)
+    if m:
+        metadata_key = m.group(1)
+        if "." in metadata_key:
+            return None
+        return ParsedCitation(
+            transcript_idx=None,
+            block_idx=None,
+            metadata_key=metadata_key,
+            start_pattern=start_pattern,
+        )
+    # 4) Regular transcript block [T0B0]
+    m = _SINGLE_RE.match(citation_part)
+    if m:
+        transcript_idx = int(m.group(1))
+        block_idx = int(m.group(2))
+        return ParsedCitation(
+            transcript_idx=transcript_idx, block_idx=block_idx, start_pattern=start_pattern
+        )
+    return None
 def parse_citations(text: str) -> tuple[str, list[Citation]]:
     """
-    Parse citations from text in the format described by BLOCK_RANGE_CITE_INSTRUCTION.
+    Parse citations from text in the format described by TEXT_RANGE_CITE_INSTRUCTION.
     Supported formats:
     - Single block: [T<key>B<idx>]
     - Text range with start pattern: [T<key>B<idx>:<RANGE>start_pattern</RANGE>]
+    - Agent run metadata: [M.key]
+    - Transcript metadata: [T<key>M.key]
+    - Message metadata: [T<key>B<idx>M.key]
+    - Message metadata with text range: [T<key>B<idx>M.key:<RANGE>start_pattern</RANGE>]
     Args:
         text: The text to parse citations from
@@ -127,8 +194,21 @@ def parse_citations(text: str) -> tuple[str, list[Citation]]:
         # Parse a single citation token inside the bracket
         parsed = parse_single_citation(bracket_content)
         if parsed:
-            transcript_idx, block_idx, start_pattern = parsed
-            replacement = f"T{transcript_idx}B{block_idx}"
+            # Create appropriate replacement text based on citation type
+            if parsed.metadata_key:
+                if parsed.transcript_idx is None:
+                    # Agent run metadata [M.key]
+                    replacement = "run metadata"
+                elif parsed.block_idx is None:
+                    # Transcript metadata [T0M.key]
+                    replacement = f"T{parsed.transcript_idx}"
+                else:
+                    # Message metadata [T0B1M.key]
+                    replacement = f"T{parsed.transcript_idx}B{parsed.block_idx}"
+            else:
+                # Regular transcript block [T0B1]
+                replacement = f"T{parsed.transcript_idx}B{parsed.block_idx}"
             # Current absolute start position for this replacement in the cleaned text
             start_idx = len(cleaned_text)
             end_idx = start_idx + len(replacement)
@@ -137,10 +217,11 @@ def parse_citations(text: str) -> tuple[str, list[Citation]]:
                     start_idx=start_idx,
                     end_idx=end_idx,
                     agent_run_idx=None,
-                    transcript_idx=transcript_idx,
-                    block_idx=block_idx,
+                    transcript_idx=parsed.transcript_idx,
+                    block_idx=parsed.block_idx,
                     action_unit_idx=None,
-                    start_pattern=start_pattern,
+                    metadata_key=parsed.metadata_key,
+                    start_pattern=parsed.start_pattern,
                 )
             )
             cleaned_text += replacement

docent/data_models/judge.py ADDED Viewed

@@ -0,0 +1,19 @@
+"""Judge-related data models shared across Docent components."""
+from typing import Any
+from uuid import uuid4
+from pydantic import BaseModel, Field
+class Label(BaseModel):
+    id: str = Field(default_factory=lambda: str(uuid4()))
+    label_set_id: str
+    label_value: dict[str, Any]
+    agent_run_id: str
+__all__ = ["Label"]

docent/data_models/metadata_util.py ADDED Viewed

@@ -0,0 +1,16 @@
+import json
+from typing import Any
+from pydantic_core import to_jsonable_python
+def dump_metadata(metadata: dict[str, Any]) -> str | None:
+    """
+    Dump metadata to a JSON string.
+    We used to use YAML to save tokens, but JSON makes it easier to find cited ranges on the frontend because the frontend uses JSON.
+    """
+    if not metadata:
+        return None
+    metadata_obj = to_jsonable_python(metadata)
+    text = json.dumps(metadata_obj, indent=2)
+    return text.strip()

docent/data_models/remove_invalid_citation_ranges.py CHANGED Viewed

@@ -1,3 +1,4 @@
+import json
 import re
 from docent.data_models.agent_run import AgentRun
@@ -52,7 +53,7 @@ def find_citation_matches_in_text(text: str, start_pattern: str) -> list[tuple[i
 def get_transcript_text_for_citation(agent_run: AgentRun, citation: Citation) -> str | None:
     """
-    Get the text content of a specific transcript block from an AgentRun,
+    Get the text content of a specific transcript block (or transcript/run metadata) from an AgentRun,
     using the same formatting as shown to LLMs via format_chat_message.
     Args:
@@ -62,19 +63,28 @@ def get_transcript_text_for_citation(agent_run: AgentRun, citation: Citation) ->
     Returns:
         Text content of the specified block (including tool calls), or None if not found
     """
-    if citation.transcript_idx is None:
-        return None
     try:
-        if citation.transcript_idx >= len(agent_run.get_transcript_ids_ordered()):
+        if citation.transcript_idx is None:
+            # At the run level, can only cite metadata
+            if citation.metadata_key is not None:
+                return json.dumps(agent_run.metadata.get(citation.metadata_key))
             return None
         transcript_id = agent_run.get_transcript_ids_ordered()[citation.transcript_idx]
         transcript = agent_run.transcript_dict[transcript_id]
-        if citation.block_idx >= len(transcript.messages):
+        if citation.block_idx is None:
+            # At the transcript level, can only cite metadata
+            if citation.metadata_key is not None:
+                return json.dumps(transcript.metadata.get(citation.metadata_key))
             return None
         message = transcript.messages[citation.block_idx]
+        # At the message level, can cite metadata or content
+        if citation.metadata_key is not None:
+            return json.dumps(message.metadata.get(citation.metadata_key))
         # Use the same formatting function that generates content for LLMs
         # This ensures consistent formatting between citation validation and LLM serialization
         return format_chat_message(
@@ -99,6 +109,9 @@ def validate_citation_text_range(agent_run: AgentRun, citation: Citation) -> boo
     if not citation.start_pattern:
         # Nothing to validate
         return True
+    if citation.metadata_key is not None:
+        # We don't need to remove invalid metadata citation ranges
+        return True
     text = get_transcript_text_for_citation(agent_run, citation)
     if text is None:
@@ -130,16 +143,16 @@ def remove_invalid_citation_ranges(text: str, agent_run: AgentRun) -> str:
         # Parse this bracket content to get citation info
         parsed = parse_single_citation(bracket_content)
         if parsed:
-            transcript_idx, block_idx, start_pattern = parsed
             # The citation spans from start to end in the original text
             citation = Citation(
                 start_idx=start,
                 end_idx=end,
                 agent_run_idx=None,
-                transcript_idx=transcript_idx,
-                block_idx=block_idx,
+                transcript_idx=parsed.transcript_idx,
+                block_idx=parsed.block_idx,
                 action_unit_idx=None,
-                start_pattern=start_pattern,
+                metadata_key=parsed.metadata_key,
+                start_pattern=parsed.start_pattern,
             )
             citations.append(citation)

docent/data_models/transcript.py CHANGED Viewed

@@ -15,7 +15,7 @@ from docent.data_models._tiktoken_util import (
 )
 from docent.data_models.chat import AssistantMessage, ChatMessage, ContentReasoning
 from docent.data_models.citation import RANGE_BEGIN, RANGE_END
-from docent.data_models.yaml_util import yaml_dump_metadata
+from docent.data_models.metadata_util import dump_metadata
 # Template for formatting individual transcript blocks
 TRANSCRIPT_BLOCK_TEMPLATE = """
@@ -29,6 +29,12 @@ TEXT_RANGE_CITE_INSTRUCTION = f"""Anytime you quote the transcript, or refer to
 A citation may include a specific range of text within a block. Use {RANGE_BEGIN} and {RANGE_END} to mark the specific range of text. Add it after the block ID separated by a colon. For example, to cite the part of transcript 0, block 1, where the agent says "I understand the task", write [T0B1:{RANGE_BEGIN}I understand the task{RANGE_END}]. Citations must follow this exact format. The markers {RANGE_BEGIN} and {RANGE_END} must be used ONLY inside the brackets of a citation.
+- You may cite a top-level key in the agent run metadata like this: [M.task_description].
+- You may cite a top-level key in transcript metadata. For example, for transcript 0: [T0M.start_time].
+- You may cite a top-level key in message metadata for a block. For example, for transcript 0, block 1: [T0B1M.status].
+- You may not cite nested keys. For example, [T0B1M.status.code] is invalid.
+- Within a top-level metadata key you may cite a range of text that appears in the value. For example, [T0B1M.status:{RANGE_BEGIN}"running":false{RANGE_END}].
 Important notes:
 - You must include the full content of the text range {RANGE_BEGIN} and {RANGE_END}, EXACTLY as it appears in the transcript, word-for-word, including any markers or punctuation that appear in the middle of the text.
 - Citations must be as specific as possible. This means you should usually cite a specific text range within a block.
@@ -36,6 +42,8 @@ Important notes:
 - Citations are self-contained. Do NOT label them as citation or evidence. Just insert the citation by itself at the appropriate place in the text.
 - Citations must come immediately after the part of a claim that they support. This may be in the middle of a sentence.
 - Each pair of brackets must contain only one citation. To cite multiple blocks, use multiple pairs of brackets, like [T0B0] [T0B1].
+- Outside of citations, do not refer to transcript numbers or block numbers.
+- Outside of citations, avoid quoting or paraphrasing the transcript.
 """
 BLOCK_CITE_INSTRUCTION = """Each transcript and each block has a unique index. Cite the relevant indices in brackets when relevant, like [T<idx>B<idx>]. Use multiple tags to cite multiple blocks, like [T<idx1>B<idx1>][T<idx2>B<idx2>]. Remember to cite specific blocks and NOT action units."""
@@ -72,6 +80,11 @@ def format_chat_message(
                 args = ", ".join([f"{k}={v}" for k, v in tool_call.arguments.items()])
                 cur_content += f"\n<tool call>\n{tool_call.function}({args})\n</tool call>"
+    if message.metadata:
+        metadata_text = dump_metadata(message.metadata)
+        if metadata_text is not None:
+            cur_content += f"\n<|message metadata|>\n{metadata_text}\n</|message metadata|>"
     return TRANSCRIPT_BLOCK_TEMPLATE.format(
         index_label=index_label, role=message.role, content=cur_content
     )
@@ -122,13 +135,11 @@ class TranscriptGroup(BaseModel):
             str: XML-like wrapped text including the group's metadata.
         """
         # Prepare YAML metadata
-        yaml_text = yaml_dump_metadata(self.metadata)
-        if yaml_text is not None:
+        metadata_text = dump_metadata(self.metadata)
+        if metadata_text is not None:
             if indent > 0:
-                yaml_text = textwrap.indent(yaml_text, " " * indent)
-            inner = (
-                f"{children_text}\n<|{self.name} metadata|>\n{yaml_text}\n</|{self.name} metadata|>"
-            )
+                metadata_text = textwrap.indent(metadata_text, " " * indent)
+            inner = f"{children_text}\n<|{self.name} metadata|>\n{metadata_text}\n</|{self.name} metadata|>"
         else:
             inner = children_text
@@ -301,20 +312,6 @@ class Transcript(BaseModel):
         self.messages = messages
         self._units_of_action = self._compute_units_of_action()
-    def to_str(
-        self,
-        transcript_idx: int = 0,
-        agent_run_idx: int | None = None,
-        highlight_action_unit: int | None = None,
-    ) -> str:
-        return self._to_str_with_token_limit_impl(
-            token_limit=sys.maxsize,
-            transcript_idx=transcript_idx,
-            agent_run_idx=agent_run_idx,
-            use_action_units=True,
-            highlight_action_unit=highlight_action_unit,
-        )[0]
     def _generate_formatted_blocks(
         self,
         transcript_idx: int = 0,
@@ -379,9 +376,9 @@ class Transcript(BaseModel):
         return blocks
-    def _to_str_with_token_limit_impl(
+    def to_str(
         self,
-        token_limit: int,
+        token_limit: int = sys.maxsize,
         transcript_idx: int = 0,
         agent_run_idx: int | None = None,
         use_action_units: bool = True,
@@ -408,7 +405,7 @@ class Transcript(BaseModel):
         metadata_obj = to_jsonable_python(self.metadata)
         yaml_width = float("inf")
         block_str = f"<blocks>\n{blocks_str}\n</blocks>\n"
-        metadata_str = f"<metadata>\n{yaml.dump(metadata_obj, width=yaml_width)}\n</metadata>"
+        metadata_str = f"<|transcript metadata|>\n{yaml.dump(metadata_obj, width=yaml_width)}\n</|transcript metadata|>"
         if token_limit == sys.maxsize:
             return [f"{block_str}" f"{metadata_str}"]
@@ -439,56 +436,6 @@ class Transcript(BaseModel):
             return results
-    def to_str_blocks(
-        self,
-        transcript_idx: int = 0,
-        agent_run_idx: int | None = None,
-    ) -> str:
-        """Represents the transcript as a string using individual message blocks.
-        Unlike to_str() which groups messages into action units, this method
-        formats each message as an individual block.
-        Returns:
-            str: A string representation with individual message blocks.
-        """
-        return self._to_str_with_token_limit_impl(
-            token_limit=sys.maxsize,
-            transcript_idx=transcript_idx,
-            agent_run_idx=agent_run_idx,
-            use_action_units=False,
-        )[0]
-    def to_str_with_token_limit(
-        self,
-        token_limit: int,
-        transcript_idx: int = 0,
-        agent_run_idx: int | None = None,
-        highlight_action_unit: int | None = None,
-    ) -> list[str]:
-        """Represents the transcript as a list of strings using action units with token limit handling."""
-        return self._to_str_with_token_limit_impl(
-            token_limit=token_limit,
-            transcript_idx=transcript_idx,
-            agent_run_idx=agent_run_idx,
-            use_action_units=True,
-            highlight_action_unit=highlight_action_unit,
-        )
-    def to_str_blocks_with_token_limit(
-        self,
-        token_limit: int,
-        transcript_idx: int = 0,
-        agent_run_idx: int | None = None,
-    ) -> list[str]:
-        """Represents the transcript as individual blocks with token limit handling."""
-        return self._to_str_with_token_limit_impl(
-            token_limit=token_limit,
-            transcript_idx=transcript_idx,
-            agent_run_idx=agent_run_idx,
-            use_action_units=False,
-        )
     ##############################
     # New text rendering methods #
     ##############################
@@ -506,13 +453,11 @@ class Transcript(BaseModel):
         content_str = f"<|T{transcript_idx} blocks|>\n{blocks_str}\n</|T{transcript_idx} blocks|>"
         # Gather metadata and add to content
-        yaml_text = yaml_dump_metadata(self.metadata)
-        if yaml_text is not None:
+        metadata_text = dump_metadata(self.metadata)
+        if metadata_text is not None:
             if indent > 0:
-                yaml_text = textwrap.indent(yaml_text, " " * indent)
-            content_str += (
-                f"\n<|T{transcript_idx} metadata|>\n{yaml_text}\n</|T{transcript_idx} metadata|>"
-            )
+                metadata_text = textwrap.indent(metadata_text, " " * indent)
+            content_str += f"\n<|T{transcript_idx} metadata|>\n{metadata_text}\n</|T{transcript_idx} metadata|>"
         # Format content and return
         if indent > 0:

docent-python 0.1.14a0__py3-none-any.whl → 0.1.28a0__py3-none-any.whl

Potentially problematic release.

docent-python 0.1.14a0__py3-none-any.whl → 0.1.28a0__py3-none-any.whl