docent-python 0.1.15a0__tar.gz → 0.1.17a0__tar.gz

This diff shows the contents of publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.

Potentially problematic release.


This version of docent-python might be problematic.

Files changed (34)
  1. {docent_python-0.1.15a0 → docent_python-0.1.17a0}/PKG-INFO +1 -1
  2. {docent_python-0.1.15a0 → docent_python-0.1.17a0}/docent/data_models/agent_run.py +11 -24
  3. {docent_python-0.1.15a0 → docent_python-0.1.17a0}/docent/data_models/chat/message.py +3 -1
  4. {docent_python-0.1.15a0 → docent_python-0.1.17a0}/docent/data_models/transcript.py +8 -67
  5. {docent_python-0.1.15a0 → docent_python-0.1.17a0}/docent/sdk/agent_run_writer.py +10 -2
  6. {docent_python-0.1.15a0 → docent_python-0.1.17a0}/pyproject.toml +1 -1
  7. {docent_python-0.1.15a0 → docent_python-0.1.17a0}/.gitignore +0 -0
  8. {docent_python-0.1.15a0 → docent_python-0.1.17a0}/LICENSE.md +0 -0
  9. {docent_python-0.1.15a0 → docent_python-0.1.17a0}/README.md +0 -0
  10. {docent_python-0.1.15a0 → docent_python-0.1.17a0}/docent/__init__.py +0 -0
  11. {docent_python-0.1.15a0 → docent_python-0.1.17a0}/docent/_log_util/__init__.py +0 -0
  12. {docent_python-0.1.15a0 → docent_python-0.1.17a0}/docent/_log_util/logger.py +0 -0
  13. {docent_python-0.1.15a0 → docent_python-0.1.17a0}/docent/data_models/__init__.py +0 -0
  14. {docent_python-0.1.15a0 → docent_python-0.1.17a0}/docent/data_models/_tiktoken_util.py +0 -0
  15. {docent_python-0.1.15a0 → docent_python-0.1.17a0}/docent/data_models/chat/__init__.py +0 -0
  16. {docent_python-0.1.15a0 → docent_python-0.1.17a0}/docent/data_models/chat/content.py +0 -0
  17. {docent_python-0.1.15a0 → docent_python-0.1.17a0}/docent/data_models/chat/tool.py +0 -0
  18. {docent_python-0.1.15a0 → docent_python-0.1.17a0}/docent/data_models/citation.py +0 -0
  19. {docent_python-0.1.15a0 → docent_python-0.1.17a0}/docent/data_models/metadata.py +0 -0
  20. {docent_python-0.1.15a0 → docent_python-0.1.17a0}/docent/data_models/regex.py +0 -0
  21. {docent_python-0.1.15a0 → docent_python-0.1.17a0}/docent/data_models/remove_invalid_citation_ranges.py +0 -0
  22. {docent_python-0.1.15a0 → docent_python-0.1.17a0}/docent/data_models/shared_types.py +0 -0
  23. {docent_python-0.1.15a0 → docent_python-0.1.17a0}/docent/data_models/yaml_util.py +0 -0
  24. {docent_python-0.1.15a0 → docent_python-0.1.17a0}/docent/loaders/load_inspect.py +0 -0
  25. {docent_python-0.1.15a0 → docent_python-0.1.17a0}/docent/py.typed +0 -0
  26. {docent_python-0.1.15a0 → docent_python-0.1.17a0}/docent/samples/__init__.py +0 -0
  27. {docent_python-0.1.15a0 → docent_python-0.1.17a0}/docent/samples/load.py +0 -0
  28. {docent_python-0.1.15a0 → docent_python-0.1.17a0}/docent/samples/log.eval +0 -0
  29. {docent_python-0.1.15a0 → docent_python-0.1.17a0}/docent/samples/tb_airline.json +0 -0
  30. {docent_python-0.1.15a0 → docent_python-0.1.17a0}/docent/sdk/__init__.py +0 -0
  31. {docent_python-0.1.15a0 → docent_python-0.1.17a0}/docent/sdk/client.py +0 -0
  32. {docent_python-0.1.15a0 → docent_python-0.1.17a0}/docent/trace.py +0 -0
  33. {docent_python-0.1.15a0 → docent_python-0.1.17a0}/docent/trace_temp.py +0 -0
  34. {docent_python-0.1.15a0 → docent_python-0.1.17a0}/uv.lock +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: docent-python
-Version: 0.1.15a0
+Version: 0.1.17a0
 Summary: Docent SDK
 Project-URL: Homepage, https://github.com/TransluceAI/docent
 Project-URL: Issues, https://github.com/TransluceAI/docent/issues
docent/data_models/agent_run.py
@@ -147,18 +147,12 @@ class AgentRun(BaseModel):
         # Generate transcript strings using appropriate method
         transcript_strs: list[str] = []
         for i, t in enumerate(self.transcripts):
-            if use_blocks:
-                transcript_content = t.to_str_blocks_with_token_limit(
-                    token_limit=sys.maxsize,
-                    transcript_idx=i,
-                    agent_run_idx=None,
-                )[0]
-            else:
-                transcript_content = t.to_str_with_token_limit(
-                    token_limit=sys.maxsize,
-                    transcript_idx=i,
-                    agent_run_idx=None,
-                )[0]
+            transcript_content = t.to_str(
+                token_limit=sys.maxsize,
+                transcript_idx=i,
+                agent_run_idx=None,
+                use_action_units=not use_blocks,
+            )[0]
             transcript_strs.append(f"<transcript>\n{transcript_content}\n</transcript>")

         transcripts_str = "\n\n".join(transcript_strs)
@@ -207,23 +201,16 @@ class AgentRun(BaseModel):
             ), "Ranges without metadata should be a single message"
             t = self.transcripts[msg_range.start]
             if msg_range.num_tokens < token_limit - 50:
-                if use_blocks:
-                    transcript = f"<transcript>\n{t.to_str_blocks_with_token_limit(token_limit=sys.maxsize)[0]}\n</transcript>"
-                else:
-                    transcript = f"<transcript>\n{t.to_str_with_token_limit(token_limit=sys.maxsize)[0]}\n</transcript>"
+                transcript = f"<transcript>\n{t.to_str(token_limit=sys.maxsize, use_action_units=not use_blocks)[0]}\n</transcript>"
                 result = (
                     f"Here is a partial agent run for analysis purposes only:\n{transcript}"
                 )
                 results.append(result)
             else:
-                if use_blocks:
-                    transcript_fragments = t.to_str_blocks_with_token_limit(
-                        token_limit=token_limit - 50,
-                    )
-                else:
-                    transcript_fragments = t.to_str_with_token_limit(
-                        token_limit=token_limit - 50,
-                    )
+                transcript_fragments = t.to_str(
+                    token_limit=token_limit - 50,
+                    use_action_units=not use_blocks,
+                )
                 for fragment in transcript_fragments:
                     result = f"<transcript>\n{fragment}\n</transcript>"
                     result = (
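
Both call sites in agent_run.py now route through a single Transcript.to_str entry point, with use_action_units=not use_blocks choosing between action-unit grouping and per-message blocks. A minimal sketch of the new rendering loop under that signature; the helper name and setup below are illustrative, not part of the package:

import sys

from docent.data_models.transcript import Transcript


def render_transcripts(transcripts: list[Transcript], use_blocks: bool) -> str:
    # Mirrors the updated loop: one to_str() call per transcript, wrapped in
    # <transcript> tags and joined with blank lines.
    transcript_strs: list[str] = []
    for i, t in enumerate(transcripts):
        content = t.to_str(
            token_limit=sys.maxsize,          # no truncation
            transcript_idx=i,
            agent_run_idx=None,
            use_action_units=not use_blocks,  # True -> action units, False -> per-message blocks
        )[0]
        transcript_strs.append(f"<transcript>\n{content}\n</transcript>")
    return "\n\n".join(transcript_strs)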
docent/data_models/chat/message.py
@@ -1,7 +1,7 @@
 from logging import getLogger
 from typing import Annotated, Any, Literal

-from pydantic import BaseModel, Discriminator
+from pydantic import BaseModel, Discriminator, Field

 from docent.data_models.chat.content import Content
 from docent.data_models.chat.tool import ToolCall
@@ -17,11 +17,13 @@ class BaseChatMessage(BaseModel):
         id: Optional unique identifier for the message.
         content: The message content, either as a string or list of Content objects.
         role: The role of the message sender (system, user, assistant, tool).
+        metadata: Additional structured metadata about the message.
     """

     id: str | None = None
     content: str | list[Content]
     role: Literal["system", "user", "assistant", "tool"]
+    metadata: dict[str, Any] = Field(default_factory=dict)

     @property
     def text(self) -> str:
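
A short sketch of the new per-message metadata field; instantiating BaseChatMessage directly and the example keys are purely illustrative (real transcripts presumably use the role-specific subclasses):

from docent.data_models.chat.message import BaseChatMessage

msg = BaseChatMessage(
    content="Ran the migration script successfully.",
    role="assistant",
    metadata={"model": "gpt-4o", "latency_ms": 1270},  # arbitrary example keys
)
print(msg.metadata)  # {'model': 'gpt-4o', 'latency_ms': 1270}

# The field defaults to an empty dict, so existing payloads still validate.
print(BaseChatMessage(content="hi", role="user").metadata)  # {}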
docent/data_models/transcript.py
@@ -72,6 +72,11 @@ def format_chat_message(
         args = ", ".join([f"{k}={v}" for k, v in tool_call.arguments.items()])
         cur_content += f"\n<tool call>\n{tool_call.function}({args})\n</tool call>"

+    if message.metadata:
+        metadata_yaml = yaml_dump_metadata(message.metadata)
+        if metadata_yaml is not None:
+            cur_content += f"\n<|message metadata|>\n{metadata_yaml}\n</|message metadata|>"
+
     return TRANSCRIPT_BLOCK_TEMPLATE.format(
         index_label=index_label, role=message.role, content=cur_content
     )
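
Roughly how a rendered block picks up the new section, assuming yaml_dump_metadata behaves like a plain YAML dump that returns None when nothing serializes; everything below except the tag names is illustrative:

import yaml

message_metadata = {"tool_runtime_s": 3.2, "retries": 1}
cur_content = "Called the search tool and summarized the results."

# Stand-in for yaml_dump_metadata(); serialize the dict and skip empty output.
metadata_yaml = yaml.dump(message_metadata, width=float("inf")).strip() or None
if metadata_yaml is not None:
    cur_content += f"\n<|message metadata|>\n{metadata_yaml}\n</|message metadata|>"

print(cur_content)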
@@ -301,20 +306,6 @@ class Transcript(BaseModel):
         self.messages = messages
         self._units_of_action = self._compute_units_of_action()

-    def to_str(
-        self,
-        transcript_idx: int = 0,
-        agent_run_idx: int | None = None,
-        highlight_action_unit: int | None = None,
-    ) -> str:
-        return self._to_str_with_token_limit_impl(
-            token_limit=sys.maxsize,
-            transcript_idx=transcript_idx,
-            agent_run_idx=agent_run_idx,
-            use_action_units=True,
-            highlight_action_unit=highlight_action_unit,
-        )[0]
-
     def _generate_formatted_blocks(
         self,
         transcript_idx: int = 0,
@@ -379,9 +370,9 @@ class Transcript(BaseModel):

         return blocks

-    def _to_str_with_token_limit_impl(
+    def to_str(
         self,
-        token_limit: int,
+        token_limit: int = sys.maxsize,
         transcript_idx: int = 0,
         agent_run_idx: int | None = None,
         use_action_units: bool = True,
@@ -408,7 +399,7 @@ class Transcript(BaseModel):
         metadata_obj = to_jsonable_python(self.metadata)
         yaml_width = float("inf")
         block_str = f"<blocks>\n{blocks_str}\n</blocks>\n"
-        metadata_str = f"<metadata>\n{yaml.dump(metadata_obj, width=yaml_width)}\n</metadata>"
+        metadata_str = f"<|transcript metadata|>\n{yaml.dump(metadata_obj, width=yaml_width)}\n</|transcript metadata|>"

         if token_limit == sys.maxsize:
             return [f"{block_str}" f"{metadata_str}"]
@@ -439,56 +430,6 @@ class Transcript(BaseModel):

         return results

-    def to_str_blocks(
-        self,
-        transcript_idx: int = 0,
-        agent_run_idx: int | None = None,
-    ) -> str:
-        """Represents the transcript as a string using individual message blocks.
-
-        Unlike to_str() which groups messages into action units, this method
-        formats each message as an individual block.
-
-        Returns:
-            str: A string representation with individual message blocks.
-        """
-        return self._to_str_with_token_limit_impl(
-            token_limit=sys.maxsize,
-            transcript_idx=transcript_idx,
-            agent_run_idx=agent_run_idx,
-            use_action_units=False,
-        )[0]
-
-    def to_str_with_token_limit(
-        self,
-        token_limit: int,
-        transcript_idx: int = 0,
-        agent_run_idx: int | None = None,
-        highlight_action_unit: int | None = None,
-    ) -> list[str]:
-        """Represents the transcript as a list of strings using action units with token limit handling."""
-        return self._to_str_with_token_limit_impl(
-            token_limit=token_limit,
-            transcript_idx=transcript_idx,
-            agent_run_idx=agent_run_idx,
-            use_action_units=True,
-            highlight_action_unit=highlight_action_unit,
-        )
-
-    def to_str_blocks_with_token_limit(
-        self,
-        token_limit: int,
-        transcript_idx: int = 0,
-        agent_run_idx: int | None = None,
-    ) -> list[str]:
-        """Represents the transcript as individual blocks with token limit handling."""
-        return self._to_str_with_token_limit_impl(
-            token_limit=token_limit,
-            transcript_idx=transcript_idx,
-            agent_run_idx=agent_run_idx,
-            use_action_units=False,
-        )
-
     ##############################
     # New text rendering methods #
     ##############################
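
A migration sketch for callers of the removed wrappers, based only on the signatures visible in this diff; the function below is illustrative and `t` stands for any existing Transcript:

from docent.data_models.transcript import Transcript


def migrate_examples(t: Transcript, limit: int) -> None:
    # to_str_blocks(...)                      -> to_str(use_action_units=False)[0]
    blocks_str = t.to_str(use_action_units=False)[0]

    # to_str_with_token_limit(limit, ...)     -> to_str(token_limit=limit)
    action_unit_fragments = t.to_str(token_limit=limit)

    # to_str_blocks_with_token_limit(limit)   -> to_str(token_limit=limit, use_action_units=False)
    block_fragments = t.to_str(token_limit=limit, use_action_units=False)

    print(blocks_str, len(action_unit_fragments), len(block_fragments))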
docent/sdk/agent_run_writer.py
@@ -105,9 +105,17 @@ class AgentRunWriter:
         # Register shutdown hooks
         atexit.register(self.finish)

+        def _handle_sigint(s: int, f: object) -> None:
+            self._shutdown()
+            raise KeyboardInterrupt
+
+        def _handle_sigterm(s: int, f: object) -> None:
+            self._shutdown()
+            raise SystemExit(0)
+
         # Register signal handlers for graceful shutdown
-        signal.signal(signal.SIGINT, lambda s, f: self._shutdown())  # Ctrl+C
-        signal.signal(signal.SIGTERM, lambda s, f: self._shutdown())  # Kill signal
+        signal.signal(signal.SIGINT, _handle_sigint)  # Ctrl+C
+        signal.signal(signal.SIGTERM, _handle_sigterm)  # Kill signal

     def log_agent_runs(self, agent_runs: list[AgentRun]) -> None:
         """Put a list of AgentRun objects into the queue.
pyproject.toml
@@ -1,7 +1,7 @@
 [project]
 name = "docent-python"
 description = "Docent SDK"
-version = "0.1.15-alpha"
+version = "0.1.17-alpha"
 authors = [
     { name="Transluce", email="info@transluce.org" },
 ]