PyPI - docent-python - Versions diffs - 0.1.63a0__tar.gz → 0.1.64a0__tar.gz - Mend

docent-python 0.1.63a0__tar.gz → 0.1.64a0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (88) hide show

{docent_python-0.1.63a0 → docent_python-0.1.64a0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: docent-python
-Version: 0.1.63a0
+Version: 0.1.64a0
 Summary: Docent SDK
 Project-URL: Homepage, https://github.com/TransluceAI/docent
 Project-URL: Issues, https://github.com/TransluceAI/docent/issues

{docent_python-0.1.63a0 → docent_python-0.1.64a0}/docent/judges/impl.py RENAMED Viewed

@@ -145,7 +145,9 @@ class BaseJudge(ABC):
         Raises:
             ValidationFailedException: With the specific reason validation failed
         """
-        return parse_and_validate_output_str(output_str, self.cfg.output_schema)
+        return parse_and_validate_output_str(
+            output_str, self.cfg.output_schema, output_format="json"
+        )
     def _parse_xml_key_output(self, output_str: str, agent_run: AgentRun) -> dict[str, Any]:
         """Parse output by extracting content from XML tags.
@@ -165,17 +167,21 @@ class BaseJudge(ABC):
         # Try to validate any match; take the first
         for response_text in response_matches:
             try:
-                return parse_and_validate_output_str(response_text, self.cfg.output_schema)
+                return parse_and_validate_output_str(
+                    response_text, self.cfg.output_schema, self.cfg.output_format
+                )
             except ValidationFailedException as e:
                 last_error = ValidationFailedException(
                     str(e),
                     failed_output=output_str,
                 )
-        # Try to validate the entire output as JSON
+        # Try to validate the entire output
         # But only if the output _didn't_ contain a matching XML tag
         if not response_matches:
-            return parse_and_validate_output_str(output_str, self.cfg.output_schema)
+            return parse_and_validate_output_str(
+                output_str, self.cfg.output_schema, self.cfg.output_format
+            )
         raise last_error or ValidationFailedException(
             f"No valid output found in <{xml_key}> tags",
@@ -427,7 +433,7 @@ class MultiReflectionJudge(BaseJudge):
             f"We have sampled a judge {len(first_stage_results)} times to get {len(first_stage_results)} independent answers to the same rubric evaluation:\n"
             f"{first_stage_results_text}\n\n"
             f"Please reflect on these answers. Consider all the information and evidence presented. "
-            f"Return a final answer in the same JSON format as before."
+            f"Return a final answer in the same {self.cfg.output_format.upper()} format as before."
         )
         base_messages = self.cfg.materialize_messages(agent_run)
         reflection_prompt: list[ChatMessage] = list(base_messages) + [

{docent_python-0.1.63a0 → docent_python-0.1.64a0}/docent/judges/types.py RENAMED Viewed

@@ -40,9 +40,9 @@ Agent run:
 Your goal is to judge the agent run according to the criteria given in the rubric. Start by faithfully following the decision procedure in extremely careful detail, step by step.
-When you are finished, output your final adjudication, surrounded by <response>...</response> tags. The response must be a valid JSON string which can be parsed with python `json.loads` without any additional processing. Double quotes (`"`) in the middle of a string in the JSON object must be escaped with a backslash.
+When you are finished, output your final adjudication, surrounded by <response>...</response> tags. {output_format_instructions}
-The JSON object you produce must adhere to the following schema:
+The object you produce must adhere to the following schema:
 {output_schema}
 """.strip()
@@ -61,9 +61,9 @@ Agent run:
 Your goal is to judge the agent run according to the criteria given in the rubric. Start by faithfully following the decision procedure in extremely careful detail, step by step. You must execute **one step in the decision procedure per assistant message turn**. After each turn, output a complete and detailed recount of all actions you took, and everything you discovered. Then call the `step_finished` tool.
-When you are finished going through the decision procedure, output your final adjudication, surrounded by <response>...</response> tags. The response must be a valid JSON string which can be parsed with python `json.loads` without any additional processing. Double quotes (`"`) in the middle of a string in the JSON object must be escaped with a backslash.
+When you are finished going through the decision procedure, output your final adjudication, surrounded by <response>...</response> tags. {output_format_instructions}
-The JSON object you produce must adhere to the following schema:
+The object you produce must adhere to the following schema:
 {output_schema}
 """.strip()
@@ -82,12 +82,22 @@ Agent run:
 Your goal is to judge the agent run according to the criteria given in the rubric. Start by faithfully following the decision procedure in extremely careful detail, step by step. You must *fully externalize* your reasoning work by outputting details in the assistant message, surrounded by <reasoning>...</reasoning> tags. The reasoning section can be as messy as you need. You should use *high* reasoning effort.
-When you are finished, output your final adjudication in the assistant message, surrounded by <response>...</response> tags. The response must be a valid JSON string which can be parsed with python `json.loads` without any additional processing. Double quotes (`"`) in the middle of a string in the JSON object must be escaped with a backslash.
+When you are finished, output your final adjudication in the assistant message, surrounded by <response>...</response> tags. {output_format_instructions}
-The JSON object you produce must adhere to the following schema:
+The object you produce must adhere to the following schema:
 {output_schema}
 """.strip()
+OUTPUT_FORMAT_INSTRUCTIONS = {
+    "json": (
+        "The response must be a valid JSON string which can be parsed with python"
+        " `json.loads` without any additional processing."
+        ' Double quotes (`"`) in the middle of a string in the JSON object'
+        " must be escaped with a backslash."
+    ),
+    "yaml": ("The response must be valid YAML that can be parsed with python `yaml.safe_load`."),
+}
 # Other judge defaults
 DEFAULT_JUDGE_OUTPUT_SCHEMA = {
     "type": "object",
@@ -162,6 +172,7 @@ class Rubric(BaseModel):
     # Output parsing
     output_parsing_mode: OutputParsingMode = OutputParsingMode.XML_KEY
     response_xml_key: str = "response"  # Only used when mode is XML_KEY
+    output_format: Literal["json", "yaml"] = "yaml"
     def materialize_messages(self, agent_run: AgentRun) -> list[ChatMessage]:
         """Construct the message list for rubric evaluation.
@@ -177,6 +188,9 @@ class Rubric(BaseModel):
         citation_instructions = (
             JUDGE_CITATION_INSTRUCTIONS if _schema_requests_citations(self.output_schema) else ""
         )
+        format_instructions = OUTPUT_FORMAT_INSTRUCTIONS.get(
+            self.output_format, OUTPUT_FORMAT_INSTRUCTIONS["yaml"]
+        )
         formatter = AgentRunTemplateFormatter(
             agent_run=agent_run,
             rubric_text=self.rubric_text,
@@ -186,8 +200,12 @@ class Rubric(BaseModel):
         # Format each template message
         messages: list[ChatMessage] = []
         for i, template in enumerate(self.prompt_templates):
-            # No need to strip citation instructions here, as this is a new codepath
-            content = formatter.format_template(template.content)
+            # Resolve output format instructions before template formatting,
+            # since it's not a user-facing template variable
+            template_content = template.content.replace(
+                "{output_format_instructions}", format_instructions
+            )
+            content = formatter.format_template(template_content)
             # Auto-append citation instructions to the last message
             if i == len(self.prompt_templates) - 1 and citation_instructions:
@@ -209,7 +227,10 @@ class Rubric(BaseModel):
     ) -> list[PromptTemplateMessage]:
         if not prompt_templates:
             raise ValueError("prompt_templates must include at least one template message.")
-        AgentRunTemplateFormatter.validate_template_variables([t.content for t in prompt_templates])
+        AgentRunTemplateFormatter.validate_template_variables(
+            [t.content for t in prompt_templates],
+            allowed_unknown={"output_format_instructions"},
+        )
         return prompt_templates
     @field_validator("output_schema")

{docent_python-0.1.63a0 → docent_python-0.1.64a0}/docent/judges/util/parse_output.py RENAMED Viewed

@@ -1,17 +1,23 @@
-from typing import Any, cast
+from typing import Any, Literal, cast
 import jsonschema
+import yaml
 from docent._llm_util.data_models.exceptions import ValidationFailedException
 from docent.judges.util.forgiving_json import forgiving_json_loads
-def parse_and_validate_output_str(output_str: str, output_schema: dict[str, Any]) -> dict[str, Any]:
-    """Parse and validate LLM output against a JSON schema with forgiving parsing.
+def parse_and_validate_output_str(
+    output_str: str,
+    output_schema: dict[str, Any],
+    output_format: Literal["json", "yaml"] = "json",
+) -> dict[str, Any]:
+    """Parse and validate LLM output against a JSON schema.
     Args:
         output_str: The LLM output string to parse
         output_schema: The JSON schema to validate against
+        output_format: The format to parse as ("json" or "yaml")
     Returns:
         Validated output dict
@@ -20,10 +26,13 @@ def parse_and_validate_output_str(output_str: str, output_schema: dict[str, Any]
         ValidationFailedException: If parsing or validation fails
     """
     try:
-        output = forgiving_json_loads(output_str)
+        if output_format == "yaml":
+            output = yaml.safe_load(output_str)
+        else:
+            output = forgiving_json_loads(output_str)
     except Exception as e:
         raise ValidationFailedException(
-            f"Failed to parse JSON: {e}",
+            f"Failed to parse {output_format.upper()}: {e}",
             failed_output=output_str,
         )
@@ -34,6 +43,7 @@ def parse_and_validate_output_str(output_str: str, output_schema: dict[str, Any]
         )
     output_dict = cast(dict[str, Any], output)
     try:
         jsonschema.validate(output_dict, output_schema)
     except jsonschema.ValidationError as e:

{docent_python-0.1.63a0 → docent_python-0.1.64a0}/docent/judges/util/template_formatter.py RENAMED Viewed

@@ -53,6 +53,7 @@ class AgentRunTemplateFormatter:
     - {agent_run} - Full agent run text representation
     - {rubric} - The rubric text
     - {output_schema} - JSON-formatted output schema
+    - {output_format_instructions} - Format-specific instructions (JSON or YAML)
     Example:
         formatter = AgentRunTemplateFormatter(

{docent_python-0.1.63a0 → docent_python-0.1.64a0}/docent/sdk/_dql.py RENAMED Viewed

@@ -166,17 +166,24 @@ class DocentDqlMixin(DocentBase):
         Args:
             collection_id: ID of the Collection.
-            apply_base_filter: Whether to apply the collection view's base filter.
+            apply_base_filter: Deprecated. View base filters are no longer applied implicitly.
         Returns:
             list[str]: Agent run IDs for the collection.
         Raises:
+            ValueError: If apply_base_filter is True.
             requests.exceptions.HTTPError: If the API request fails.
         """
-        url = f"{self._api_url}/{collection_id}/agent_run_ids"
-        params = {"apply_base_filter": "true"} if apply_base_filter else None
-        response = self._session.get(url, params=params)
+        if apply_base_filter:
+            raise ValueError(
+                "apply_base_filter=True is no longer sufficient because view base filters are no "
+                "longer part of agent-run browsing. Pass an explicit filter to the query endpoint "
+                "instead."
+            )
+        url = f"{self._api_url}/{collection_id}/agent_run_ids/query"
+        response = self._session.post(url, json={})
         self._handle_response_errors(response)
         return response.json()

{docent_python-0.1.63a0 → docent_python-0.1.64a0}/docent/sdk/_readings.py RENAMED Viewed

@@ -20,6 +20,7 @@ from docent.data_models.reading import (
     DqlOnlyStepSubmission,
     EndStepGroupSubmission,
     PlanJobCancelledEvent,
+    PlanJobFailedEvent,
     PlanSnapshotEvent,
     PlanStepCompletedEvent,
     PlanStepFailedEvent,
@@ -35,6 +36,11 @@ from docent.data_models.reading import (
     StepGroupSubmission,
 )
+class _ReadingPlanFailure(RuntimeError):
+    """Fatal reading-plan failure that should stop SDK scripts."""
 _plan_stream_event_adapter: TypeAdapter[PlanStreamEvent] = TypeAdapter(PlanStreamEvent)
 from docent.sdk._base import DocentBase
 from docent.sdk.llm_context import ContextItemRef, Prompt
@@ -254,11 +260,39 @@ class DocentReadingsMixin(DocentBase):
                 )
                 try:
                     self.flush(open_in_browser=True)
+                except _ReadingPlanFailure as exc:
+                    self._logger.error("%s", exc)
+                    raise SystemExit(1) from exc
                 except Exception:
                     self._logger.exception("Auto-flush failed")
         atexit.register(_atexit_flush)
+    def _register_notebook_hook(self) -> None:
+        if self._notebook_hook_registered:
+            return
+        try:
+            ip = get_ipython()  # type: ignore[name-defined]
+        except NameError:
+            return
+        if ip is None:
+            return
+        self._is_notebook = True
+        self._notebook_hook_registered = True
+        def _cell_post_run(*args: Any, **kwargs: Any) -> None:
+            if self._pending and self._auto_flush:
+                open_browser = self._plan_id is None
+                try:
+                    cast(Any, self).flush(open_in_browser=open_browser)
+                except _ReadingPlanFailure as exc:
+                    self._logger.error("%s", exc)
+                    raise
+                except Exception:
+                    self._logger.exception("Notebook auto-flush failed")
+        ip.events.register("post_run_cell", _cell_post_run)  # type: ignore[reportUnknownMemberType]
     def _detect_source_script(self) -> str | None:
         """Best-effort detection of the calling script or notebook name."""
         if self._is_notebook:
@@ -912,6 +946,42 @@ class DocentReadingsMixin(DocentBase):
             return f"{name} [${alias}]"
         return f"{default} [${alias}]"
+    def _reading_plan_url(self, collection_id: str, plan_id: str) -> str:
+        return f"{self._frontend_url}/dashboard/{collection_id}/reading-plan/{plan_id}"
+    def _plan_failure(
+        self,
+        *,
+        collection_id: str,
+        plan_id: str,
+        message: str,
+        alias: str | None = None,
+        name: str | None = None,
+        default_label: str = "Reading",
+    ) -> _ReadingPlanFailure:
+        plan_url = self._reading_plan_url(collection_id, plan_id)
+        if alias is None:
+            return _ReadingPlanFailure(
+                f"Reading plan {plan_id} failed: {message}. Open plan: {plan_url}"
+            )
+        label = self._format_step_label(name, alias, default_label)
+        return _ReadingPlanFailure(
+            f"{label} failed in reading plan {plan_id}: {message}. Open plan: {plan_url}"
+        )
+    @staticmethod
+    def _event_error_message(error: Any) -> str:
+        if error is None:
+            return "unknown error"
+        if hasattr(error, "message"):
+            return str(error.message)
+        if isinstance(error, dict):
+            error_data = cast(dict[str, Any], error)
+            message = error_data.get("message")
+            if message is not None:
+                return str(message)
+        return str(cast(object, error))
     def _preview_and_wait(
         self,
         *,
@@ -926,9 +996,21 @@ class DocentReadingsMixin(DocentBase):
         for es in submit_response.entry_statuses:
             if es.entry_type == "dql_only":
-                if es.status == "cached" and es.dql_preview is not None:
-                    self._log_dql_preview(pending_names.get(es.alias), es.alias, es.dql_preview)
-                elif es.status != "cached":
+                if es.status == "cached":
+                    if es.dql_preview is not None:
+                        self._log_dql_preview(pending_names.get(es.alias), es.alias, es.dql_preview)
+                    else:
+                        unsettled_dql_aliases[es.alias] = es
+                elif es.status == "failed":
+                    raise self._plan_failure(
+                        collection_id=collection_id,
+                        plan_id=submit_response.plan_id,
+                        alias=es.alias,
+                        name=pending_names.get(es.alias),
+                        default_label="DQL",
+                        message="DQL step failed during submission",
+                    )
+                else:
                     unsettled_dql_aliases[es.alias] = es
             elif es.entry_type == "reading":
@@ -939,27 +1021,34 @@ class DocentReadingsMixin(DocentBase):
                         es.result_count,
                         es.result_preview,
                     )
+                elif es.status == "failed":
+                    raise self._plan_failure(
+                        collection_id=collection_id,
+                        plan_id=submit_response.plan_id,
+                        alias=es.alias,
+                        name=pending_names.get(es.alias),
+                        message="reading step failed during submission",
+                    )
                 else:
                     unsettled_reading_aliases.add(es.alias)
-        if not unsettled_reading_aliases:
-            return
         plan_id = submit_response.plan_id
         assert plan_id is not None
-        completed_aliases: set[str] = {
-            es.alias
-            for es in submit_response.entry_statuses
-            if es.status == "cached" and es.entry_type == "reading"
-        }
+        if not unsettled_reading_aliases:
+            self._validate_unsettled_dql_aliases(
+                collection_id=collection_id,
+                plan_id=plan_id,
+                unsettled_dql_aliases=unsettled_dql_aliases,
+                pending_names=pending_names,
+            )
+            return
         self._block_on_plan_stream(
             collection_id=collection_id,
             plan_id=plan_id,
             unsettled_reading_aliases=unsettled_reading_aliases,
             unsettled_dql_aliases=unsettled_dql_aliases,
-            completed_aliases=completed_aliases,
             reading_handles=reading_handles,
             pending_names=pending_names,
         )
@@ -971,27 +1060,28 @@ class DocentReadingsMixin(DocentBase):
         plan_id: str,
         unsettled_reading_aliases: set[str],
         unsettled_dql_aliases: dict[str, PlanStepSubmissionStatus],
-        completed_aliases: set[str],
         reading_handles: dict[str, Reading],
         pending_names: dict[str, str | None],
     ) -> None:
-        """Connect to the plan SSE stream and block until all reading steps settle."""
+        """Connect to the plan SSE stream and block until all submitted steps settle."""
         stream_url = f"{self._api_url}/reading/{collection_id}/reading-plan/{plan_id}/stream"
         response = self._session.get(stream_url, stream=True)
         self._handle_response_errors(response)
-        alias_to_reading_id: dict[str, str] = {}
         pending = set(unsettled_reading_aliases)
         deadline = time.monotonic() + self._FLUSH_TIMEOUT_SECONDS
         for line in response.iter_lines(decode_unicode=True):
             if time.monotonic() > deadline:
-                self._logger.warning(
-                    "Timed out waiting for reading plan steps after %ds. Remaining steps: %s",
-                    self._FLUSH_TIMEOUT_SECONDS,
-                    ", ".join(sorted(pending)),
+                remaining = sorted(pending | set(unsettled_dql_aliases))
+                raise self._plan_failure(
+                    collection_id=collection_id,
+                    plan_id=plan_id,
+                    message=(
+                        f"timed out after {self._FLUSH_TIMEOUT_SECONDS}s waiting for "
+                        f"steps to settle: {', '.join(remaining)}"
+                    ),
                 )
-                break
             if not line or not line.startswith("data: "):
                 continue
@@ -1006,11 +1096,8 @@ class DocentReadingsMixin(DocentBase):
             if isinstance(event, PlanSnapshotEvent):
                 for step in event.steps:
-                    if step.reading_id is not None:
-                        alias_to_reading_id[step.alias] = step.reading_id
                     if step.alias in pending and step.derived_status in ("completed", "cached"):
                         pending.discard(step.alias)
-                        completed_aliases.add(step.alias)
                         if step.reading_id is not None:
                             self._log_step_completed_preview(
                                 collection_id,
@@ -1020,20 +1107,24 @@ class DocentReadingsMixin(DocentBase):
                                 reading_handles.get(step.alias),
                             )
                     elif step.alias in pending and step.derived_status == "failed":
-                        self._logger.warning(
-                            "Step %s failed",
-                            self._format_step_label(
-                                pending_names.get(step.alias), step.alias, "Reading"
-                            ),
+                        raise self._plan_failure(
+                            collection_id=collection_id,
+                            plan_id=plan_id,
+                            alias=step.alias,
+                            name=pending_names.get(step.alias),
+                            message="step reached failed status",
                         )
-                        pending.discard(step.alias)
-                if not pending:
+                self._try_preview_unresolved_dql(
+                    collection_id,
+                    plan_id,
+                    unsettled_dql_aliases,
+                    pending_names,
+                )
+                if not pending and not unsettled_dql_aliases:
                     break
                 continue
             if isinstance(event, PlanStepCompletedEvent):
-                alias_to_reading_id[event.step_alias] = event.reading_id
-                completed_aliases.add(event.step_alias)
                 if event.step_alias in pending:
                     pending.discard(event.step_alias)
                     self._log_step_completed_preview(
@@ -1048,36 +1139,57 @@ class DocentReadingsMixin(DocentBase):
                     collection_id,
                     plan_id,
                     unsettled_dql_aliases,
-                    completed_aliases,
-                    alias_to_reading_id,
                     pending_names,
                 )
             elif isinstance(event, PlanStepFailedEvent) and event.step_alias in pending:
-                label = self._format_step_label(
-                    pending_names.get(event.step_alias), event.step_alias, "Reading"
+                raise self._plan_failure(
+                    collection_id=collection_id,
+                    plan_id=plan_id,
+                    alias=event.step_alias,
+                    name=pending_names.get(event.step_alias),
+                    message=self._event_error_message(event.error),
                 )
-                msg = event.error.message if event.error else "unknown error"
-                self._logger.warning("Step %s failed: %s", label, msg)
-                pending.discard(event.step_alias)
             elif isinstance(event, PlanJobCancelledEvent):
-                self._logger.warning(
-                    "Reading plan job was cancelled. Remaining steps: %s",
-                    ", ".join(sorted(pending)),
+                remaining = sorted(pending | set(unsettled_dql_aliases))
+                raise self._plan_failure(
+                    collection_id=collection_id,
+                    plan_id=plan_id,
+                    message=(
+                        "job was cancelled"
+                        + (f"; remaining steps: {', '.join(remaining)}" if remaining else "")
+                    ),
                 )
-                break
-            if not pending:
+            elif isinstance(event, PlanJobFailedEvent):
+                raise self._plan_failure(
+                    collection_id=collection_id,
+                    plan_id=plan_id,
+                    message=f"job failed: {self._event_error_message(event.error)}",
+                )
+            if not pending and not unsettled_dql_aliases:
                 break
+        if pending:
+            raise self._plan_failure(
+                collection_id=collection_id,
+                plan_id=plan_id,
+                message=f"stream ended before steps settled: {', '.join(sorted(pending))}",
+            )
+        self._validate_unsettled_dql_aliases(
+            collection_id=collection_id,
+            plan_id=plan_id,
+            unsettled_dql_aliases=unsettled_dql_aliases,
+            pending_names=pending_names,
+        )
     def _try_preview_unresolved_dql(
         self,
         collection_id: str,
         plan_id: str,
         unsettled_dql_aliases: dict[str, PlanStepSubmissionStatus],
-        completed_aliases: set[str],
-        alias_to_reading_id: dict[str, str],
         pending_names: dict[str, str | None],
     ) -> None:
         """Check if any unresolved DQL steps can now be previewed."""
@@ -1091,6 +1203,34 @@ class DocentReadingsMixin(DocentBase):
         for alias in newly_resolved:
             unsettled_dql_aliases.pop(alias, None)
+    def _validate_unsettled_dql_aliases(
+        self,
+        *,
+        collection_id: str,
+        plan_id: str,
+        unsettled_dql_aliases: dict[str, PlanStepSubmissionStatus],
+        pending_names: dict[str, str | None],
+    ) -> None:
+        """Execute any remaining DQL-only steps or fail if they cannot resolve."""
+        self._try_preview_unresolved_dql(
+            collection_id,
+            plan_id,
+            unsettled_dql_aliases,
+            pending_names,
+        )
+        if not unsettled_dql_aliases:
+            return
+        aliases = ", ".join(
+            self._format_step_label(pending_names.get(alias), alias, "DQL")
+            for alias in sorted(unsettled_dql_aliases)
+        )
+        raise self._plan_failure(
+            collection_id=collection_id,
+            plan_id=plan_id,
+            message=f"DQL step dependencies did not resolve: {aliases}",
+        )
     def _try_execute_dql_for_alias(
         self,
         collection_id: str,
@@ -1099,32 +1239,56 @@ class DocentReadingsMixin(DocentBase):
         name: str | None,
     ) -> dict[str, Any] | None:
         """Attempt to execute a DQL step that was previously unresolved."""
-        try:
-            plan_state = self._get_reading_plan_state(collection_id, plan_id)
-            for step in plan_state.get("steps", []):
-                if step.get("alias") == alias:
-                    status = step.get("derived_status")
-                    if status == "cached":
-                        dql_query = step.get("dql_query")
-                        if dql_query:
-                            result = cast(Any, self).execute_dql(
-                                collection_id,
-                                dql_query,
-                                reading_plan_id=plan_id,
-                            )
-                            from docent.data_models.reading import DqlPreview
-                            preview = DqlPreview(
-                                columns=result.get("columns", []),
-                                rows=result.get("rows", [])[:10],
-                                truncated=result.get("truncated", False),
-                                row_count=result.get("row_count", 0),
-                            )
-                            self._log_dql_preview(name, alias, preview)
-                            return result
-                    break
-        except Exception:
-            self._logger.debug("Could not preview DQL step %s", alias, exc_info=True)
+        plan_state = self._get_reading_plan_state(collection_id, plan_id)
+        for step in plan_state.get("steps", []):
+            if step.get("alias") == alias:
+                status = step.get("derived_status")
+                if status == "cached":
+                    dql_query = step.get("dql_query")
+                    if not dql_query:
+                        raise self._plan_failure(
+                            collection_id=collection_id,
+                            plan_id=plan_id,
+                            alias=alias,
+                            name=name,
+                            default_label="DQL",
+                            message="DQL step has no query",
+                        )
+                    try:
+                        result = cast(Any, self).execute_dql(
+                            collection_id,
+                            dql_query,
+                            reading_plan_id=plan_id,
+                        )
+                    except Exception as exc:
+                        raise self._plan_failure(
+                            collection_id=collection_id,
+                            plan_id=plan_id,
+                            alias=alias,
+                            name=name,
+                            default_label="DQL",
+                            message=str(exc),
+                        ) from exc
+                    from docent.data_models.reading import DqlPreview
+                    preview = DqlPreview(
+                        columns=result.get("columns", []),
+                        rows=result.get("rows", [])[:10],
+                        truncated=result.get("truncated", False),
+                        row_count=result.get("row_count", 0),
+                    )
+                    self._log_dql_preview(name, alias, preview)
+                    return result
+                if status == "failed":
+                    raise self._plan_failure(
+                        collection_id=collection_id,
+                        plan_id=plan_id,
+                        alias=alias,
+                        name=name,
+                        default_label="DQL",
+                        message="DQL step reached failed status",
+                    )
+                break
         return None
     def _log_step_completed_preview(
@@ -1273,8 +1437,14 @@ class DocentReadingsMixin(DocentBase):
             event_type = event.get("type", "")
             if event.get("step_alias") != alias:
                 if event_type == "job_failed":
-                    raise RuntimeError(
-                        f"Reading plan job {job_id!r} failed before {alias!r} completed"
+                    raise self._plan_failure(
+                        collection_id=collection_id,
+                        plan_id=plan_id,
+                        alias=alias,
+                        message=(
+                            f"job {job_id!r} failed before step completed: "
+                            f"{self._event_error_message(event.get('error'))}"
+                        ),
                     )
                 if event_type == "job_completed":
                     break
@@ -1282,7 +1452,12 @@ class DocentReadingsMixin(DocentBase):
             if event_type == "step_completed":
                 return cast(str | None, event.get("reading_id"))
             if event_type == "step_failed":
-                raise RuntimeError(f"Reading plan step {alias!r} failed")
+                raise self._plan_failure(
+                    collection_id=collection_id,
+                    plan_id=plan_id,
+                    alias=alias,
+                    message=self._event_error_message(event.get("error")),
+                )
         return None
     def _wait_for_reading(self, reading: Reading) -> None:
@@ -1325,7 +1500,12 @@ class DocentReadingsMixin(DocentBase):
                 reading_id = step_reading_id
                 break
             if step_status == "failed":
-                raise RuntimeError(f"Reading plan step {alias!r} failed")
+                raise self._plan_failure(
+                    collection_id=collection_id,
+                    plan_id=plan_id,
+                    alias=alias,
+                    message="step reached failed status",
+                )
             active_job_id = plan_state.get("active_job_id")
             if isinstance(active_job_id, str) and active_job_id:

{docent_python-0.1.63a0 → docent_python-0.1.64a0}/docent/sdk/llm_context.py RENAMED Viewed

@@ -988,7 +988,7 @@ class LLMContext:
         if interactive:
             context_description = "You are a helpful assistant that specializes in analyzing transcripts of AI agent behavior."
         else:
-            context_description = "You are a tasked with analyzing transcripts of AI agent behavior. You are not interacting with a user directly."
+            context_description = "You are tasked with analyzing transcripts of AI agent behavior. You are not interacting with a user directly."
         if not include_citations:
             return context_description

{docent_python-0.1.63a0 → docent_python-0.1.64a0}/pyproject.toml RENAMED Viewed

@@ -1,7 +1,7 @@
 [project]
 name = "docent-python"
 description = "Docent SDK"
-version = "0.1.63-alpha"
+version = "0.1.64-alpha"
 authors = [
   { name="Transluce", email="info@transluce.org" },
 ]