PyPI - docent-python - Versions diffs - 0.1.51a0__tar.gz → 0.1.53a0__tar.gz - Mend

docent-python 0.1.51a0__tar.gz → 0.1.53a0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (68) hide show

{docent_python-0.1.51a0 → docent_python-0.1.53a0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: docent-python
-Version: 0.1.51a0
+Version: 0.1.53a0
 Summary: Docent SDK
 Project-URL: Homepage, https://github.com/TransluceAI/docent
 Project-URL: Issues, https://github.com/TransluceAI/docent/issues

{docent_python-0.1.51a0 → docent_python-0.1.53a0}/docent/data_models/feedback.py RENAMED Viewed

@@ -101,11 +101,7 @@ class QAPair(BaseModel):
     """A single review-focus answer captured for one run."""
     # What the user was shown
-    focus_item: LabelingRequestFocusItem
-    # Whether the user selected a sample answer or not
-    selected_sample_index: int | None = None
-    is_custom_response: bool = False
+    focus_index: int
     # What the user responded
     answer: str
@@ -115,9 +111,15 @@ class QAPair(BaseModel):
     status: Literal["answered", "skipped"]
     timestamp: datetime = Field(default_factory=datetime.now)
-    def to_str(self, indent: int = 0) -> str:
+    def to_str(self, labeling_request: "LabelingRequest", indent: int = 0) -> str:
         """Render QA pair in a deterministic LLM-facing format."""
-        lines = self.focus_item.to_str(indent=indent).splitlines()
+        if self.focus_index < 0 or self.focus_index >= len(labeling_request.review_focus):
+            raise ValueError(
+                f"focus_index={self.focus_index} is out of bounds for review_focus length "
+                f"{len(labeling_request.review_focus)}"
+            )
+        focus_item = labeling_request.review_focus[self.focus_index]
+        lines = focus_item.to_str(indent=indent).splitlines()
         lines.append(f"User answer: {_text_or_na(self.answer)}")
         lines.append(f"User explanation: {_text_or_na(self.explanation)}")
         return "\n".join(lines)
@@ -204,6 +206,7 @@ class LabeledRun(BaseModel):
 class AgentRunFeedbackContext(BaseModel):
     """All feedback collected for a single agent run."""
+    feedback_context_id: str | None = None
     agent_run_id: str
     round: int
     created_at: datetime = Field(default_factory=datetime.now)
@@ -232,7 +235,10 @@ class AgentRunFeedbackContext(BaseModel):
             qa_lines.append("N/A")
         else:
             for qa_idx, qa_pair in enumerate(self.qa_pairs, start=1):
-                qa_entry_lines = qa_pair.to_str(indent=indent).splitlines()
+                qa_entry_lines = qa_pair.to_str(
+                    labeling_request=self.labeling_request,
+                    indent=indent,
+                ).splitlines()
                 qa_lines.extend(_tag_block(f"QA {qa_idx}", qa_entry_lines, indent))
         lines.extend(_tag_block("Question Answer Pairs", qa_lines, indent))
@@ -280,6 +286,24 @@ class FeedbackContextsResponse(BaseModel):
     contexts: list[FeedbackContext] = Field(default_factory=list[FeedbackContext])
+FeedbackJobStatus = Literal["pending", "running", "cancelling", "canceled", "completed"]
+class StartFeedbackContextsJobResponse(BaseModel):
+    """Response for enqueueing or reusing a feedback contexts job."""
+    job_id: str
+class FeedbackContextsJobStateResponse(BaseModel):
+    """Current feedback contexts job status and round-scoped contexts."""
+    job_id: str | None
+    job_status: FeedbackJobStatus | None
+    current_round: int
+    contexts: list[FeedbackContext] = Field(default_factory=list[FeedbackContext])
 class UserData(BaseModel):
     """User Data (U) for user-context inference and downstream evaluation."""

{docent_python-0.1.51a0 → docent_python-0.1.53a0}/docent/sdk/client.py RENAMED Viewed

@@ -21,7 +21,11 @@ from tqdm import tqdm
 from docent._llm_util.providers.preference_types import ModelOption
 from docent._log_util.logger import LoggerAdapter, get_logger
 from docent.data_models.agent_run import AgentRun
-from docent.data_models.feedback import AgentRunFeedbackContext, FeedbackContextsResponse
+from docent.data_models.feedback import (
+    AgentRunFeedbackContext,
+    FeedbackContextsJobStateResponse,
+    StartFeedbackContextsJobResponse,
+)
 from docent.data_models.judge import Label
 from docent.judges.util.meta_schema import validate_judge_result_schema
 from docent.loaders import load_inspect
@@ -878,6 +882,44 @@ class Docent:
         llm_svc = BaseLLMService()  # reads API keys from environment
         return build_judge(rubric, llm_svc)
+    def start_rubric_eval_job(
+        self,
+        collection_id: str,
+        rubric_id: str,
+        max_agent_runs: int | None = None,
+        n_rollouts_per_input: int = 1,
+        max_parallel: int | None = None,
+    ) -> str:
+        """Start or reuse a rubric evaluation job.
+        Args:
+            collection_id: ID of the Collection.
+            rubric_id: The ID of the rubric to evaluate.
+            max_agent_runs: Optional limit on the number of agent runs to evaluate.
+            n_rollouts_per_input: Number of judge rollouts to generate per agent run.
+            max_parallel: Optional backend concurrency override for the evaluation job.
+        Returns:
+            str: The ID of the created or reused job.
+        Raises:
+            requests.exceptions.HTTPError: If the API request fails.
+            ValueError: If the response does not contain a job ID.
+        """
+        url = f"{self._api_url}/rubric/{collection_id}/{rubric_id}/evaluate"
+        payload = {
+            "max_agent_runs": max_agent_runs,
+            "n_rollouts_per_input": n_rollouts_per_input,
+            "max_parallel": max_parallel,
+        }
+        response = self._session.post(url, json=payload)
+        self._handle_response_errors(response)
+        job_id = response.json().get("job_id")
+        if job_id is None:
+            raise ValueError("Failed to start rubric eval job: 'job_id' missing in response.")
+        return job_id
     def get_rubric_run_state(
         self,
         collection_id: str,
@@ -886,7 +928,7 @@ class Docent:
         filter_dict: dict[str, Any] | None = None,
         include_failures: bool = False,
     ) -> dict[str, Any]:
-        """Get rubric run state for a given collection and rubric.
+        """Get rubric evaluation results and progress for a collection/rubric.
         Args:
             collection_id: ID of the Collection.
@@ -900,6 +942,10 @@ class Docent:
         Raises:
             requests.exceptions.HTTPError: If the API request fails.
+        Note:
+            This method does not start evaluation. Use `start_rubric_eval_job()` to
+            enqueue or reuse a rubric evaluation job.
         """
         url = f"{self._api_url}/rubric/{collection_id}/{rubric_id}/rubric_run_state"
         body = {
@@ -979,7 +1025,7 @@ class Docent:
         self._handle_response_errors(response)
         return response.json()["feedback_session_id"]
-    def get_feedback_contexts(
+    def start_feedback_contexts_job(
         self,
         collection_id: str,
         feedback_session_id: str,
@@ -989,8 +1035,8 @@ class Docent:
         candidate_pool_limit: int = 1_000,
         where_clause: str | None = None,
         increment_round: bool = False,
-    ) -> FeedbackContextsResponse:
-        """Generate or fetch feedback contexts for the current session round."""
+    ) -> StartFeedbackContextsJobResponse:
+        """Start or reuse a background job to compute feedback contexts for a session."""
         payload = {
             "feedback_session_id": feedback_session_id,
             "num_samples": num_samples,
@@ -1000,10 +1046,24 @@ class Docent:
             "where_clause": where_clause,
             "increment_round": increment_round,
         }
-        url = f"{self._api_url}/feedback/{collection_id}/contexts"
+        url = f"{self._api_url}/feedback/{collection_id}/contexts/start"
+        response = self._session.post(url, json=payload)
+        self._handle_response_errors(response)
+        return StartFeedbackContextsJobResponse.model_validate(response.json())
+    def get_feedback_contexts(
+        self,
+        collection_id: str,
+        feedback_session_id: str,
+    ) -> FeedbackContextsJobStateResponse:
+        """Get feedback contexts state for a session, including job status and current round data."""
+        payload = {
+            "feedback_session_id": feedback_session_id,
+        }
+        url = f"{self._api_url}/feedback/{collection_id}/contexts/state"
         response = self._session.post(url, json=payload)
         self._handle_response_errors(response)
-        return FeedbackContextsResponse.model_validate(response.json())
+        return FeedbackContextsJobStateResponse.model_validate(response.json())
     def get_agent_run_feedback_contexts_by_session(
         self,
@@ -1012,9 +1072,8 @@ class Docent:
     ) -> list[AgentRunFeedbackContext]:
         """Get all persisted AgentRun feedback contexts for a feedback session.
-        Unlike `get_feedback_contexts`, this method only reads existing session data and
-        returns fully hydrated AgentRun feedback context objects from the database,
-        including QA pairs and label (if present).
+        Returns fully hydrated AgentRun feedback context objects from the database,
+        including QA pairs and labels (if present), across all rounds in the session.
         """
         url = f"{self._api_url}/feedback/{collection_id}/session/{feedback_session_id}/contexts"
         response = self._session.get(url)

{docent_python-0.1.51a0 → docent_python-0.1.53a0}/pyproject.toml RENAMED Viewed

@@ -1,7 +1,7 @@
 [project]
 name = "docent-python"
 description = "Docent SDK"
-version = "0.1.51-alpha"
+version = "0.1.53-alpha"
 authors = [
   { name="Transluce", email="info@transluce.org" },
 ]