PyPI - docent-python - Versions diffs - 0.1.17a0__py3-none-any.whl → 0.1.27a0__py3-none-any.whl - Mend

docent-python 0.1.17a0__py3-none-any.whl → 0.1.27a0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of docent-python might be problematic. Click here for more details.

Files changed (45)

docent/_llm_util/__init__.py +0 -0
docent/_llm_util/data_models/__init__.py +0 -0
docent/_llm_util/data_models/exceptions.py +48 -0
docent/_llm_util/data_models/llm_output.py +331 -0
docent/_llm_util/llm_cache.py +193 -0
docent/_llm_util/llm_svc.py +472 -0
docent/_llm_util/model_registry.py +130 -0
docent/_llm_util/providers/__init__.py +0 -0
docent/_llm_util/providers/anthropic.py +537 -0
docent/_llm_util/providers/common.py +41 -0
docent/_llm_util/providers/google.py +530 -0
docent/_llm_util/providers/openai.py +745 -0
docent/_llm_util/providers/openrouter.py +375 -0
docent/_llm_util/providers/preference_types.py +104 -0
docent/_llm_util/providers/provider_registry.py +164 -0
docent/data_models/__init__.py +2 -0
docent/data_models/agent_run.py +6 -5
docent/data_models/chat/__init__.py +6 -1
docent/data_models/citation.py +103 -22
docent/data_models/judge.py +19 -0
docent/data_models/metadata_util.py +16 -0
docent/data_models/remove_invalid_citation_ranges.py +23 -10
docent/data_models/transcript.py +20 -16
docent/data_models/util.py +170 -0
docent/judges/__init__.py +23 -0
docent/judges/analysis.py +77 -0
docent/judges/impl.py +587 -0
docent/judges/runner.py +129 -0
docent/judges/stats.py +205 -0
docent/judges/types.py +311 -0
docent/judges/util/forgiving_json.py +108 -0
docent/judges/util/meta_schema.json +86 -0
docent/judges/util/meta_schema.py +29 -0
docent/judges/util/parse_output.py +87 -0
docent/judges/util/voting.py +139 -0
docent/sdk/agent_run_writer.py +62 -19
docent/sdk/client.py +244 -23
docent/trace.py +413 -90
{docent_python-0.1.17a0.dist-info → docent_python-0.1.27a0.dist-info}/METADATA +11 -5
docent_python-0.1.27a0.dist-info/RECORD +59 -0
docent/data_models/metadata.py +0 -229
docent/data_models/yaml_util.py +0 -12
docent_python-0.1.17a0.dist-info/RECORD +0 -32
{docent_python-0.1.17a0.dist-info → docent_python-0.1.27a0.dist-info}/WHEEL +0 -0
{docent_python-0.1.17a0.dist-info → docent_python-0.1.27a0.dist-info}/licenses/LICENSE.md +0 -0

docent/sdk/client.py CHANGED Viewed

@@ -8,6 +8,8 @@ from tqdm import tqdm
 from docent._log_util.logger import get_logger
 from docent.data_models.agent_run import AgentRun
+from docent.data_models.judge import Label
+from docent.judges.util.meta_schema import validate_judge_result_schema
 from docent.loaders import load_inspect
 logger = get_logger(__name__)
@@ -48,13 +50,24 @@ class Docent:
         self._login(api_key)
+    def _handle_response_errors(self, response: requests.Response):
+        """Handle API response and raise informative errors."""
+        if response.status_code >= 400:
+            try:
+                error_data = response.json()
+                detail = error_data.get("detail", response.text)
+            except Exception:
+                detail = response.text
+            raise requests.HTTPError(f"HTTP {response.status_code}: {detail}", response=response)
     def _login(self, api_key: str):
         """Login with email/password to establish session."""
         self._session.headers.update({"Authorization": f"Bearer {api_key}"})
         url = f"{self._server_url}/api-keys/test"
         response = self._session.get(url)
-        response.raise_for_status()
+        self._handle_response_errors(response)
         logger.info("Logged in with API key")
         return
@@ -90,7 +103,7 @@ class Docent:
         }
         response = self._session.post(url, json=payload)
-        response.raise_for_status()
+        self._handle_response_errors(response)
         response_data = response.json()
         collection_id = response_data.get("collection_id")
@@ -134,13 +147,13 @@ class Docent:
                 payload = {"agent_runs": [ar.model_dump(mode="json") for ar in batch]}
                 response = self._session.post(url, json=payload)
-                response.raise_for_status()
+                self._handle_response_errors(response)
                 pbar.update(len(batch))
         url = f"{self._server_url}/{collection_id}/compute_embeddings"
         response = self._session.post(url)
-        response.raise_for_status()
+        self._handle_response_errors(response)
         logger.info(f"Successfully added {total_runs} agent runs to Collection '{collection_id}'")
         return {"status": "success", "total_runs_added": total_runs}
@@ -156,7 +169,7 @@ class Docent:
         """
         url = f"{self._server_url}/collections"
         response = self._session.get(url)
-        response.raise_for_status()
+        self._handle_response_errors(response)
         return response.json()
     def list_rubrics(self, collection_id: str) -> list[dict[str, Any]]:
@@ -173,25 +186,28 @@ class Docent:
         """
         url = f"{self._server_url}/rubric/{collection_id}/rubrics"
         response = self._session.get(url)
-        response.raise_for_status()
+        self._handle_response_errors(response)
         return response.json()
-    def get_rubric_run_state(self, collection_id: str, rubric_id: str) -> dict[str, Any]:
+    def get_rubric_run_state(
+        self, collection_id: str, rubric_id: str, version: int | None = None
+    ) -> dict[str, Any]:
         """Get rubric run state for a given collection and rubric.
         Args:
             collection_id: ID of the Collection.
             rubric_id: The ID of the rubric to get run state for.
+            version: The version of the rubric to get run state for. If None, the latest version is used.
         Returns:
-            dict: Dictionary containing rubric run state with results, job_id, and total_agent_runs.
+            dict: Dictionary containing rubric run state with results, job_id, and total_results_needed.
         Raises:
             requests.exceptions.HTTPError: If the API request fails.
         """
         url = f"{self._server_url}/rubric/{collection_id}/{rubric_id}/rubric_run_state"
-        response = self._session.get(url)
-        response.raise_for_status()
+        response = self._session.get(url, params={"version": version})
+        self._handle_response_errors(response)
         return response.json()
     def get_clustering_state(self, collection_id: str, rubric_id: str) -> dict[str, Any]:
@@ -209,7 +225,7 @@ class Docent:
         """
         url = f"{self._server_url}/rubric/{collection_id}/{rubric_id}/clustering_job"
         response = self._session.get(url)
-        response.raise_for_status()
+        self._handle_response_errors(response)
         return response.json()
     def get_cluster_centroids(self, collection_id: str, rubric_id: str) -> list[dict[str, Any]]:
@@ -244,6 +260,114 @@ class Docent:
         clustering_state = self.get_clustering_state(collection_id, rubric_id)
         return clustering_state.get("assignments", {})
+    def create_label_set(
+        self,
+        collection_id: str,
+        name: str,
+        label_schema: dict[str, Any],
+        description: str | None = None,
+    ) -> str:
+        """Create a new label set with a JSON schema.
+        Args:
+            collection_id: ID of the collection.
+            name: Name of the label set.
+            label_schema: JSON schema for validating labels in this set.
+            description: Optional description of the label set.
+        Returns:
+            str: The ID of the created label set.
+        Raises:
+            ValueError: If the response is missing the label_set_id.
+            jsonschema.ValidationError: If the label schema is invalid.
+            requests.exceptions.HTTPError: If the API request fails.
+        """
+        validate_judge_result_schema(label_schema)
+        url = f"{self._server_url}/label/{collection_id}/label_set"
+        payload = {
+            "name": name,
+            "label_schema": label_schema,
+            "description": description,
+        }
+        response = self._session.post(url, json=payload)
+        self._handle_response_errors(response)
+        return response.json()["label_set_id"]
+    def add_label(
+        self,
+        collection_id: str,
+        label: Label,
+    ) -> dict[str, str]:
+        """Create a label in a label set.
+        Args:
+            collection_id: ID of the Collection.
+            label: A `Label` object that must comply with the label set's schema.
+        Returns:
+            dict: API response containing the label_id.
+        Raises:
+            requests.exceptions.HTTPError: If the API request fails or validation errors occur.
+        """
+        url = f"{self._server_url}/label/{collection_id}/label"
+        payload = {"label": label.model_dump(mode="json")}
+        response = self._session.post(url, json=payload)
+        self._handle_response_errors(response)
+        return response.json()
+    def add_labels(
+        self,
+        collection_id: str,
+        labels: list[Label],
+    ) -> dict[str, Any]:
+        """Create multiple labels.
+        Args:
+            collection_id: ID of the Collection.
+            labels: List of `Label` objects.
+        Returns:
+            dict: API response containing label_ids list and optional errors list.
+        Raises:
+            ValueError: If no labels are provided.
+            requests.exceptions.HTTPError: If the API request fails.
+        """
+        if not labels:
+            raise ValueError("labels must contain at least one entry")
+        url = f"{self._server_url}/label/{collection_id}/labels"
+        payload = {"labels": [label.model_dump(mode="json") for label in labels]}
+        response = self._session.post(url, json=payload)
+        self._handle_response_errors(response)
+        return response.json()
+    def get_labels(
+        self, collection_id: str, label_set_id: str, filter_valid_labels: bool = False
+    ) -> list[dict[str, Any]]:
+        """Retrieve all labels in a label set.
+        Args:
+            collection_id: ID of the Collection.
+            label_set_id: ID of the label set to fetch labels for.
+            filter_valid_labels: If True, only return labels that match the label set schema
+                INCLUDING requirements. Default is False (returns all labels).
+        Returns:
+            list: List of label dictionaries.
+        Raises:
+            requests.exceptions.HTTPError: If the API request fails.
+        """
+        url = f"{self._server_url}/label/{collection_id}/label_set/{label_set_id}/labels"
+        params = {"filter_valid_labels": filter_valid_labels}
+        response = self._session.get(url, params=params)
+        self._handle_response_errors(response)
+        return response.json()
     def get_agent_run(self, collection_id: str, agent_run_id: str) -> AgentRun | None:
         """Get a specific agent run by its ID.
@@ -259,7 +383,7 @@ class Docent:
         """
         url = f"{self._server_url}/{collection_id}/agent_run"
         response = self._session.get(url, params={"agent_run_id": agent_run_id})
-        response.raise_for_status()
+        self._handle_response_errors(response)
         if response.json() is None:
             return None
         else:
@@ -267,6 +391,24 @@ class Docent:
             # TODO(mengk): kinda hacky
             return AgentRun.model_validate(response.json())
+    def get_chat_sessions(self, collection_id: str, agent_run_id: str) -> list[dict[str, Any]]:
+        """Get all chat sessions for an agent run, excluding judge result sessions.
+        Args:
+            collection_id: ID of the Collection.
+            agent_run_id: The ID of the agent run to retrieve chat sessions for.
+        Returns:
+            list: List of chat session dictionaries.
+        Raises:
+            requests.exceptions.HTTPError: If the API request fails.
+        """
+        url = f"{self._server_url}/chat/{collection_id}/{agent_run_id}/sessions"
+        response = self._session.get(url)
+        self._handle_response_errors(response)
+        return response.json()
     def make_collection_public(self, collection_id: str) -> dict[str, Any]:
         """Make a collection publicly accessible to anyone with the link.
@@ -281,7 +423,7 @@ class Docent:
         """
         url = f"{self._server_url}/{collection_id}/make_public"
         response = self._session.post(url)
-        response.raise_for_status()
+        self._handle_response_errors(response)
         logger.info(f"Successfully made Collection '{collection_id}' public")
         return response.json()
@@ -303,17 +445,96 @@ class Docent:
         payload = {"email": email}
         response = self._session.post(url, json=payload)
-        try:
-            response.raise_for_status()
-        except requests.exceptions.HTTPError:
-            if response.status_code == 404:
-                raise ValueError(f"The user you are trying to share with ({email}) does not exist.")
-            else:
-                raise  # Re-raise the original exception
+        self._handle_response_errors(response)
         logger.info(f"Successfully shared Collection '{collection_id}' with {email}")
         return response.json()
+    def get_dql_schema(self, collection_id: str) -> dict[str, Any]:
+        """Retrieve the DQL schema for a collection.
+        Args:
+            collection_id: ID of the Collection.
+        Returns:
+            dict: Dictionary containing available tables, columns, and metadata for DQL queries.
+        Raises:
+            requests.exceptions.HTTPError: If the API request fails.
+        """
+        url = f"{self._server_url}/dql/{collection_id}/schema"
+        response = self._session.get(url)
+        self._handle_response_errors(response)
+        return response.json()
+    def execute_dql(self, collection_id: str, dql: str) -> dict[str, Any]:
+        """Execute a DQL query against a collection.
+        Args:
+            collection_id: ID of the Collection.
+            dql: The DQL query string to execute.
+        Returns:
+            dict: Query execution results including rows, columns, execution metadata, and selected columns.
+        Raises:
+            ValueError: If `dql` is empty.
+            requests.exceptions.HTTPError: If the API request fails or the query is invalid.
+        """
+        if not dql.strip():
+            raise ValueError("dql must be a non-empty string")
+        url = f"{self._server_url}/dql/{collection_id}/execute"
+        response = self._session.post(url, json={"dql": dql})
+        self._handle_response_errors(response)
+        return response.json()
+    def select_agent_run_ids(
+        self,
+        collection_id: str,
+        where_clause: str | None = None,
+        limit: int | None = None,
+    ) -> list[str]:
+        """Convenience helper to fetch agent run IDs via DQL.
+        Args:
+            collection_id: ID of the Collection to query.
+            where_clause: Optional DQL WHERE clause applied to the agent_runs table.
+            limit: Optional LIMIT applied to the underlying DQL query.
+        Returns:
+            list[str]: Agent run IDs matching the criteria.
+        Raises:
+            ValueError: If the inputs are invalid.
+            requests.exceptions.HTTPError: If the API request fails.
+        """
+        query = "SELECT agent_runs.id AS agent_run_id FROM agent_runs"
+        if where_clause:
+            where_clause = where_clause.strip()
+            if not where_clause:
+                raise ValueError("where_clause must be a non-empty string when provided")
+            query += f" WHERE {where_clause}"
+        if limit is not None:
+            if limit <= 0:
+                raise ValueError("limit must be a positive integer when provided")
+            query += f" LIMIT {limit}"
+        result = self.execute_dql(collection_id, query)
+        rows = result.get("rows", [])
+        agent_run_ids = [str(row[0]) for row in rows if row]
+        if result.get("truncated"):
+            logger.warning(
+                "DQL query truncated at applied limit %s; returning %s agent run IDs",
+                result.get("applied_limit"),
+                len(agent_run_ids),
+            )
+        return agent_run_ids
     def list_agent_run_ids(self, collection_id: str) -> list[str]:
         """Get all agent run IDs for a collection.
@@ -328,7 +549,7 @@ class Docent:
         """
         url = f"{self._server_url}/{collection_id}/agent_run_ids"
         response = self._session.get(url)
-        response.raise_for_status()
+        self._handle_response_errors(response)
         return response.json()
     def recursively_ingest_inspect_logs(self, collection_id: str, fpath: str):
@@ -393,7 +614,7 @@ class Docent:
                         payload = {"agent_runs": [ar.model_dump(mode="json") for ar in batch_list]}
                         response = self._session.post(url, json=payload)
-                        response.raise_for_status()
+                        self._handle_response_errors(response)
                         runs_from_file += len(batch_list)
                         file_pbar.update(len(batch_list))
@@ -406,7 +627,7 @@ class Docent:
             logger.info("Computing embeddings for added runs...")
             url = f"{self._server_url}/{collection_id}/compute_embeddings"
             response = self._session.post(url)
-            response.raise_for_status()
+            self._handle_response_errors(response)
         logger.info(
             f"Successfully ingested {total_runs_added} total agent runs from {len(eval_files)} files"

docent-python 0.1.17a0__py3-none-any.whl → 0.1.27a0__py3-none-any.whl

Potentially problematic release.

docent-python 0.1.17a0__py3-none-any.whl → 0.1.27a0__py3-none-any.whl