indexify 0.0.21__tar.gz → 0.0.22__tar.gz
- {indexify-0.0.21 → indexify-0.0.22}/PKG-INFO +1 -1
- {indexify-0.0.21 → indexify-0.0.22}/indexify/client.py +80 -57
- indexify-0.0.22/indexify/error.py +30 -0
- {indexify-0.0.21 → indexify-0.0.22}/pyproject.toml +1 -1
- {indexify-0.0.21 → indexify-0.0.22}/LICENSE.txt +0 -0
- {indexify-0.0.21 → indexify-0.0.22}/README.md +0 -0
- {indexify-0.0.21 → indexify-0.0.22}/indexify/__init__.py +0 -0
- {indexify-0.0.21 → indexify-0.0.22}/indexify/data_containers.py +0 -0
- {indexify-0.0.21 → indexify-0.0.22}/indexify/exceptions.py +0 -0
- {indexify-0.0.21 → indexify-0.0.22}/indexify/extraction_policy.py +0 -0
- {indexify-0.0.21 → indexify-0.0.22}/indexify/extractor.py +0 -0
- {indexify-0.0.21 → indexify-0.0.22}/indexify/index.py +0 -0
- {indexify-0.0.21 → indexify-0.0.22}/indexify/settings.py +0 -0
- {indexify-0.0.21 → indexify-0.0.22}/indexify/utils.py +0 -0
{indexify-0.0.21 → indexify-0.0.22}/indexify/client.py

@@ -9,10 +9,10 @@ from .extractor import Extractor
 from .extraction_policy import ExtractionPolicy, ExtractionGraph
 from .index import Index
 from .utils import json_set_default
+from .error import Error
 from .data_containers import TextChunk
 from indexify.exceptions import ApiException
 from dataclasses import dataclass
-
 from typing import List, Optional, Union, Dict
 
 Document = namedtuple("Document", ["text", "labels", "id"])
@@ -75,12 +75,7 @@ class IndexifyClient:
         self._timeout = kwargs.get("timeout")
 
         # get namespace data
-        response = self.get(f"namespaces/{self.namespace}")
-        response.raise_for_status()
-        resp_json = response.json()
-        # initialize extraction_policies
-        for eb in resp_json["namespace"]["extraction_graphs"]:
-            self.extraction_graphs.append(ExtractionGraph.from_dict(eb))
+        self.extraction_graphs = self.get_extraction_graphs()
 
     @classmethod
     def with_mtls(
@@ -130,12 +125,18 @@ class IndexifyClient:
         return client
 
     def _request(self, method: str, **kwargs) -> httpx.Response:
-        response = self._client.request(method, timeout=self._timeout, **kwargs)
         try:
-            response.raise_for_status()
-        except httpx.HTTPStatusError as exc:
-            raise ApiException(exc.response.text)
-
+            response = self._client.request(method, timeout=self._timeout, **kwargs)
+            status_code = str(response.status_code)
+            if status_code.startswith("4") or status_code.startswith("5"):
+                error = Error.from_tonic_error_string(str(response.url), response.text)
+                self.__print_additional_error_context(error)
+                raise error
+        except httpx.ConnectError:
+            message = f"Make sure the server is running and accesible at {self._service_url}"
+            error = Error(status="ConnectionError", message=message)
+            print(error)
+            raise error
         return response
 
     def get(self, endpoint: str, **kwargs) -> httpx.Response:
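With this change, a 4xx/5xx response surfaces as an indexify.error.Error (printed with extra context, then raised) instead of an httpx.HTTPStatusError, and a refused connection becomes an Error with status "ConnectionError". A minimal sketch of what calling code sees in 0.0.22, assuming a client pointed at the default service URL:

    from indexify import IndexifyClient
    from indexify.error import Error

    client = IndexifyClient()
    try:
        client.get("namespaces/default")  # any endpoint; get() is defined in this diff
    except Error as e:
        # e.status is "GeneralError", "ExtractionGraphError",
        # "SearchError", or "ConnectionError"
        print(e.status, "-", e.message)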
@@ -291,7 +292,6 @@ class IndexifyClient:
             List[Index]: list of indexes in the current namespace
         """
         response = self.get(f"namespaces/{self.namespace}/indexes")
-        response.raise_for_status()
         return response.json()["indexes"]
 
     def extractors(self) -> List[Extractor]:
@@ -308,17 +308,18 @@ class IndexifyClient:
             extractors.append(Extractor.from_dict(ed))
         return extractors
 
-    def
+    def get_extraction_graphs(self) -> List[ExtractionGraph]:
         """
         Retrieve and update the list of extraction policies for the current namespace.
         """
         response = self.get(f"namespaces/{self.namespace}")
-        response.raise_for_status()
+        json = response.json()
 
-        self.extraction_graphs = []
-        for eb in response.json()["namespace"]["extraction_graphs"]:
-            self.extraction_graphs.append(ExtractionGraph.from_dict(eb))
-
+        self.extraction_graphs = []
+        for graph in json["namespace"]["extraction_graphs"]:
+            self.extraction_graphs.append(ExtractionGraph.from_dict(graph))
+
+        return self.extraction_graphs
 
     def create_extraction_graph(self, extraction_graph: ExtractionGraph):
         """
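Because get_extraction_graphs both refreshes self.extraction_graphs and returns it, callers can re-sync the cached list after server-side changes. A small usage sketch, relying on ExtractionGraph.name (which the error-context code later in this diff also assumes):

    graphs = client.get_extraction_graphs()   # refreshes client.extraction_graphs
    print([graph.name for graph in graphs])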
@@ -335,7 +336,6 @@ class IndexifyClient:
             data=request_body,
             headers={"Content-Type": "application/json"},
         )
-        response.raise_for_status()
         return
 
     def get_content_metadata(self, content_id: str) -> dict:
@@ -346,29 +346,8 @@ class IndexifyClient:
             - content_id (str): content id to query
         """
         response = self.get(f"namespaces/{self.namespace}/content/{content_id}")
-        response.raise_for_status()
         return response.json()
-
-    def get_extracted_content(
-        self,
-        content_id: str = None,
-    ):
-        """
-        Get list of content from current namespace.
-
-        Args:
-            - parent_id (str): Optional filter for parent id
-            - labels_eq (str): Optional filter for labels
-        """
-        params = {"parent_id": content_id}
-
-        response = self.get(f"namespaces/{self.namespace}/content", params=params)
-        response.raise_for_status()
-        return [
-            self._add_content_url(content)
-            for content in response.json()["content_list"]
-        ]
-
+
     def download_content(self, id: str) -> bytes:
         """
         Download content from id. Return bytes
@@ -377,18 +356,14 @@ class IndexifyClient:
             - id (str): id of content to download
         """
         response = self.get(f"namespaces/{self.namespace}/content/{id}/download")
-        try:
-            response.raise_for_status()
-            return response.content
-        except httpx.HTTPStatusError as exc:
-            raise ApiException(exc.response.text)
+        return response.content
 
     def add_documents(
         self,
         extraction_graphs: Union[str, List[str]],
         documents: Union[Document, str, List[Union[Document, str]]],
         doc_id=None,
-    ) ->
+    ) -> Union[str, List[str]]:
         """
         Add documents to current namespace.
 
@@ -430,6 +405,11 @@ class IndexifyClient:
             headers={"Content-Type": "application/json"},
         )
         response.raise_for_status()
+        response_json = response.json()
+        content_ids = response_json["content_ids"]
+        if len(documents) == 1 and len(content_ids) == 1:
+            return content_ids[0]
+        return content_ids
 
     def delete_documents(self, document_ids: List[str]) -> None:
         """
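add_documents previously returned nothing; it now returns the server-assigned content ids, unwrapping to a single string when a single document yields a single id. A hedged sketch, with the graph name purely illustrative:

    # one document in -> one content id (str) out
    content_id = client.add_documents("mygraph", "some text")

    # a list in -> a list of ids (List[str]) out
    content_ids = client.add_documents("mygraph", ["first doc", "second doc"])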
@@ -444,7 +424,6 @@ class IndexifyClient:
             json=req,
             headers={"Content-Type": "application/json"},
         )
-        response.raise_for_status()
 
     def update_content(self, document_id: str, path: str) -> None:
         """
@@ -457,7 +436,6 @@ class IndexifyClient:
             response = self.put(
                 f"namespaces/{self.namespace}/content/{document_id}", files={"file": f}
             )
-            response.raise_for_status()
 
     def get_structured_data(self, content_id: str) -> dict:
         """
@@ -469,7 +447,6 @@ class IndexifyClient:
         response = self.get(
             f"namespaces/{self.namespace}/content/{content_id}/metadata"
         )
-        response.raise_for_status()
         return response.json().get("metadata", [])
 
     def search_index(
@@ -490,7 +467,6 @@ class IndexifyClient:
             json=req,
             headers={"Content-Type": "application/json"},
         )
-        response.raise_for_status()
         return response.json()["results"]
 
     def upload_file(self, extraction_graphs: Union[str, List[str]], path: str, id=None, labels: dict = {}) -> str:
@@ -513,7 +489,6 @@ class IndexifyClient:
                 data=labels,
                 params=params,
             )
-            response.raise_for_status()
             response_json = response.json()
             return response_json["content_id"]
 
@@ -522,7 +497,6 @@ class IndexifyClient:
         List all schemas in the current namespace.
         """
         response = self.get(f"namespaces/{self.namespace}/schemas")
-        response.raise_for_status()
         return response.json()
 
     def get_content_tree(self, content_id: str):
@@ -535,9 +509,35 @@ class IndexifyClient:
         response = self.get(
             f"namespaces/{self.namespace}/content/{content_id}/content-tree"
         )
-        response.raise_for_status()
         return response.json()
 
+    def get_extracted_content(self, content_id: str, level: int = 0):
+        """
+        Get list of child for a given content id and their content up to the specified level.
+
+        Args:
+            - content_id (str): id of content
+            - level (int): depth of content retrieval (default: 0)
+        """
+        content_tree = self.get_content_tree(content_id)
+        child_list = []
+
+        def traverse_content(parent_id, current_level):
+            if current_level > level:
+                return
+
+            for item in content_tree['content_tree_metadata']:
+                if item['parent_id'] == parent_id:
+                    child_id = item['id']
+                    content = self.download_content(child_id)
+                    child_list.append({'id': child_id, 'content': content})
+
+                    traverse_content(child_id, current_level + 1)
+
+        traverse_content(content_id, 0)
+
+        return child_list
+
     def sql_query(self, query: str):
         """
         Execute a SQL query.
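The reintroduced get_extracted_content is now a client-side walk: it fetches the content tree once, then recursively downloads every descendant whose depth does not exceed level (so level=0 returns only direct children). Illustrative usage, with a made-up content id:

    children = client.get_extracted_content("some-content-id", level=1)
    for child in children:
        print(child["id"], len(child["content"]), "bytes")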
@@ -551,7 +551,6 @@ class IndexifyClient:
             json=req,
             headers={"Content-Type": "application/json"},
         )
-        response.raise_for_status()
         result = response.json()
         rows = []
         for row in result["rows"]:
@@ -570,8 +569,19 @@ class IndexifyClient:
             json=req,
             headers={"Content-Type": "application/json"},
         )
-        response.raise_for_status()
         return response.json()
+
+    def wait_for_extraction(self, content_id: str):
+        """
+        Wait for extraction to complete for a given content id
+
+        Args:
+            - content_id (str): id of content
+        """
+        response = self.get(
+            f"namespaces/{self.namespace}/content/{content_id}/wait"
+        )
+        response.raise_for_status()
 
     def generate_unique_hex_id(self):
         """
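wait_for_extraction blocks on the server's /wait endpoint until extraction for the given content finishes, which pairs naturally with the content id now returned by add_documents. Sketch (graph name assumed):

    content_id = client.add_documents("mygraph", "some text")
    client.wait_for_extraction(content_id)  # returns once extractors have run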
@@ -594,3 +604,16 @@ class IndexifyClient:
         """
         hash_object = hashlib.sha256(input_string.encode())
         return hash_object.hexdigest()[:16]
+
+    def __print_additional_error_context(self, error: Error):
+        print(error)
+
+        if error.status == "ExtractionGraphError":
+            graphs = [eg.name for eg in self.extraction_graphs]
+            extractors = [ext.name for ext in self.extractors()]
+            print(f"Available extraction graphs: {graphs}")
+            print(f"Available extractors: {extractors}")
+
+        if error.status == "SearchError":
+            indexes = [index["name"] for index in self.indexes()]
+            print(f"Available indexes: {indexes}")
indexify-0.0.22/indexify/error.py (new file)

@@ -0,0 +1,30 @@
+class Error(Exception):
+    status: str
+    message: str
+
+    def __init__(self, status: str, message: str):
+        self.status = status
+        self.message = message
+
+    @staticmethod
+    def from_tonic_error_string(url: str, error: str) -> "Error":
+        data = error.split(", ")
+
+        message = data[1].split(": ", 1)[1]
+        if message.startswith('"') and message.endswith('"'):
+            message = message[1:-1]
+
+        status = "GeneralError"
+        if "extraction_graph" in url:
+            status = "ExtractionGraphError"
+        elif "search" in url:
+            status = "SearchError"
+
+        error = Error(status, message)
+        return error
+
+    def __str__(self):
+        return f"{self.status} | {self.message.capitalize()}"
+
+    def __repr__(self):
+        return f"Error(status={self.status!r}, message={self.message!r})"
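from_tonic_error_string assumes a tonic-style body of the form status: <code>, message: "<text>" and classifies the error by substring-matching the request URL. A worked example, with an illustrative URL and body:

    from indexify.error import Error

    body = 'status: 404, message: "index not found"'  # illustrative tonic-style body
    err = Error.from_tonic_error_string(
        "http://localhost:8900/namespaces/default/search", body
    )
    print(err.status)  # "SearchError" (the URL contains "search")
    print(err)         # SearchError | Index not found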