indexify-0.0.21-py3-none-any.whl → indexify-0.0.23-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
indexify/__init__.py CHANGED
@@ -1,7 +1,7 @@
  from .index import Index
  from .client import IndexifyClient
- from .extraction_policy import ExtractionPolicy, ExtractionGraphBuilder, ExtractionGraph
- from .client import IndexifyClient, Document
+ from .extraction_policy import ExtractionGraph
+ from .client import IndexifyClient, Document, generate_hash_from_string, generate_unique_hex_id
  from .settings import DEFAULT_SERVICE_URL

  __all__ = [
@@ -11,4 +11,6 @@ __all__ = [
      "ExtractionGraph",
      "ExtractionGraphBuilder" "ExtractionPolicy",
      "DEFAULT_SERVICE_URL",
+     "generate_hash_from_string",
+     "generate_unique_hex_id",
  ]
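With this release, the ID helpers previously available only as IndexifyClient methods are exported at package level. A minimal usage sketch based on the exports above (the key string is illustrative):

    from indexify import generate_hash_from_string, generate_unique_hex_id

    doc_id = generate_unique_hex_id()                 # random 16-char hex, new on every call
    stable_id = generate_hash_from_string("doc-key")  # deterministic 16-char SHA-256 prefix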
indexify/client.py CHANGED
@@ -9,16 +9,39 @@ from .extractor import Extractor
  from .extraction_policy import ExtractionPolicy, ExtractionGraph
  from .index import Index
  from .utils import json_set_default
+ from .error import Error
  from .data_containers import TextChunk
  from indexify.exceptions import ApiException
  from dataclasses import dataclass
-
  from typing import List, Optional, Union, Dict
+ import logging

  Document = namedtuple("Document", ["text", "labels", "id"])

  SQLQueryRow = namedtuple("SQLQueryRow", ["content_id", "data"])

+ def generate_unique_hex_id():
+     """
+     Generate a unique hexadecimal identifier
+
+     Returns:
+         str: a unique hexadecimal string
+     """
+     return uuid.uuid4().hex[:16]
+
+ def generate_hash_from_string(input_string: str):
+     """
+     Generate a hash for the given string and return it as a hexadecimal string.
+
+     Args:
+         input_string (str): The input string to hash.
+
+     Returns:
+         str: The hexadecimal hash of the input string.
+     """
+     hash_object = hashlib.sha256(input_string.encode())
+     return hash_object.hexdigest()[:16]
+

  @dataclass
  class SqlQueryResult:
@@ -75,12 +98,7 @@ class IndexifyClient:
          self._timeout = kwargs.get("timeout")

          # get namespace data
-         response = self.get(f"namespaces/{self.namespace}")
-         response.raise_for_status()
-         resp_json = response.json()
-         # initialize extraction_policies
-         for eb in resp_json["namespace"]["extraction_graphs"]:
-             self.extraction_graphs.append(ExtractionGraph.from_dict(eb))
+         self.extraction_graphs = self.get_extraction_graphs()

      @classmethod
      def with_mtls(
@@ -130,12 +148,19 @@ class IndexifyClient:
          return client

      def _request(self, method: str, **kwargs) -> httpx.Response:
-         response = self._client.request(method, timeout=self._timeout, **kwargs)
          try:
-             response.raise_for_status()
-         except httpx.HTTPStatusError as exc:
-             print(f"exception: {exc}, response text: {response.text}")
-             raise exc
+             response = self._client.request(method, timeout=self._timeout, **kwargs)
+             status_code = str(response.status_code)
+             if status_code.startswith("4") or status_code.startswith("5"):
+                 raise ApiException(response.text)
+             #error = Error.from_tonic_error_string(str(response.url), response.text)
+             #self.__print_additional_error_context(error)
+             #raise error
+         except httpx.ConnectError:
+             message = f"Make sure the server is running and accesible at {self._service_url}"
+             error = Error(status="ConnectionError", message=message)
+             print(error)
+             raise error
          return response

      def get(self, endpoint: str, **kwargs) -> httpx.Response:
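Client code that previously caught httpx.HTTPStatusError should now catch ApiException for 4xx/5xx responses; connection failures raise the new Error type. A hedged sketch of the resulting calling pattern (the query is a placeholder):

    from indexify import IndexifyClient
    from indexify.error import Error
    from indexify.exceptions import ApiException

    client = IndexifyClient()
    try:
        results = client.sql_query("select * from ingestion;")  # placeholder query
    except ApiException as e:
        print(f"server rejected the request: {e}")  # 4xx/5xx response body
    except Error as e:
        print(e)  # e.g. "ConnectionError | Make sure the server is running ..."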
@@ -291,7 +316,6 @@ class IndexifyClient:
              List[Index]: list of indexes in the current namespace
          """
          response = self.get(f"namespaces/{self.namespace}/indexes")
-         response.raise_for_status()
          return response.json()["indexes"]

      def extractors(self) -> List[Extractor]:
@@ -308,17 +332,18 @@ class IndexifyClient:
              extractors.append(Extractor.from_dict(ed))
          return extractors

-     def get_extraction_policies(self):
+     def get_extraction_graphs(self) -> List[ExtractionGraph]:
          """
          Retrieve and update the list of extraction policies for the current namespace.
          """
          response = self.get(f"namespaces/{self.namespace}")
-         response.raise_for_status()
+         json = response.json()
+
+         self.extraction_graphs = []
+         for graph in json["namespace"]["extraction_graphs"]:
+             self.extraction_graphs.append(ExtractionGraph.from_dict(graph))

-         self.extraction_policies = []
-         for eb in response.json()["namespace"]["extraction_policies"]:
-             self.extraction_policies.append(ExtractionPolicy.from_dict(eb))
-         return self.extraction_policies
+         return self.extraction_graphs

      def create_extraction_graph(self, extraction_graph: ExtractionGraph):
          """
@@ -335,7 +360,6 @@ class IndexifyClient:
              data=request_body,
              headers={"Content-Type": "application/json"},
          )
-         response.raise_for_status()
          return

      def get_content_metadata(self, content_id: str) -> dict:
@@ -346,29 +370,8 @@ class IndexifyClient:
              - content_id (str): content id to query
          """
          response = self.get(f"namespaces/{self.namespace}/content/{content_id}")
-         response.raise_for_status()
          return response.json()
-
-     def get_extracted_content(
-         self,
-         content_id: str = None,
-     ):
-         """
-         Get list of content from current namespace.
-
-         Args:
-             - parent_id (str): Optional filter for parent id
-             - labels_eq (str): Optional filter for labels
-         """
-         params = {"parent_id": content_id}
-
-         response = self.get(f"namespaces/{self.namespace}/content", params=params)
-         response.raise_for_status()
-         return [
-             self._add_content_url(content)
-             for content in response.json()["content_list"]
-         ]
-
+
      def download_content(self, id: str) -> bytes:
          """
          Download content from id. Return bytes
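Note that get_extracted_content is not removed outright: it is reintroduced later in this diff (see the get_content_tree hunk below) with a different signature that walks the content tree instead of filtering the namespace content listing.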
@@ -377,18 +380,14 @@ class IndexifyClient:
              - id (str): id of content to download
          """
          response = self.get(f"namespaces/{self.namespace}/content/{id}/download")
-         try:
-             response.raise_for_status()
-             return response.content
-         except httpx.HTTPStatusError as exc:
-             raise ApiException(exc.response.text)
+         return response.content

      def add_documents(
          self,
          extraction_graphs: Union[str, List[str]],
          documents: Union[Document, str, List[Union[Document, str]]],
          doc_id=None,
-     ) -> None:
+     ) -> Union[str, List[str]]:
          """
          Add documents to current namespace.

@@ -430,6 +429,11 @@ class IndexifyClient:
              headers={"Content-Type": "application/json"},
          )
          response.raise_for_status()
+         response_json = response.json()
+         content_ids = response_json["content_ids"]
+         if len(documents) == 1 and len(content_ids) == 1:
+             return content_ids[0]
+         return content_ids

      def delete_documents(self, document_ids: List[str]) -> None:
          """
@@ -444,7 +448,6 @@ class IndexifyClient:
              json=req,
              headers={"Content-Type": "application/json"},
          )
-         response.raise_for_status()

      def update_content(self, document_id: str, path: str) -> None:
          """
@@ -457,7 +460,6 @@ class IndexifyClient:
          response = self.put(
              f"namespaces/{self.namespace}/content/{document_id}", files={"file": f}
          )
-         response.raise_for_status()

      def get_structured_data(self, content_id: str) -> dict:
          """
@@ -469,7 +471,6 @@ class IndexifyClient:
          response = self.get(
              f"namespaces/{self.namespace}/content/{content_id}/metadata"
          )
-         response.raise_for_status()
          return response.json().get("metadata", [])

      def search_index(
@@ -490,7 +491,6 @@ class IndexifyClient:
              json=req,
              headers={"Content-Type": "application/json"},
          )
-         response.raise_for_status()
          return response.json()["results"]

      def upload_file(self, extraction_graphs: Union[str, List[str]], path: str, id=None, labels: dict = {}) -> str:
@@ -513,7 +513,6 @@ class IndexifyClient:
              data=labels,
              params=params,
          )
-         response.raise_for_status()
          response_json = response.json()
          return response_json["content_id"]

@@ -522,7 +521,6 @@ class IndexifyClient:
          List all schemas in the current namespace.
          """
          response = self.get(f"namespaces/{self.namespace}/schemas")
-         response.raise_for_status()
          return response.json()

      def get_content_tree(self, content_id: str):
  def get_content_tree(self, content_id: str):
@@ -535,9 +533,35 @@ class IndexifyClient:
535
533
  response = self.get(
536
534
  f"namespaces/{self.namespace}/content/{content_id}/content-tree"
537
535
  )
538
- response.raise_for_status()
539
536
  return response.json()
540
537
 
538
+ def get_extracted_content(self, content_id: str, level: int = 0):
539
+ """
540
+ Get list of child for a given content id and their content up to the specified level.
541
+
542
+ Args:
543
+ - content_id (str): id of content
544
+ - level (int): depth of content retrieval (default: 0)
545
+ """
546
+ content_tree = self.get_content_tree(content_id)
547
+ child_list = []
548
+
549
+ def traverse_content(parent_id, current_level):
550
+ if current_level > level:
551
+ return
552
+
553
+ for item in content_tree['content_tree_metadata']:
554
+ if item['parent_id'] == parent_id:
555
+ child_id = item['id']
556
+ content = self.download_content(child_id)
557
+ child_list.append({'id': child_id, 'content': content})
558
+
559
+ traverse_content(child_id, current_level + 1)
560
+
561
+ traverse_content(content_id, 0)
562
+
563
+ return child_list
564
+
541
565
  def sql_query(self, query: str):
542
566
  """
543
567
  Execute a SQL query.
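Note the level semantics of the new method: traversal starts at level 0, so the default returns only direct children; level=1 adds grandchildren, and so on. Content is fetched eagerly via download_content, so each entry carries raw bytes. A sketch with an illustrative content id:

    children = client.get_extracted_content("771372172facf8a5")       # direct children only
    tree = client.get_extracted_content("771372172facf8a5", level=1)  # children + grandchildren
    for child in tree:
        print(child["id"], len(child["content"]))                     # content is bytes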
@@ -551,7 +575,6 @@ class IndexifyClient:
              json=req,
              headers={"Content-Type": "application/json"},
          )
-         response.raise_for_status()
          result = response.json()
          rows = []
          for row in result["rows"]:
@@ -570,8 +593,19 @@ class IndexifyClient:
              json=req,
              headers={"Content-Type": "application/json"},
          )
-         response.raise_for_status()
          return response.json()
+
+     def wait_for_extraction(self, content_id: str):
+         """
+         Wait for extraction to complete for a given content id
+
+         Args:
+             - content_id (str): id of content
+         """
+         response = self.get(
+             f"namespaces/{self.namespace}/content/{content_id}/wait"
+         )
+         response.raise_for_status()

      def generate_unique_hex_id(self):
          """
@@ -580,6 +614,7 @@ class IndexifyClient:
          Returns:
              str: a unique hexadecimal string
          """
+         logging.warning("This method is deprecated. Use generate_unique_hex_id from indexify instead.")
          return uuid.uuid4().hex[:16]

      def generate_hash_from_string(self, input_string: str):
@@ -592,5 +627,19 @@ class IndexifyClient:
          Returns:
              str: The hexadecimal hash of the input string.
          """
+         logging.warning("This method is deprecated. Use generate_hash_from_string from indexify instead.")
          hash_object = hashlib.sha256(input_string.encode())
          return hash_object.hexdigest()[:16]
+
+     def __print_additional_error_context(self, error: Error):
+         print(error)
+
+         if error.status == "ExtractionGraphError":
+             graphs = [eg.name for eg in self.extraction_graphs]
+             extractors = [ext.name for ext in self.extractors()]
+             print(f"Available extraction graphs: {graphs}")
+             print(f"Available extractors: {extractors}")
+
+         if error.status == "SearchError":
+             indexes = [index["name"] for index in self.indexes()]
+             print(f"Available indexes: {indexes}")
indexify/error.py ADDED
@@ -0,0 +1,30 @@
+ class Error(Exception):
+     status: str
+     message: str
+
+     def __init__(self, status: str, message: str):
+         self.status = status
+         self.message = message
+
+     @staticmethod
+     def from_tonic_error_string(url: str, error: str) -> "Error":
+         data = error.split(", ")
+
+         message = data[1].split(": ", 1)[1]
+         if message.startswith('"') and message.endswith('"'):
+             message = message[1:-1]
+
+         status = "GeneralError"
+         if "extraction_graph" in url:
+             status = "ExtractionGraphError"
+         elif "search" in url:
+             status = "SearchError"
+
+         error = Error(status, message)
+         return error
+
+     def __str__(self):
+         return f"{self.status} | {self.message.capitalize()}"
+
+     def __repr__(self):
+         return f"Error(status={self.status!r}, message={self.message!r})"
{indexify-0.0.21.dist-info → indexify-0.0.23.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: indexify
- Version: 0.0.21
+ Version: 0.0.23
  Summary: Python Client for Indexify
  Home-page: https://github.com/tensorlakeai/indexify
  License: Apache 2.0
{indexify-0.0.21.dist-info → indexify-0.0.23.dist-info}/RECORD CHANGED
@@ -1,13 +1,14 @@
- indexify/__init__.py,sha256=hhDqRvJo4gCW1eqVgFblxKiBzArCFfo2eFGOBsQkDOc,401
- indexify/client.py,sha256=s2Xflh75574WvNp0lbG6PGtK2Dy3CMfME5MDK1iDgR4,19334
+ indexify/__init__.py,sha256=Y40-Ur_tL7kGGs-reh9BTfEYGe-KyGxgdg-CmoFsXRQ,473
+ indexify/client.py,sha256=Q6QJ_yzJMmH_h0x3EwXL69qmp-TPrU7lcQedw__rRnk,21238
  indexify/data_containers.py,sha256=r1wxJPtsmXbyKvb17fqxm-dPjKz51oZ62f8A8Zxls1c,361
+ indexify/error.py,sha256=3umTeYb0ugtUyehV1ibfvaeACxAONPyWPc-1HRN4d1M,856
  indexify/exceptions.py,sha256=vjd5SPPNFIEW35GorSIodsqvm9RKHQm9kdp8t9gv-WM,111
  indexify/extraction_policy.py,sha256=dIyQK3N-QOpQ0BPjiZ_635o8A5ITNxaz1syQ_FPaE0k,1851
  indexify/extractor.py,sha256=sWFLlXHgEfWlmiKAXN6ytUt_uG7th-XGNHqz-TG39gs,1216
  indexify/index.py,sha256=RvxYhJXEth-GKvqzlMiz5PuN1eIbZk84pt20piA1Gsw,504
  indexify/settings.py,sha256=LSaWZ0ADIVmUv6o6dHWRC3-Ry5uLbCw2sBSg1e_U7UM,99
  indexify/utils.py,sha256=rDN2lrsAs9noJEIjfx6ukmC2SAIyrlUt7QU-kaBjujM,125
- indexify-0.0.21.dist-info/LICENSE.txt,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
- indexify-0.0.21.dist-info/METADATA,sha256=Rb_7fwsIiJKuJaLnmJp7Cw4exYLhHcdx48OfBcFzaO4,1753
- indexify-0.0.21.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
- indexify-0.0.21.dist-info/RECORD,,
+ indexify-0.0.23.dist-info/LICENSE.txt,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ indexify-0.0.23.dist-info/METADATA,sha256=vQqfHcLrf52YvCNbuAc1m9yLh-rVSGkRqfMKbcTuSb0,1753
+ indexify-0.0.23.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+ indexify-0.0.23.dist-info/RECORD,,