PyPI - indexify - Versions diffs - 0.0.22__tar.gz → 0.0.24__tar.gz - Mend

indexify 0.0.22.tar.gz → 0.0.24.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14)

{indexify-0.0.22 → indexify-0.0.24}/PKG-INFO RENAMED Viewed

@@ -1,14 +1,15 @@
 Metadata-Version: 2.1
 Name: indexify
-Version: 0.0.22
+Version: 0.0.24
 Summary: Python Client for Indexify
 Home-page: https://github.com/tensorlakeai/indexify
 License: Apache 2.0
 Author: Diptanu Gon Choudhury
 Author-email: diptanuc@gmail.com
-Requires-Python: >=3.10.0,<4.0.0
+Requires-Python: >=3.9,<4.0
 Classifier: License :: Other/Proprietary License
 Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12

{indexify-0.0.22 → indexify-0.0.24}/indexify/__init__.py RENAMED Viewed

@@ -1,7 +1,7 @@
 from .index import Index
 from .client import IndexifyClient
-from .extraction_policy import ExtractionPolicy, ExtractionGraphBuilder, ExtractionGraph
-from .client import IndexifyClient, Document
+from .extraction_policy import ExtractionGraph
+from .client import IndexifyClient, Document, generate_hash_from_string, generate_unique_hex_id
 from .settings import DEFAULT_SERVICE_URL
 __all__ = [
@@ -11,4 +11,6 @@ __all__ = [
     "ExtractionGraph",
     "ExtractionGraphBuilder" "ExtractionPolicy",
     "DEFAULT_SERVICE_URL",
+    "generate_hash_from_string",
+    "generate_unique_hex_id",
 ]

{indexify-0.0.22 → indexify-0.0.24}/indexify/client.py RENAMED Viewed

@@ -14,12 +14,37 @@ from .data_containers import TextChunk
 from indexify.exceptions import ApiException
 from dataclasses import dataclass
 from typing import List, Optional, Union, Dict
+import logging
 Document = namedtuple("Document", ["text", "labels", "id"])
 SQLQueryRow = namedtuple("SQLQueryRow", ["content_id", "data"])
+def generate_unique_hex_id():
+    """
+    Generate a unique hexadecimal identifier
+    Returns:
+        str: a unique hexadecimal string
+    """
+    return uuid.uuid4().hex[:16]
+def generate_hash_from_string(input_string: str):
+    """
+    Generate a hash for the given string and return it as a hexadecimal string.
+    Args:
+        input_string (str): The input string to hash.
+    Returns:
+        str: The hexadecimal hash of the input string.
+    """
+    hash_object = hashlib.sha256(input_string.encode())
+    return hash_object.hexdigest()[:16]
 @dataclass
 class SqlQueryResult:
     result: List[Dict]
@@ -129,11 +154,14 @@ class IndexifyClient:
             response = self._client.request(method, timeout=self._timeout, **kwargs)
             status_code = str(response.status_code)
             if status_code.startswith("4") or status_code.startswith("5"):
-                error = Error.from_tonic_error_string(str(response.url), response.text)
-                self.__print_additional_error_context(error)
-                raise error
+                raise ApiException(response.text)
+                # error = Error.from_tonic_error_string(str(response.url), response.text)
+                # self.__print_additional_error_context(error)
+                # raise error
         except httpx.ConnectError:
-            message = f"Make sure the server is running and accesible at {self._service_url}"
+            message = (
+                f"Make sure the server is running and accesible at {self._service_url}"
+            )
             error = Error(status="ConnectionError", message=message)
             print(error)
             raise error
@@ -347,7 +375,7 @@ class IndexifyClient:
         """
         response = self.get(f"namespaces/{self.namespace}/content/{content_id}")
         return response.json()
     def download_content(self, id: str) -> bytes:
         """
         Download content from id. Return bytes
@@ -425,6 +453,21 @@ class IndexifyClient:
             headers={"Content-Type": "application/json"},
         )
+    def update_labels(self, document_id: str, labels: Dict[str, str]) -> None:
+        """
+        Update labels for a document.
+        Args:
+            - document_id (str): id of document to update
+            - labels (Dict[str, str]): labels to update
+        """
+        req = {"labels": labels}
+        response = self.put(
+            f"namespaces/{self.namespace}/content/{document_id}/labels",
+            json=req,
+            headers={"Content-Type": "application/json"},
+        )
     def update_content(self, document_id: str, path: str) -> None:
         """
         Update a piece of content with a new file
@@ -469,7 +512,13 @@ class IndexifyClient:
         )
         return response.json()["results"]
-    def upload_file(self, extraction_graphs: Union[str, List[str]], path: str, id=None, labels: dict = {}) -> str:
+    def upload_file(
+        self,
+        extraction_graphs: Union[str, List[str]],
+        path: str,
+        id=None,
+        labels: dict = {},
+    ) -> str:
         """
         Upload a file.
@@ -514,28 +563,28 @@ class IndexifyClient:
     def get_extracted_content(self, content_id: str, level: int = 0):
         """
         Get list of child for a given content id and their content up to the specified level.
         Args:
         - content_id (str): id of content
         - level (int): depth of content retrieval (default: 0)
         """
         content_tree = self.get_content_tree(content_id)
         child_list = []
         def traverse_content(parent_id, current_level):
             if current_level > level:
                 return
-            for item in content_tree['content_tree_metadata']:
-                if item['parent_id'] == parent_id:
-                    child_id = item['id']
+            for item in content_tree["content_tree_metadata"]:
+                if item["parent_id"] == parent_id:
+                    child_id = item["id"]
                     content = self.download_content(child_id)
-                    child_list.append({'id': child_id, 'content': content})
+                    child_list.append({"id": child_id, "content": content})
                     traverse_content(child_id, current_level + 1)
         traverse_content(content_id, 0)
         return child_list
     def sql_query(self, query: str):
@@ -559,18 +608,29 @@ class IndexifyClient:
         return SqlQueryResult(result=rows)
     def ingest_remote_file(
-        self, extraction_graphs: Union[str, List[str]], url: str, mime_type: str, labels: Dict[str, str], id=None
+        self,
+        extraction_graphs: Union[str, List[str]],
+        url: str,
+        mime_type: str,
+        labels: Dict[str, str],
+        id=None,
     ):
         if isinstance(extraction_graphs, str):
             extraction_graphs = [extraction_graphs]
-        req = {"url": url, "mime_type": mime_type, "labels": labels, "id": id, "extraction_graph_names": extraction_graphs}
+        req = {
+            "url": url,
+            "mime_type": mime_type,
+            "labels": labels,
+            "id": id,
+            "extraction_graph_names": extraction_graphs,
+        }
         response = self.post(
             f"namespaces/{self.namespace}/ingest_remote_file",
             json=req,
             headers={"Content-Type": "application/json"},
         )
         return response.json()
     def wait_for_extraction(self, content_id: str):
         """
         Wait for extraction to complete for a given content id
@@ -578,9 +638,7 @@ class IndexifyClient:
         Args:
             - content_id (str): id of content
         """
-        response = self.get(
-            f"namespaces/{self.namespace}/content/{content_id}/wait"
-        )
+        response = self.get(f"namespaces/{self.namespace}/content/{content_id}/wait")
         response.raise_for_status()
     def generate_unique_hex_id(self):
@@ -590,6 +648,9 @@ class IndexifyClient:
         Returns:
             str: a unique hexadecimal string
         """
+        logging.warning(
+            "This method is deprecated. Use generate_unique_hex_id from indexify instead."
+        )
         return uuid.uuid4().hex[:16]
     def generate_hash_from_string(self, input_string: str):
@@ -602,6 +663,9 @@ class IndexifyClient:
         Returns:
             str: The hexadecimal hash of the input string.
         """
+        logging.warning(
+            "This method is deprecated. Use generate_hash_from_string from indexify instead."
+        )
         hash_object = hashlib.sha256(input_string.encode())
         return hash_object.hexdigest()[:16]

{indexify-0.0.22 → indexify-0.0.24}/pyproject.toml RENAMED Viewed

@@ -1,15 +1,15 @@
 [tool.poetry]
 name = "indexify"
-version = "0.0.22"
+version = "0.0.24"
 description = "Python Client for Indexify"
-authors = ["Diptanu Gon Choudhury <diptanuc@gmail.com>", "Vijay Parthasarathy <vijay2win@gmail.com>"]
+authors = ["Diptanu Gon Choudhury <diptanuc@gmail.com>", "Lucas Jackson <lucas@tensorlake.ai>", "Vijay Parthasarathy <vijay2win@gmail.com>"]
 license = "Apache 2.0"
 readme = "README.md"
 homepage = "https://github.com/tensorlakeai/indexify"
 repository = "https://github.com/tensorlakeai/indexify"
 [tool.poetry.dependencies]
-python = "^3.10.0"
+python = "^3.9"
 httpx = { version = "^0.26", extras = ["http2"] }
 pyyaml = "^6.0.1"