PyPI - indexify - Versions diffs - 0.0.14__py3-none-any.whl → 0.0.16__py3-none-any.whl - Mend

indexify 0.0.14__py3-none-any.whl → 0.0.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

indexify/client.py CHANGED Viewed

@@ -1,4 +1,7 @@
+import yaml
 import httpx
+import uuid
+import hashlib
 import json
 from collections import namedtuple
 from .settings import DEFAULT_SERVICE_URL
@@ -12,7 +15,7 @@ from dataclasses import dataclass
 from typing import List, Optional, Union, Dict
-Document = namedtuple("Document", ["text", "labels"])
+Document = namedtuple("Document", ["text", "labels", "id"])
 SQLQueryRow = namedtuple("SQLQueryRow", ["content_id", "data"])
@@ -44,14 +47,30 @@ class IndexifyClient:
         self,
         service_url: str = DEFAULT_SERVICE_URL,
         namespace: str = "default",
+        config_path: Optional[str] = None,
         *args,
         **kwargs,
     ):
+        if config_path:
+            with open(config_path, 'r') as file:
+                config = yaml.safe_load(file)
+            if config.get('use_tls', False):
+                tls_config = config['tls_config']
+                self._client = httpx.Client(
+                    http2=True,
+                    cert=(tls_config['cert_path'], tls_config['key_path']),
+                    verify=tls_config.get('ca_bundle_path', True)
+                )
+            else:
+                self._client = httpx.Client(*args, **kwargs)
+        else:
+            self._client = httpx.Client(*args, **kwargs)
         self.namespace: str = namespace
         self.extraction_policies: List[ExtractionPolicy] = []
         self.labels: dict = {}
         self._service_url = service_url
-        self._client = httpx.Client(*args, **kwargs)
         # get namespace data
         response = self.get(f"namespaces/{self.namespace}")
@@ -349,11 +368,21 @@ class IndexifyClient:
         except httpx.HTTPStatusError as exc:
             raise ApiException(exc.response.text)
         return
+    def get_content_metadata(self, content_id: str) -> dict:
+        """
+        Get metadata for a specific content ID in a given index.
-    def get_content(
+        Args:
+            - content_id (str): content id to query
+        """
+        response = self.get(f"namespaces/{self.namespace}/content/{content_id}")
+        response.raise_for_status()
+        return response.json()
+    def get_extracted_content(
         self,
-        parent_id: str = None,
-        labels_eq: str = None,
+        content_id: str = None,
     ):
         """
         Get list of content from current namespace.
@@ -362,11 +391,7 @@ class IndexifyClient:
             - parent_id (str): Optional filter for parent id
             - labels_eq (str): Optional filter for labels
         """
-        params = {}
-        if parent_id:
-            params.update({"parent_id": parent_id})
-        if labels_eq:
-            params.update({"labels_eq": labels_eq})
+        params = {"parent_id": content_id}
         response = self.get(f"namespaces/{self.namespace}/content", params=params)
         response.raise_for_status()
@@ -390,7 +415,7 @@ class IndexifyClient:
             raise ApiException(exc.response.text)
     def add_documents(
-        self, documents: Union[Document, str, List[Union[Document, str]]]
+        self, documents: Union[Document, str, List[Union[Document, str]]], doc_id=None
     ) -> None:
         """
         Add documents to current namespace.
@@ -401,14 +426,14 @@ class IndexifyClient:
         if isinstance(documents, Document):
             documents = [documents]
         elif isinstance(documents, str):
-            documents = [Document(documents, {})]
+            documents = [Document(documents, {}, id=doc_id)]
         elif isinstance(documents, list):
             new_documents = []
             for item in documents:
                 if isinstance(item, Document):
                     new_documents.append(item)
                 elif isinstance(item, str):
-                    new_documents.append(Document(item, {}))
+                    new_documents.append(Document(item, {}, id=None)) # don't pass in id for a string content because doesn't make sense to have same content id for all strings
                 else:
                     raise ValueError(
                         "List items must be either Document instances or strings."
@@ -419,7 +444,7 @@ class IndexifyClient:
                 "Invalid type for documents. Expected Document, str, or list of these."
             )
-        req = {"documents": documents}
+        req = {"documents": [doc._asdict() for doc in documents]}
         response = self.post(
             f"namespaces/{self.namespace}/add_texts",
             json=req,
@@ -453,7 +478,7 @@ class IndexifyClient:
             response = self.put(f"namespaces/{self.namespace}/content/{document_id}", files={"file": f}, timeout=None)
             response.raise_for_status()
-    def get_metadata(self, content_id: str) -> dict:
+    def get_structured_data(self, content_id: str) -> dict:
         """
         Query metadata for a specific content ID in a given index.
@@ -464,7 +489,7 @@ class IndexifyClient:
         response.raise_for_status()
         return response.json().get("metadata",[])
-    def search_index(self, name: str, query: str, top_k: int) -> list[TextChunk]:
+    def search_index(self, name: str, query: str, top_k: int, filters: List[str] = None) -> list[TextChunk]:
         """
         Search index in the current namespace.
@@ -472,8 +497,11 @@ class IndexifyClient:
             - name (str): name of index to search
             - query (str): query string
             - top_k (int): top k nearest neighbors to be returned
+            - filters (List[str]): list of filters to apply
         """
-        req = {"index": name, "query": query, "k": top_k}
+        if filters is None:
+            filters = []
+        req = {"index": name, "query": query, "k": top_k, "filters": filters}
         response = self.post(
             f"namespaces/{self.namespace}/search",
             json=req,
@@ -482,17 +510,23 @@ class IndexifyClient:
         response.raise_for_status()
         return response.json()["results"]
-    def upload_file(self, path: str):
+    def upload_file(self, path: str, id=None, labels: dict = {}):
         """
         Upload a file.
         Args:
             - path (str): relative path to the file to be uploaded
+            - labels (dict): labels to be associated with the file
         """
+        params={}
+        if id is not None:
+            params['id'] = id
         with open(path, "rb") as f:
             response = self.post(
                 f"namespaces/{self.namespace}/upload_file",
                 files={"file": f},
+                data=labels,
+                params=params,
                 timeout=None,
             )
             response.raise_for_status()
@@ -535,4 +569,27 @@ class IndexifyClient:
         )
         response.raise_for_status()
         return response.json()
+    def generate_unique_hex_id(self):
+        """
+        Generate a unique hexadecimal identifier
+        Returns:
+            str: a unique hexadecimal string
+        """
+        return uuid.uuid4().hex[:16]
+    def generate_hash_from_string(self, input_string: str):
+        """
+        Generate a hash for the given string and return it as a hexadecimal string.
+        Args:
+            input_string (str): The input string to hash.
+        Returns:
+            str: The hexadecimal hash of the input string.
+        """
+        hash_object = hashlib.sha256(input_string.encode())
+        return hash_object.hexdigest()[:16]

{indexify-0.0.14.dist-info → indexify-0.0.16.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: indexify
-Version: 0.0.14
+Version: 0.0.16
 Summary: Python Client for Indexify
 Home-page: https://github.com/tensorlakeai/indexify
 License: Apache 2.0

{indexify-0.0.14.dist-info → indexify-0.0.16.dist-info}/RECORD RENAMED Viewed

@@ -1,5 +1,5 @@
 indexify/__init__.py,sha256=Sz6zkAIHsPOi0rG5RM7dVkXGDa0fO2uurD6vS4Qo15E,312
-indexify/client.py,sha256=sT4tcSuR3wQBF0yYStBRva3xUfe15X6GjZaViiRX2sA,16944
+indexify/client.py,sha256=ZDirw1O46nRx0WBgB95jvpkd4LdAjgZnlQ_2A673_cI,19047
 indexify/data_containers.py,sha256=r1wxJPtsmXbyKvb17fqxm-dPjKz51oZ62f8A8Zxls1c,361
 indexify/exceptions.py,sha256=vjd5SPPNFIEW35GorSIodsqvm9RKHQm9kdp8t9gv-WM,111
 indexify/extraction_policy.py,sha256=vKVHT8jSjzhUaKqWpewOGkYojMBplvGdSm9zoSN9Pcg,750
@@ -7,7 +7,7 @@ indexify/extractor.py,sha256=KMcP9xopHJRBzeSxalztGGTBvOzVKRFEsJynV-hLRSc,1175
 indexify/index.py,sha256=RvxYhJXEth-GKvqzlMiz5PuN1eIbZk84pt20piA1Gsw,504
 indexify/settings.py,sha256=yzWAEZkrTjykSMj3hrFU7l_jUoUCOUsgPVW1nU-qzJQ,46
 indexify/utils.py,sha256=rDN2lrsAs9noJEIjfx6ukmC2SAIyrlUt7QU-kaBjujM,125
-indexify-0.0.14.dist-info/LICENSE.txt,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-indexify-0.0.14.dist-info/METADATA,sha256=ANdIdnRnC6ISLYc1oTUb-BE-NjdgUg0iEu0dHRmonnI,1714
-indexify-0.0.14.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
-indexify-0.0.14.dist-info/RECORD,,
+indexify-0.0.16.dist-info/LICENSE.txt,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+indexify-0.0.16.dist-info/METADATA,sha256=h_tYmLlbYT0g_9SJnec9hgey8AYP0VTKascytOt0_jE,1714
+indexify-0.0.16.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+indexify-0.0.16.dist-info/RECORD,,

{indexify-0.0.14.dist-info → indexify-0.0.16.dist-info}/LICENSE.txt RENAMED Viewed

File without changes

{indexify-0.0.14.dist-info → indexify-0.0.16.dist-info}/WHEEL RENAMED Viewed

File without changes

indexify 0.0.14__py3-none-any.whl → 0.0.16__py3-none-any.whl

indexify 0.0.14__py3-none-any.whl → 0.0.16__py3-none-any.whl