PyPI - indexify - Versions diffs - 0.0.15__tar.gz → 0.0.17__tar.gz - Mend

indexify 0.0.15.tar.gz → 0.0.17.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

{indexify-0.0.15 → indexify-0.0.17}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: indexify
-Version: 0.0.15
+Version: 0.0.17
 Summary: Python Client for Indexify
 Home-page: https://github.com/tensorlakeai/indexify
 License: Apache 2.0

{indexify-0.0.15 → indexify-0.0.17}/indexify/client.py RENAMED Viewed

@@ -1,4 +1,7 @@
+import yaml
 import httpx
+import uuid
+import hashlib
 import json
 from collections import namedtuple
 from .settings import DEFAULT_SERVICE_URL
@@ -12,7 +15,7 @@ from dataclasses import dataclass
 from typing import List, Optional, Union, Dict
-Document = namedtuple("Document", ["text", "labels"])
+Document = namedtuple("Document", ["text", "labels", "id"])
 SQLQueryRow = namedtuple("SQLQueryRow", ["content_id", "data"])
@@ -42,16 +45,32 @@ class IndexifyClient:
     def __init__(
         self,
-        service_url: str = DEFAULT_SERVICE_URL,
+        service_url: str = DEFAULT_SERVICE_URL, # switch this to DEFAULT_SERVICE_URL_HTTPS for TLS
         namespace: str = "default",
+        config_path: Optional[str] = None,
         *args,
         **kwargs,
     ):
+        if config_path:
+            with open(config_path, 'r') as file:
+                config = yaml.safe_load(file)
+            if config.get('use_tls', False):
+                tls_config = config['tls_config']
+                self._client = httpx.Client(
+                    http2=True,
+                    cert=(tls_config['cert_path'], tls_config['key_path']),
+                    verify=tls_config.get('ca_bundle_path', True)
+                )
+            else:
+                self._client = httpx.Client(*args, **kwargs)
+        else:
+            self._client = httpx.Client(*args, **kwargs)
         self.namespace: str = namespace
         self.extraction_policies: List[ExtractionPolicy] = []
         self.labels: dict = {}
         self._service_url = service_url
-        self._client = httpx.Client(*args, **kwargs)
         # get namespace data
         response = self.get(f"namespaces/{self.namespace}")
@@ -396,7 +415,7 @@ class IndexifyClient:
             raise ApiException(exc.response.text)
     def add_documents(
-        self, documents: Union[Document, str, List[Union[Document, str]]]
+        self, documents: Union[Document, str, List[Union[Document, str]]], doc_id=None
     ) -> None:
         """
         Add documents to current namespace.
@@ -407,14 +426,14 @@ class IndexifyClient:
         if isinstance(documents, Document):
             documents = [documents]
         elif isinstance(documents, str):
-            documents = [Document(documents, {})]
+            documents = [Document(documents, {}, id=doc_id)]
         elif isinstance(documents, list):
             new_documents = []
             for item in documents:
                 if isinstance(item, Document):
                     new_documents.append(item)
                 elif isinstance(item, str):
-                    new_documents.append(Document(item, {}))
+                    new_documents.append(Document(item, {}, id=None)) # don't pass in id for a string content because doesn't make sense to have same content id for all strings
                 else:
                     raise ValueError(
                         "List items must be either Document instances or strings."
@@ -425,7 +444,7 @@ class IndexifyClient:
                 "Invalid type for documents. Expected Document, str, or list of these."
             )
-        req = {"documents": documents}
+        req = {"documents": [doc._asdict() for doc in documents]}
         response = self.post(
             f"namespaces/{self.namespace}/add_texts",
             json=req,
@@ -470,7 +489,7 @@ class IndexifyClient:
         response.raise_for_status()
         return response.json().get("metadata",[])
-    def search_index(self, name: str, query: str, top_k: int) -> list[TextChunk]:
+    def search_index(self, name: str, query: str, top_k: int, filters: List[str] = []) -> list[TextChunk]:
         """
         Search index in the current namespace.
@@ -478,8 +497,9 @@ class IndexifyClient:
             - name (str): name of index to search
             - query (str): query string
             - top_k (int): top k nearest neighbors to be returned
+            - filters (List[str]): list of filters to apply
         """
-        req = {"index": name, "query": query, "k": top_k}
+        req = {"index": name, "query": query, "k": top_k, "filters": filters}
         response = self.post(
             f"namespaces/{self.namespace}/search",
             json=req,
@@ -488,20 +508,28 @@ class IndexifyClient:
         response.raise_for_status()
         return response.json()["results"]
-    def upload_file(self, path: str):
+    def upload_file(self, path: str, id=None, labels: dict = {}) -> str:
         """
         Upload a file.
         Args:
             - path (str): relative path to the file to be uploaded
+            - labels (dict): labels to be associated with the file
         """
+        params={}
+        if id is not None:
+            params['id'] = id
         with open(path, "rb") as f:
             response = self.post(
                 f"namespaces/{self.namespace}/upload_file",
                 files={"file": f},
+                data=labels,
+                params=params,
                 timeout=None,
             )
             response.raise_for_status()
+            response_json = response.json()
+            return response_json["content_id"]
     def list_schemas(self) -> List[str]:
         """
@@ -541,4 +569,27 @@ class IndexifyClient:
         )
         response.raise_for_status()
         return response.json()
+    def generate_unique_hex_id(self):
+        """
+        Generate a unique hexadecimal identifier
+        Returns:
+            str: a unique hexadecimal string
+        """
+        return uuid.uuid4().hex[:16]
+    def generate_hash_from_string(self, input_string: str):
+        """
+        Generate a hash for the given string and return it as a hexadecimal string.
+        Args:
+            input_string (str): The input string to hash.
+        Returns:
+            str: The hexadecimal hash of the input string.
+        """
+        hash_object = hashlib.sha256(input_string.encode())
+        return hash_object.hexdigest()[:16]

indexify-0.0.17/indexify/settings.py ADDED Viewed

@@ -0,0 +1,2 @@
+DEFAULT_SERVICE_URL = "http://localhost:8900"
+DEFAULT_SERVICE_URL_HTTPS = "https://localhost:8900"

{indexify-0.0.15 → indexify-0.0.17}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "indexify"
-version = "0.0.15"
+version = "0.0.17"
 description = "Python Client for Indexify"
 authors = ["Diptanu Gon Choudhury <diptanuc@gmail.com>", "Vijay Parthasarathy <vijay2win@gmail.com>"]
 license = "Apache 2.0"