indexify 0.0.2__py3-none-any.whl → 0.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
indexify/__init__.py CHANGED
@@ -1,7 +1,11 @@
1
- from .index import Index, Message, TextChunk
2
- from .memory import Memory
3
- from .repository import Repository
1
+ from .index import Index, AIndex
2
+ from .client import IndexifyClient
3
+ from .memory import Memory, AMemory
4
+ from .repository import Repository, ARepository, create_repository, list_repositories
5
+ from .data_containers import TextChunk, Message
6
+ from .utils import wait_until
7
+ from .settings import DEFAULT_SERVICE_URL
4
8
 
5
- DEFAULT_INDEXIFY_URL = "http://localhost:8900"
6
9
 
7
- __all__ = ["Index", "Memory", "Message", "TextChunk", "Repository", "DEFAULT_INDEXIFY_URL"]
10
+ __all__ = ["Index", "Memory", "Repository", "AIndex", "AMemory", "ARepository",
11
+ "Message", "TextChunk", "DEFAULT_SERVICE_URL", "wait_until", "IndexifyMemory"]
indexify/client.py ADDED
@@ -0,0 +1,30 @@
1
+ from .extractor import Extractor, list_extractors
2
+ from .repository import Repository, create_repository, list_repositories
3
+ from .settings import DEFAULT_SERVICE_URL
4
+
5
+
6
+ class IndexifyClient:
7
+
8
+ def __init__(self, service_url: str = DEFAULT_SERVICE_URL):
9
+ self._service_url = service_url
10
+
11
+ def create_repository(self, name: str, extractors: list = [], metadata: dict = {}) -> dict:
12
+ return create_repository(name, extractors, metadata, self._service_url)
13
+
14
+ @property
15
+ def extractors(self) -> list[Extractor]:
16
+ return [Extractor(**extractor) for extractor in list_extractors(self._service_url)]
17
+
18
+ def get_or_create_repository(self, name: str) -> Repository:
19
+ return Repository(name=name, service_url=self._service_url)
20
+
21
+ def list_extractors(self) -> list[dict]:
22
+ return list_extractors(base_url=self._service_url)
23
+
24
+ def list_repositories(self) -> list[dict]:
25
+ return list_repositories(service_url=self._service_url)
26
+
27
+ @property
28
+ def repositories(self) -> list[Repository]:
29
+ # TODO: implement this
30
+ pass
@@ -1,5 +1,5 @@
1
1
  from enum import Enum
2
- from typing import Optional, List
2
+ from typing import List
3
3
  from dataclasses import dataclass, field
4
4
 
5
5
 
@@ -11,6 +11,7 @@ class TextSplitter(str, Enum):
11
11
  def __str__(self) -> str:
12
12
  return self.value.lower()
13
13
 
14
+
14
15
  @dataclass
15
16
  class TextChunk:
16
17
  text: str
@@ -19,6 +20,7 @@ class TextChunk:
19
20
  def to_dict(self):
20
21
  return {"text": self.text, "metadata": self.metadata}
21
22
 
23
+
22
24
  @dataclass
23
25
  class Message:
24
26
  role: str
@@ -28,6 +30,7 @@ class Message:
28
30
  def to_dict(self):
29
31
  return {"role": self.role, "text": self.text, "metadata": self.metadata}
30
32
 
33
+
31
34
  @dataclass
32
35
  class SearchChunk:
33
36
  index: str
@@ -37,6 +40,7 @@ class SearchChunk:
37
40
  def to_dict(self):
38
41
  return {"index": self.index, "query": self.query, "k": self.k}
39
42
 
43
+
40
44
  @dataclass
41
45
  class SearchResult:
42
46
  results: List[TextChunk]
indexify/extractor.py ADDED
@@ -0,0 +1,25 @@
1
+ import requests
2
+
3
+ from .settings import DEFAULT_SERVICE_URL
4
+
5
+
6
+ def list_extractors(base_url: str = DEFAULT_SERVICE_URL) -> list[dict]:
7
+ response = requests.get(f"{base_url}/extractors")
8
+ response.raise_for_status()
9
+ return response.json()['extractors']
10
+
11
+
12
+ # TODO: consider naming this IndexifyExtractor
13
+ # TODO: consider making this a dataclass
14
+ class Extractor:
15
+
16
+ def __init__(self, name: str, description: str, extractor_type : dict):
17
+ self.name = name
18
+ self.description = description
19
+ self.extractor_type = extractor_type
20
+
21
+ def __repr__(self) -> str:
22
+ return f"Extractor(name={self.name}, description={self.description})"
23
+
24
+ def __str__(self) -> str:
25
+ return self.__repr__()
indexify/index.py CHANGED
@@ -1,20 +1,31 @@
1
- import requests
1
+ import aiohttp
2
2
 
3
- from .data_containers import *
4
- from .utils import _get_payload
3
+ from .data_containers import SearchChunk, TextChunk
4
+ from .utils import _get_payload, wait_until
5
5
 
6
6
 
7
- class Index:
7
+ class AIndex:
8
8
 
9
- def __init__(self, url, index):
9
+ def __init__(self, url: str, index: str = "default/default"):
10
10
  self._url = url
11
11
  self._index = index
12
12
 
13
- def search(self, query: str, top_k: int) -> list[TextChunk]:
13
+ async def search(self, query: str, top_k: int) -> list[TextChunk]:
14
14
  req = SearchChunk(index=self._index, query=query, k=top_k)
15
- resp = requests.get(f"{self._url}/index/search", json=req.to_dict())
16
- payload = _get_payload(resp)
17
- result = []
18
- for res in payload["results"]:
19
- result.append(TextChunk(text=res["text"], metadata=res["metadata"]))
20
- return result
15
+ async with aiohttp.ClientSession() as session:
16
+ async with session.get(f"{self._url}/index/search", json=req.to_dict()) as resp:
17
+ payload = await _get_payload(resp)
18
+ result = []
19
+ for res in payload["results"]:
20
+ result.append(TextChunk(text=res["text"], metadata=res["metadata"]))
21
+ return result
22
+
23
+
24
+ class Index(AIndex):
25
+
26
+ def __init__(self, url, index):
27
+ AIndex.__init__(self, url, index)
28
+
29
+ def search(self, query: str, top_k: int) -> list[TextChunk]:
30
+ return wait_until(AIndex.search(self, query, top_k))
31
+
indexify/memory.py CHANGED
@@ -1,39 +1,53 @@
1
- import requests
1
+ import aiohttp
2
2
 
3
3
  from .data_containers import *
4
- from .utils import _get_payload
4
+ from .utils import _get_payload, wait_until
5
5
 
6
6
 
7
- class Memory:
7
+ class AMemory:
8
8
 
9
- def __init__(self, url, index):
9
+ def __init__(self, url, repository="default"):
10
+ self._session_id = None
10
11
  self._url = url
11
- self._index = index
12
+ self._repo = repository
12
13
 
13
- def create(self) -> str:
14
- resp = requests.post(f"{self._url}/memory/create", json={})
15
- self.session_id = _get_payload(resp)["session_id"]
16
- return self.session_id
14
+ async def create(self) -> str:
15
+ async with aiohttp.ClientSession() as session:
16
+ async with session.post(f"{self._url}/memory/create", json={"repository": self._repo}) as resp:
17
+ resp = await _get_payload(resp)
18
+ self._session_id = resp["session_id"]
19
+ return self._session_id
17
20
 
18
- def add(self, *messages: Message) -> None:
21
+ async def add(self, *messages: Message) -> None:
19
22
  parsed_messages = []
20
23
  for message in messages:
21
24
  parsed_messages.append(message.to_dict())
22
25
 
23
- req = {"session_id": self.session_id, "messages": parsed_messages}
24
- resp = requests.post(f"{self._url}/memory/add", json=req)
25
- if resp.status_code == 200:
26
- return
27
- _get_payload(resp)
26
+ req = {"session_id": self._session_id, "repository": self._repo, "messages": parsed_messages}
27
+ async with aiohttp.ClientSession() as session:
28
+ async with session.post(f"{self._url}/memory/add", json=req) as resp:
29
+ return await _get_payload(resp)
30
+
31
+ async def all(self) -> list[Message]:
32
+ req = {"session_id": self._session_id, "repository": self._repo}
33
+ async with aiohttp.ClientSession() as session:
34
+ async with session.get(f"{self._url}/memory/get", json=req) as resp:
35
+ payload = await _get_payload(resp)
36
+ messages = []
37
+ for raw_message in payload["messages"]:
38
+ messages.append(Message(raw_message["role"], raw_message["text"], raw_message["metadata"]))
39
+ return messages
40
+
41
+
42
+ class Memory(AMemory):
43
+ def __init__(self, url, repository="default"):
44
+ AMemory.__init__(self, url, repository)
28
45
 
29
- def all(self) -> list[Message]:
30
- req = {"session_id": self.session_id}
31
- resp = requests.get(f"{self._url}/memory/get", json=req)
32
- if resp.status_code == 200:
33
- payload = _get_payload(resp)
34
- messages = []
35
- for raw_message in payload["messages"]:
36
- messages.append(Message(raw_message["role"], raw_message["text"], raw_message["metadata"]))
37
- return messages
38
- _get_payload(resp)
46
+ def create(self) -> str:
47
+ return wait_until(AMemory.create(self))
39
48
 
49
+ def add(self, *messages: Message) -> None:
50
+ wait_until(AMemory.add(self, *messages))
51
+
52
+ def all(self) -> list[Message]:
53
+ return wait_until(AMemory.all(self))
indexify/repository.py CHANGED
@@ -1,20 +1,161 @@
1
+ import aiohttp
1
2
  import requests
2
3
 
3
- from .data_containers import *
4
- from .utils import _get_payload
4
+ from .index import Index
5
+ from .data_containers import TextChunk
6
+ from .settings import DEFAULT_SERVICE_URL
7
+ from .utils import _get_payload, wait_until
5
8
 
6
- class Repository:
7
9
 
8
- def __init__(self, url, name):
9
- self._url = url
10
- self._name = name
10
+ def create_repository(name: str, extractors: list = (), metadata: dict = {},
11
+ service_url: str = DEFAULT_SERVICE_URL) -> dict:
12
+ req = {"name": name, "extractors": extractors, "metadata": metadata}
13
+ response = requests.post(f"{service_url}/repositories", json=req)
14
+ response.raise_for_status()
15
+ return response.json()
11
16
 
12
- def add(self, *chunks: TextChunk) -> None:
13
- parsed_chunks = []
14
- for chunk in chunks:
15
- parsed_chunks.append(chunk.to_dict())
16
- req = {"documents": parsed_chunks}
17
- resp = requests.post(f"{self._url}/repository/add_texts", json=req)
18
- if resp.status_code == 200:
19
- return
20
- _get_payload(resp)
17
+
18
+ def list_repositories(service_url: str = DEFAULT_SERVICE_URL) -> list[dict]:
19
+ response = requests.get(f"{service_url}/repositories")
20
+ response.raise_for_status()
21
+ return response.json()['repositories']
22
+
23
+
24
+ # TODO: consider tying this back to IndexifyExtractor
25
+ class ExtractorBinding:
26
+
27
+ def __init__(self, extractor_name: str, index_name: str, filters: dict, input_params: dict):
28
+ self.extractor_name = extractor_name
29
+ self.index_name = index_name
30
+ self.filters = filters
31
+ self.input_params = input_params
32
+
33
+ def __repr__(self) -> str:
34
+ return f"ExtractorBinding(extractor_name={self.extractor_name}, index_name={self.index_name})"
35
+
36
+ def __str__(self) -> str:
37
+ return self.__repr__()
38
+
39
+
40
+ class ARepository:
41
+
42
+ def __init__(self, name: str, service_url: str):
43
+ self.name = name
44
+ self._service_url = service_url
45
+ self.url = f"{self._service_url}/repositories/{self.name}"
46
+
47
+ async def run_extractors(self) -> dict:
48
+ async with aiohttp.ClientSession() as session:
49
+ async with session.post(f"{self.url}/run_extractors") as resp:
50
+ return await _get_payload(resp)
51
+
52
+ async def add_documents(self, *documents: dict) -> None:
53
+ if isinstance(documents[0], dict):
54
+ documents = [documents[0]] # single document passed
55
+ else:
56
+ documents = documents[0] # list of documents passed
57
+ for doc in documents:
58
+ if "metadata" not in doc:
59
+ doc.update({"metadata": {}})
60
+ req = {"documents": documents}
61
+ async with aiohttp.ClientSession() as session:
62
+ async with session.post(f"{self.url}/add_texts", json=req) as resp:
63
+ return await _get_payload(resp)
64
+
65
+
66
+ class Repository(ARepository):
67
+
68
+ def __init__(self, name: str = "default", service_url: str = DEFAULT_SERVICE_URL):
69
+ super().__init__(name, service_url)
70
+ if not self._name_exists():
71
+ print(f"creating repo {self.name}")
72
+ create_repository(name=self.name, service_url=self._service_url)
73
+
74
+ def add_documents(self, *documents: dict) -> None:
75
+ return wait_until(ARepository.add_documents(self, *documents))
76
+
77
+ def bind_extractor(self, extractor_name: str, index_name: str,
78
+ include: dict | None = None,
79
+ exclude: dict | None = None) -> dict:
80
+ """Bind an extractor to this repository
81
+
82
+ Args:
83
+ extractor_name (str): Name of extractor
84
+ index_name (str): Name of corresponding index
85
+ include (dict | None, optional): Conditions that must be true
86
+ for an extractor to run on a document in the repository.
87
+ Defaults to None.
88
+ exclude (dict | None, optional): Conditions that must be false
89
+ for an extractor to run on a document in the repository.
90
+ Defaults to None.
91
+
92
+ Returns:
93
+ dict: response payload
94
+
95
+ Examples:
96
+ >>> repo.bind_extractor("EfficientNet", "png_embeddings",
97
+ include={"file_ext": "png"})
98
+
99
+ >>> repo.bind_extractor("MiniLML6", "non_english",
100
+ exclude={"language": "en"})
101
+
102
+ """
103
+ filters = []
104
+ if include is not None:
105
+ filters.extend([{'eq': {k: v}} for k, v in include.items()])
106
+ if exclude is not None:
107
+ filters.extend([{'ne': {k: v}} for k, v in exclude.items()])
108
+ req = {"extractor_name": extractor_name,
109
+ "index_name": index_name,
110
+ "filters": filters}
111
+ response = requests.post(f"{self.url}/extractor_bindings", json=req)
112
+ response.raise_for_status()
113
+ return response.json()
114
+
115
+ @property
116
+ def extractor_bindings(self) -> list[ExtractorBinding]:
117
+ return [ExtractorBinding(**e) for e in self._get_repository_info()['extractor_bindings']]
118
+
119
+ @property
120
+ def indexes(self) -> list[Index]:
121
+ # TODO: implement this - can take from extractors but not correct
122
+ pass
123
+
124
+ # FIXME: query type should depend on index type
125
+ def query_attribute(self, index_name: str, content_id: str = None) -> dict:
126
+ # TODO: this should be async
127
+ params = {"index": index_name}
128
+ if content_id:
129
+ params.update({"content_id": content_id})
130
+ response = requests.get(f"{self.url}/attributes", params=params)
131
+ response.raise_for_status()
132
+ return response.json()['attributes']
133
+
134
+ def unbind_extractor(self, name) -> dict:
135
+ # TODO: implement this
136
+ pass
137
+
138
+ def run_extractors(self) -> dict:
139
+ return wait_until(ARepository.run_extractors(self))
140
+
141
+ # TODO: this should move to index
142
+ def search_index(self, index_name: str, query: str, top_k: int) -> list[TextChunk]:
143
+ # TODO: this should be async
144
+ req = {"index": index_name, "query": query, "k": top_k}
145
+ response = requests.post(f"{self.url}/search", json=req)
146
+ response.raise_for_status()
147
+ return response.json()['results']
148
+
149
+ def _get_repository_info(self) -> dict:
150
+ response = requests.get(f"{self.url}")
151
+ response.raise_for_status()
152
+ return response.json()['repository']
153
+
154
+ def _name_exists(self) -> bool:
155
+ return self.name in [r['name'] for r in list_repositories(self._service_url)]
156
+
157
+ def __repr__(self) -> str:
158
+ return f"Repository(name={self.name})"
159
+
160
+ def __str__(self) -> str:
161
+ return self.__repr__()
indexify/settings.py ADDED
@@ -0,0 +1 @@
1
+ DEFAULT_SERVICE_URL = "http://localhost:8900"
indexify/utils.py CHANGED
@@ -1,5 +1,6 @@
1
+ import asyncio
2
+ from enum import Enum
1
3
  import json
2
- from .data_containers import *
3
4
 
4
5
 
5
6
  class ApiException(Exception):
@@ -16,13 +17,24 @@ class Metric(str, Enum):
16
17
  return self.name.lower()
17
18
 
18
19
 
19
- def _get_payload(response):
20
- payload = {"errors": []}
21
- try:
22
- payload = json.loads(response.text)
23
- except:
24
- raise ApiException(response.text)
25
- if "errors" in payload.keys() and len(payload["errors"]) > 0:
26
- raise ApiException(f"Failed to create index: {payload['errors']}")
20
+ async def _get_payload(response):
21
+ response.raise_for_status()
22
+ resp = await response.text()
23
+ return json.loads(resp)
27
24
 
28
- return payload
25
+
26
+ def wait_until(functions):
27
+ single_result = False
28
+ if not isinstance(functions, list):
29
+ single_result = True
30
+ functions = [functions]
31
+ holder = []
32
+
33
+ async def run_and_capture_result():
34
+ holder.append(await asyncio.gather(*functions))
35
+
36
+ asyncio.run(run_and_capture_result())
37
+ if single_result:
38
+ return holder[0][0] # single result
39
+ else:
40
+ return holder[0] # list of results
@@ -0,0 +1,53 @@
1
+ Metadata-Version: 2.1
2
+ Name: indexify
3
+ Version: 0.0.3
4
+ Summary: Python Client for Indexify
5
+ Home-page: https://github.com/diptanu/indexify
6
+ License: Apache 2.0
7
+ Author: Diptanu Gon Choudhury
8
+ Author-email: diptanuc@gmail.com
9
+ Requires-Python: >=3.10.0,<4.0.0
10
+ Classifier: License :: Other/Proprietary License
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3.10
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Requires-Dist: aiohttp (>=3.4,<4.0)
15
+ Project-URL: Repository, https://github.com/diptanu/indexify
16
+ Description-Content-Type: text/markdown
17
+
18
+ # Indexify Python Client
19
+
20
+ ## Installation
21
+
22
+ This is the Python client for interacting with the Indexify service.
23
+
24
+ To install it, simply run:
25
+
26
+ ```shell
27
+ pip install indexify
28
+ ```
29
+
30
+ ## Usage
31
+
32
+ See the [getting started](https://getindexify.com/getting_started/) guide for examples of how to use the client.
33
+
34
+ ## Development
35
+
36
+ For first time setup, follow the steps [here](https://getindexify.com/develop/).
37
+
38
+ ### Steps for restarting dev server after updating server code
39
+
40
+ ```shell
41
+ ./install_python_deps.sh
42
+ # use `-e`` if you're developing extractors
43
+ (cd extractors && pip install -e .)
44
+ # use `-e`` if you're developing sdk-py
45
+ (cd sdk-py && pip install -e .)
46
+
47
+ cargo build
48
+ make local-dev
49
+
50
+ # start the server
51
+ ./target/debug/indexify start-server -d -c local_config.yaml
52
+ ```
53
+
@@ -0,0 +1,13 @@
1
+ indexify/__init__.py,sha256=bqinywn8AgiujXVYQiS7_5HXUYPzaJW-MpkzQAc6z3k,482
2
+ indexify/client.py,sha256=o8e9vElIfeAqTzoqQ9pXZiaXudKQR4LfRlN1DdFwnT0,1081
3
+ indexify/data_containers.py,sha256=db5qJRN7Wm3yLzxhovxrr71Cx-yNHTOzi05xO8kjvkU,887
4
+ indexify/extractor.py,sha256=dM8cCQpv9lAxUUSM9NyAaobU3bP1tS8Vw-WzUMlUhWM,725
5
+ indexify/index.py,sha256=GIgmgfQPZhAAfPH2uUfbi57PiOEBLisgz3Fx-Umt-Rg,1002
6
+ indexify/memory.py,sha256=FlV73TM4egAfniPeW7XqqEFo6ybV-nAElgU0PuuHt68,1965
7
+ indexify/repository.py,sha256=0N2cMELrkhgWHHQ5Ahlk0N4Gp_zKd25Vbzo_z2JP-8w,6070
8
+ indexify/settings.py,sha256=yzWAEZkrTjykSMj3hrFU7l_jUoUCOUsgPVW1nU-qzJQ,46
9
+ indexify/utils.py,sha256=HBnm2a7F3ML3BcfverDe8lwkZs4xihjN0wDXr3GTt-c,881
10
+ indexify-0.0.3.dist-info/LICENSE.txt,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
11
+ indexify-0.0.3.dist-info/METADATA,sha256=6kv-vvTqZeBvXYxH-L1fOB1tlky5u3JdALJLbR6aOMI,1350
12
+ indexify-0.0.3.dist-info/WHEEL,sha256=WGfLGfLX43Ei_YORXSnT54hxFygu34kMpcQdmgmEwCQ,88
13
+ indexify-0.0.3.dist-info/RECORD,,
@@ -1,18 +0,0 @@
1
- Metadata-Version: 2.1
2
- Name: indexify
3
- Version: 0.0.2
4
- Summary: Python Client for Indexify
5
- Home-page: https://github.com/diptanu/indexify
6
- License: Apache 2.0
7
- Author: Diptanu Gon Choudhury
8
- Author-email: diptanuc@gmail.com
9
- Requires-Python: >=3.10.0,<4.0.0
10
- Classifier: License :: Other/Proprietary License
11
- Classifier: Programming Language :: Python :: 3
12
- Classifier: Programming Language :: Python :: 3.10
13
- Classifier: Programming Language :: Python :: 3.11
14
- Requires-Dist: requests (>=2.28.2,<3.0.0,!=2.30.0)
15
- Project-URL: Repository, https://github.com/diptanu/indexify
16
- Description-Content-Type: text/x-rst
17
-
18
- # Indexify Python Client
@@ -1,10 +0,0 @@
1
- indexify/__init__.py,sha256=51zytHqEftHkSk1kFV3BXhaXqVMvjSTDqkLbWcoDRdk,248
2
- indexify/data_containers.py,sha256=9R_yvKXl3U17QH7PwKf08fPKKoG7D2uttORLv9fiasQ,893
3
- indexify/index.py,sha256=igpAO70SNlsFTva30zWUGNuJOL5y7gfrkeCojhus1d0,593
4
- indexify/memory.py,sha256=B_Xr7X8LJZmruYOuS5KhisVUpaEkAQS4aUGLEfH2kzI,1228
5
- indexify/repository.py,sha256=HzGtsW_JJfz2XeXWuIS9k1iQvRnkR2sBJl1enHkSVDg,540
6
- indexify/utils.py,sha256=StVuI6A-gtfhmISmefGxzsezARC_0PRXSjlwbLN05is,640
7
- indexify-0.0.2.dist-info/LICENSE.txt,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
8
- indexify-0.0.2.dist-info/METADATA,sha256=Ycm3ZjW-BBjPXYCi20yBzBNkK9ELfQKzXALL6h-ykXk,625
9
- indexify-0.0.2.dist-info/WHEEL,sha256=WGfLGfLX43Ei_YORXSnT54hxFygu34kMpcQdmgmEwCQ,88
10
- indexify-0.0.2.dist-info/RECORD,,