indexify 0.0.1__tar.gz → 0.0.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
indexify-0.0.3/PKG-INFO ADDED
@@ -0,0 +1,53 @@
+ Metadata-Version: 2.1
+ Name: indexify
+ Version: 0.0.3
+ Summary: Python Client for Indexify
+ Home-page: https://github.com/diptanu/indexify
+ License: Apache 2.0
+ Author: Diptanu Gon Choudhury
+ Author-email: diptanuc@gmail.com
+ Requires-Python: >=3.10.0,<4.0.0
+ Classifier: License :: Other/Proprietary License
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3.10
+ Classifier: Programming Language :: Python :: 3.11
+ Requires-Dist: aiohttp (>=3.4,<4.0)
+ Project-URL: Repository, https://github.com/diptanu/indexify
+ Description-Content-Type: text/markdown
+
+ # Indexify Python Client
+
+ ## Installation
+
+ This is the Python client for interacting with the Indexify service.
+
+ To install it, simply run:
+
+ ```shell
+ pip install indexify
+ ```
+
+ ## Usage
+
+ See the [getting started](https://getindexify.com/getting_started/) guide for examples of how to use the client.
+
+ ## Development
+
+ For first-time setup, follow the steps [here](https://getindexify.com/develop/).
+
+ ### Steps for restarting the dev server after updating server code
+
+ ```shell
+ ./install_python_deps.sh
+ # use `-e` if you're developing extractors
+ (cd extractors && pip install -e .)
+ # use `-e` if you're developing sdk-py
+ (cd sdk-py && pip install -e .)
+
+ cargo build
+ make local-dev
+
+ # start the server
+ ./target/debug/indexify start-server -d -c local_config.yaml
+ ```
+
indexify-0.0.3/README.md ADDED
@@ -0,0 +1,35 @@
+ # Indexify Python Client
+
+ ## Installation
+
+ This is the Python client for interacting with the Indexify service.
+
+ To install it, simply run:
+
+ ```shell
+ pip install indexify
+ ```
+
+ ## Usage
+
+ See the [getting started](https://getindexify.com/getting_started/) guide for examples of how to use the client.
+
+ ## Development
+
+ For first-time setup, follow the steps [here](https://getindexify.com/develop/).
+
+ ### Steps for restarting the dev server after updating server code
+
+ ```shell
+ ./install_python_deps.sh
+ # use `-e` if you're developing extractors
+ (cd extractors && pip install -e .)
+ # use `-e` if you're developing sdk-py
+ (cd sdk-py && pip install -e .)
+
+ cargo build
+ make local-dev
+
+ # start the server
+ ./target/debug/indexify start-server -d -c local_config.yaml
+ ```
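Editor's note: as orientation for the new 0.0.3 surface (not part of the packaged README), a minimal end-to-end sketch, assuming `pip install indexify==0.0.3` and an Indexify server listening on the default `http://localhost:8900`:

```python
# Usage sketch, not from the package; assumes a live server at the
# default service URL (http://localhost:8900).
from indexify import IndexifyClient

client = IndexifyClient()  # defaults to DEFAULT_SERVICE_URL
repo = client.get_or_create_repository("default")
repo.add_documents({"text": "Indexify is a retrieval service."})
print(client.list_repositories())
```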
indexify-0.0.3/indexify/__init__.py ADDED
@@ -0,0 +1,11 @@
+ from .index import Index, AIndex
+ from .client import IndexifyClient
+ from .memory import Memory, AMemory
+ from .repository import Repository, ARepository, create_repository, list_repositories
+ from .data_containers import TextChunk, Message
+ from .utils import wait_until
+ from .settings import DEFAULT_SERVICE_URL
+
+
+ __all__ = ["Index", "Memory", "Repository", "AIndex", "AMemory", "ARepository",
+            "Message", "TextChunk", "DEFAULT_SERVICE_URL", "wait_until", "IndexifyClient"]
indexify-0.0.3/indexify/client.py ADDED
@@ -0,0 +1,30 @@
+ from .extractor import Extractor, list_extractors
+ from .repository import Repository, create_repository, list_repositories
+ from .settings import DEFAULT_SERVICE_URL
+
+
+ class IndexifyClient:
+
+     def __init__(self, service_url: str = DEFAULT_SERVICE_URL):
+         self._service_url = service_url
+
+     def create_repository(self, name: str, extractors: list = [], metadata: dict = {}) -> dict:
+         return create_repository(name, extractors, metadata, self._service_url)
+
+     @property
+     def extractors(self) -> list[Extractor]:
+         return [Extractor(**extractor) for extractor in list_extractors(self._service_url)]
+
+     def get_or_create_repository(self, name: str) -> Repository:
+         return Repository(name=name, service_url=self._service_url)
+
+     def list_extractors(self) -> list[dict]:
+         return list_extractors(base_url=self._service_url)
+
+     def list_repositories(self) -> list[dict]:
+         return list_repositories(service_url=self._service_url)
+
+     @property
+     def repositories(self) -> list[Repository]:
+         # TODO: implement this
+         pass
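For reference, a hedged sketch of driving `IndexifyClient`; the repository name and metadata are illustrative, and a live server is assumed:

```python
from indexify import IndexifyClient

client = IndexifyClient("http://localhost:8900")
client.create_repository("research", metadata={"owner": "me"})
print(client.list_repositories())    # GET /repositories
for extractor in client.extractors:  # GET /extractors, wrapped in Extractor
    print(extractor)                 # Extractor(name=..., description=...)
```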
indexify-0.0.3/indexify/data_containers.py ADDED
@@ -0,0 +1,46 @@
+ from enum import Enum
+ from typing import Any, List
+ from dataclasses import dataclass, field
+
+
+ class TextSplitter(str, Enum):
+     NEWLINE = "new_line"
+     REGEX = "regex"
+     NOOP = "noop"
+
+     def __str__(self) -> str:
+         return self.value.lower()
+
+
+ @dataclass
+ class TextChunk:
+     text: str
+     metadata: dict[str, Any] = field(default_factory=dict)
+
+     def to_dict(self):
+         return {"text": self.text, "metadata": self.metadata}
+
+
+ @dataclass
+ class Message:
+     role: str
+     text: str
+     metadata: dict[str, Any] = field(default_factory=dict)
+
+     def to_dict(self):
+         return {"role": self.role, "text": self.text, "metadata": self.metadata}
+
+
+ @dataclass
+ class SearchChunk:
+     index: str
+     query: str
+     k: int
+
+     def to_dict(self):
+         return {"index": self.index, "query": self.query, "k": self.k}
+
+
+ @dataclass
+ class SearchResult:
+     results: List[TextChunk]
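These containers are plain dataclasses, so they can be exercised without a server; a small sketch:

```python
from indexify import Message, TextChunk

chunk = TextChunk(text="hello world")              # metadata defaults to {}
message = Message(role="user", text="What is Indexify?")
print(chunk.to_dict())    # {'text': 'hello world', 'metadata': {}}
print(message.to_dict())  # {'role': 'user', 'text': 'What is Indexify?', 'metadata': {}}
```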
indexify-0.0.3/indexify/extractor.py ADDED
@@ -0,0 +1,25 @@
+ import requests
+
+ from .settings import DEFAULT_SERVICE_URL
+
+
+ def list_extractors(base_url: str = DEFAULT_SERVICE_URL) -> list[dict]:
+     response = requests.get(f"{base_url}/extractors")
+     response.raise_for_status()
+     return response.json()['extractors']
+
+
+ # TODO: consider naming this IndexifyExtractor
+ # TODO: consider making this a dataclass
+ class Extractor:
+
+     def __init__(self, name: str, description: str, extractor_type: dict):
+         self.name = name
+         self.description = description
+         self.extractor_type = extractor_type
+
+     def __repr__(self) -> str:
+         return f"Extractor(name={self.name}, description={self.description})"
+
+     def __str__(self) -> str:
+         return self.__repr__()
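A sketch of the module-level helper, assuming a running server with extractors registered; each returned dict carries the `name`, `description`, and `extractor_type` fields consumed by `Extractor(**extractor)` above:

```python
from indexify.extractor import list_extractors

for info in list_extractors(base_url="http://localhost:8900"):
    print(f"{info['name']}: {info['description']}")
```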
indexify-0.0.3/indexify/index.py ADDED
@@ -0,0 +1,31 @@
+ import aiohttp
+
+ from .data_containers import SearchChunk, TextChunk
+ from .utils import _get_payload, wait_until
+
+
+ class AIndex:
+
+     def __init__(self, url: str, index: str = "default/default"):
+         self._url = url
+         self._index = index
+
+     async def search(self, query: str, top_k: int) -> list[TextChunk]:
+         req = SearchChunk(index=self._index, query=query, k=top_k)
+         async with aiohttp.ClientSession() as session:
+             async with session.get(f"{self._url}/index/search", json=req.to_dict()) as resp:
+                 payload = await _get_payload(resp)
+                 result = []
+                 for res in payload["results"]:
+                     result.append(TextChunk(text=res["text"], metadata=res["metadata"]))
+                 return result
+
+
+ class Index(AIndex):
+
+     def __init__(self, url, index):
+         AIndex.__init__(self, url, index)
+
+     def search(self, query: str, top_k: int) -> list[TextChunk]:
+         return wait_until(AIndex.search(self, query, top_k))
+
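The async variant can be driven directly with asyncio; a sketch assuming a populated `default/default` index on a live server:

```python
import asyncio

from indexify import AIndex

async def main() -> None:
    index = AIndex("http://localhost:8900", index="default/default")
    for chunk in await index.search("hello", top_k=3):
        print(chunk.text, chunk.metadata)

asyncio.run(main())
```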
indexify-0.0.3/indexify/memory.py ADDED
@@ -0,0 +1,53 @@
+ import aiohttp
+
+ from .data_containers import Message
+ from .utils import _get_payload, wait_until
+
+
+ class AMemory:
+
+     def __init__(self, url, repository="default"):
+         self._session_id = None
+         self._url = url
+         self._repo = repository
+
+     async def create(self) -> str:
+         async with aiohttp.ClientSession() as session:
+             async with session.post(f"{self._url}/memory/create", json={"repository": self._repo}) as resp:
+                 resp = await _get_payload(resp)
+                 self._session_id = resp["session_id"]
+                 return self._session_id
+
+     async def add(self, *messages: Message) -> None:
+         parsed_messages = []
+         for message in messages:
+             parsed_messages.append(message.to_dict())
+
+         req = {"session_id": self._session_id, "repository": self._repo, "messages": parsed_messages}
+         async with aiohttp.ClientSession() as session:
+             async with session.post(f"{self._url}/memory/add", json=req) as resp:
+                 return await _get_payload(resp)
+
+     async def all(self) -> list[Message]:
+         req = {"session_id": self._session_id, "repository": self._repo}
+         async with aiohttp.ClientSession() as session:
+             async with session.get(f"{self._url}/memory/get", json=req) as resp:
+                 payload = await _get_payload(resp)
+                 messages = []
+                 for raw_message in payload["messages"]:
+                     messages.append(Message(raw_message["role"], raw_message["text"], raw_message["metadata"]))
+                 return messages
+
+
+ class Memory(AMemory):
+     def __init__(self, url, repository="default"):
+         AMemory.__init__(self, url, repository)
+
+     def create(self) -> str:
+         return wait_until(AMemory.create(self))
+
+     def add(self, *messages: Message) -> None:
+         wait_until(AMemory.add(self, *messages))
+
+     def all(self) -> list[Message]:
+         return wait_until(AMemory.all(self))
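A sketch of a memory session through the blocking `Memory` wrapper (server required; the message contents are illustrative):

```python
from indexify import Memory, Message

memory = Memory("http://localhost:8900")
session_id = memory.create()               # POST /memory/create
memory.add(Message("human", "Indexify is amazing!"),
           Message("assistant", "How can I help you?"))
print(session_id)
print(memory.all())                        # replays the stored messages
```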
indexify-0.0.3/indexify/repository.py ADDED
@@ -0,0 +1,161 @@
+ import aiohttp
+ import requests
+
+ from .index import Index
+ from .data_containers import TextChunk
+ from .settings import DEFAULT_SERVICE_URL
+ from .utils import _get_payload, wait_until
+
+
+ def create_repository(name: str, extractors: list = (), metadata: dict = {},
+                       service_url: str = DEFAULT_SERVICE_URL) -> dict:
+     req = {"name": name, "extractors": extractors, "metadata": metadata}
+     response = requests.post(f"{service_url}/repositories", json=req)
+     response.raise_for_status()
+     return response.json()
+
+
+ def list_repositories(service_url: str = DEFAULT_SERVICE_URL) -> list[dict]:
+     response = requests.get(f"{service_url}/repositories")
+     response.raise_for_status()
+     return response.json()['repositories']
+
+
+ # TODO: consider tying this back to IndexifyExtractor
+ class ExtractorBinding:
+
+     def __init__(self, extractor_name: str, index_name: str, filters: dict, input_params: dict):
+         self.extractor_name = extractor_name
+         self.index_name = index_name
+         self.filters = filters
+         self.input_params = input_params
+
+     def __repr__(self) -> str:
+         return f"ExtractorBinding(extractor_name={self.extractor_name}, index_name={self.index_name})"
+
+     def __str__(self) -> str:
+         return self.__repr__()
+
+
+ class ARepository:
+
+     def __init__(self, name: str, service_url: str):
+         self.name = name
+         self._service_url = service_url
+         self.url = f"{self._service_url}/repositories/{self.name}"
+
+     async def run_extractors(self) -> dict:
+         async with aiohttp.ClientSession() as session:
+             async with session.post(f"{self.url}/run_extractors") as resp:
+                 return await _get_payload(resp)
+
+     async def add_documents(self, *documents: dict) -> None:
+         if isinstance(documents[0], dict):
+             documents = [documents[0]]  # single document passed
+         else:
+             documents = documents[0]  # list of documents passed
+         for doc in documents:
+             if "metadata" not in doc:
+                 doc.update({"metadata": {}})
+         req = {"documents": documents}
+         async with aiohttp.ClientSession() as session:
+             async with session.post(f"{self.url}/add_texts", json=req) as resp:
+                 return await _get_payload(resp)
+
+
+ class Repository(ARepository):
+
+     def __init__(self, name: str = "default", service_url: str = DEFAULT_SERVICE_URL):
+         super().__init__(name, service_url)
+         if not self._name_exists():
+             print(f"creating repo {self.name}")
+             create_repository(name=self.name, service_url=self._service_url)
+
+     def add_documents(self, *documents: dict) -> None:
+         return wait_until(ARepository.add_documents(self, *documents))
+
+     def bind_extractor(self, extractor_name: str, index_name: str,
+                        include: dict | None = None,
+                        exclude: dict | None = None) -> dict:
+         """Bind an extractor to this repository
+
+         Args:
+             extractor_name (str): Name of extractor
+             index_name (str): Name of corresponding index
+             include (dict | None, optional): Conditions that must be true
+                 for an extractor to run on a document in the repository.
+                 Defaults to None.
+             exclude (dict | None, optional): Conditions that must be false
+                 for an extractor to run on a document in the repository.
+                 Defaults to None.
+
+         Returns:
+             dict: response payload
+
+         Examples:
+             >>> repo.bind_extractor("EfficientNet", "png_embeddings",
+                                     include={"file_ext": "png"})
+
+             >>> repo.bind_extractor("MiniLML6", "non_english",
+                                     exclude={"language": "en"})
+
+         """
+         filters = []
+         if include is not None:
+             filters.extend([{'eq': {k: v}} for k, v in include.items()])
+         if exclude is not None:
+             filters.extend([{'ne': {k: v}} for k, v in exclude.items()])
+         req = {"extractor_name": extractor_name,
+                "index_name": index_name,
+                "filters": filters}
+         response = requests.post(f"{self.url}/extractor_bindings", json=req)
+         response.raise_for_status()
+         return response.json()
+
+     @property
+     def extractor_bindings(self) -> list[ExtractorBinding]:
+         return [ExtractorBinding(**e) for e in self._get_repository_info()['extractor_bindings']]
+
+     @property
+     def indexes(self) -> list[Index]:
+         # TODO: implement this - can take from extractors but not correct
+         pass
+
+     # FIXME: query type should depend on index type
+     def query_attribute(self, index_name: str, content_id: str = None) -> dict:
+         # TODO: this should be async
+         params = {"index": index_name}
+         if content_id:
+             params.update({"content_id": content_id})
+         response = requests.get(f"{self.url}/attributes", params=params)
+         response.raise_for_status()
+         return response.json()['attributes']
+
+     def unbind_extractor(self, name) -> dict:
+         # TODO: implement this
+         pass
+
+     def run_extractors(self) -> dict:
+         return wait_until(ARepository.run_extractors(self))
+
+     # TODO: this should move to index
+     def search_index(self, index_name: str, query: str, top_k: int) -> list[TextChunk]:
+         # TODO: this should be async
+         req = {"index": index_name, "query": query, "k": top_k}
+         response = requests.post(f"{self.url}/search", json=req)
+         response.raise_for_status()
+         return response.json()['results']
+
+     def _get_repository_info(self) -> dict:
+         response = requests.get(f"{self.url}")
+         response.raise_for_status()
+         return response.json()['repository']
+
+     def _name_exists(self) -> bool:
+         return self.name in [r['name'] for r in list_repositories(self._service_url)]
+
+     def __repr__(self) -> str:
+         return f"Repository(name={self.name})"
+
+     def __str__(self) -> str:
+         return self.__repr__()
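Putting `Repository` together: a sketch that binds an extractor, ingests documents, runs extraction, and searches the resulting index. The extractor name `MiniLML6` and index name `embeddings` are illustrative, and a live server is assumed:

```python
from indexify import Repository

repo = Repository("default")               # created on the server if missing
repo.bind_extractor("MiniLML6", "embeddings")
repo.add_documents([
    {"text": "Indexify is a retrieval service."},
    {"text": "It runs extractors over documents.", "metadata": {"source": "docs"}},
])
repo.run_extractors()                      # POST /repositories/default/run_extractors
print(repo.search_index("embeddings", "retrieval service", top_k=3))
```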
indexify-0.0.3/indexify/settings.py ADDED
@@ -0,0 +1 @@
+ DEFAULT_SERVICE_URL = "http://localhost:8900"
indexify-0.0.3/indexify/utils.py ADDED
@@ -0,0 +1,40 @@
+ import asyncio
+ from enum import Enum
+ import json
+
+
+ class ApiException(Exception):
+     def __init__(self, message: str) -> None:
+         super().__init__(message)
+
+
+ class Metric(str, Enum):
+     COSINE = "cosine"
+     DOT = "dot"
+     EUCLIDEAN = "euclidean"
+
+     def __str__(self) -> str:
+         return self.name.lower()
+
+
+ async def _get_payload(response):
+     response.raise_for_status()
+     resp = await response.text()
+     return json.loads(resp)
+
+
+ def wait_until(functions):
+     single_result = False
+     if not isinstance(functions, list):
+         single_result = True
+         functions = [functions]
+     holder = []
+
+     async def run_and_capture_result():
+         holder.append(await asyncio.gather(*functions))
+
+     asyncio.run(run_and_capture_result())
+     if single_result:
+         return holder[0][0]  # single result
+     else:
+         return holder[0]  # list of results
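`wait_until` is the sync-over-async bridge used by `Index`, `Memory`, and `Repository`: it drives one coroutine (or a list of them) to completion on a fresh event loop via `asyncio.run`, so it must not be called from inside an already-running loop. A self-contained sketch:

```python
import asyncio

from indexify import wait_until

async def double(x: int) -> int:
    await asyncio.sleep(0)
    return 2 * x

print(wait_until(double(21)))               # 42 (single coroutine -> single result)
print(wait_until([double(1), double(2)]))   # [2, 4] (list -> list of results)
```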
indexify-0.0.1/pyproject.toml → indexify-0.0.3/pyproject.toml
@@ -1,16 +1,16 @@
  [tool.poetry]
  name = "indexify"
- version = "0.0.1"
+ version = "0.0.3"
  description = "Python Client for Indexify"
- authors = ["Diptanu Gon Choudhury <diptanuc@gmail.com>"]
+ authors = ["Diptanu Gon Choudhury <diptanuc@gmail.com>", "Vijay Parthasarathy <vijay2win@gmail.com>"]
  license = "Apache 2.0"
- readme = "README.rst"
+ readme = "README.md"
  homepage = "https://github.com/diptanu/indexify"
  repository = "https://github.com/diptanu/indexify"

  [tool.poetry.dependencies]
  python = "^3.10.0"
- requests = "^2.28.2, !=2.30.0"
+ aiohttp = "^3.4"

  [tool.poetry.dev-dependencies]
  black = "^22.3.0"
@@ -22,4 +22,4 @@ pytest-watch = "^4.2.0"

  [build-system]
  requires = ["poetry>=1.2"]
- build-backend = "poetry.masonry.api"
+ build-backend = "poetry.core.masonry.api"
indexify-0.0.1/PKG-INFO DELETED
@@ -1,18 +0,0 @@
- Metadata-Version: 2.1
- Name: indexify
- Version: 0.0.1
- Summary: Python Client for Indexify
- Home-page: https://github.com/diptanu/indexify
- License: Apache 2.0
- Author: Diptanu Gon Choudhury
- Author-email: diptanuc@gmail.com
- Requires-Python: >=3.10.0,<4.0.0
- Classifier: License :: Other/Proprietary License
- Classifier: Programming Language :: Python :: 3
- Classifier: Programming Language :: Python :: 3.10
- Classifier: Programming Language :: Python :: 3.11
- Requires-Dist: requests (>=2.28.2,<3.0.0,!=2.30.0)
- Project-URL: Repository, https://github.com/diptanu/indexify
- Description-Content-Type: text/x-rst
-
- # Indexify Python Client
indexify-0.0.1/README.rst DELETED
@@ -1 +0,0 @@
- # Indexify Python Client
@@ -1,3 +0,0 @@
- """
- Python Client for Indexify
- """
@@ -1,115 +0,0 @@
- from typing import Optional, List
- from enum import Enum
- import requests
- import json
- import dataclasses
- from dataclasses import dataclass
-
- DEFAULT_INDEXIFY_URL = "https://localhost:8090"
-
- DEFAULT_EMBEDDING_MODEL = "all-minilm-l6-v2"
-
-
- class ApiException(Exception):
-     def __init__(self, message: str) -> None:
-         super().__init__(message)
-
-
- class Metric(str, Enum):
-     COSINE = "cosine"
-     DOT = "dot"
-     EUCLIDEAN = "euclidean"
-
-     def __str__(self) -> str:
-         return self.name.lower()
-
-
- class TextSplitter(str, Enum):
-     NEWLINE = "new_line"
-     REGEX = "regex"
-     NOOP = "noop"
-
-     def __str__(self) -> str:
-         return self.value.lower()
-
-
- @dataclass
- class TextChunk:
-     text: str
-     metadata: dict
-
-     def to_json(self):
-         return json.dumps({"text": self.text, "metadata": self.metadata})
-
-
- @dataclass
- class SearchChunk:
-     index: str
-     query: str
-     k: int
-
-
- @dataclass
- class SearchResult:
-     results: List[TextChunk]
-
-
- class Indexify:
-     def __init__(self, url, index) -> None:
-         self._url = url
-         self._index = index
-
-     @classmethod
-     def create_index(
-         cls,
-         name: str,
-         indexify_url: Optional[str] = DEFAULT_INDEXIFY_URL,
-         embedding_model: Optional[str] = DEFAULT_EMBEDDING_MODEL,
-         metric: Metric = Metric.COSINE,
-         splitter: Optional[str] = TextSplitter.NEWLINE,
-         unique_labels=Optional[List[str]],
-     ):
-         req = {
-             "name": name,
-             "embedding_model": embedding_model,
-             "metric": metric,
-             "text_splitter": splitter,
-             "hash_on": unique_labels,
-         }
-         resp = requests.post(f"{indexify_url}/index/create", json=req)
-         if resp.status_code == 200:
-             return cls(indexify_url, name)
-         Indexify._get_payload(resp)
-
-     @classmethod
-     def get_index(cls, name: str, indexify_url: Optional[str]):
-         return cls(indexify_url, name)
-
-     def add_text_chunk(self, chunk: str, metadata: dict):
-         text_chunk = TextChunk(chunk, metadata)
-         req = {"index": self._index, "documents": [dataclasses.asdict(text_chunk)]}
-         resp = requests.post(f"{self._url}/index/add", json=req)
-         if resp.status_code == 200:
-             return
-         self._get_payload(resp)
-
-     def search(self, query: str, top_k: int):
-         req = SearchChunk(index=self._index, query=query, k=top_k)
-         resp = requests.get(f"{self._url}/index/search", json=dataclasses.asdict(req))
-         payload = self._get_payload(resp)
-         result = SearchResult(results=[])
-         for res in payload["results"]:
-             result.results.append(TextChunk(text=res["text"], metadata=res["metadata"]))
-         return result
-
-     @staticmethod
-     def _get_payload(response):
-         payload = {"errors": []}
-         try:
-             payload = json.loads(response.text)
-         except:
-             raise ApiException(response.text)
-         if len(payload["errors"]) > 0:
-             raise ApiException(f"Failed to create index: {payload['errors']}")
-
-         return payload