indexify 0.0.2__py3-none-any.whl → 0.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
indexify/__init__.py CHANGED
@@ -1,7 +1,11 @@
1
- from .index import Index, Message, TextChunk
2
- from .memory import Memory
3
- from .repository import Repository
1
+ from .index import Index, AIndex
2
+ from .client import IndexifyClient
3
+ from .memory import Memory, AMemory
4
+ from .repository import Repository, ARepository, create_repository, list_repositories
5
+ from .data_containers import TextChunk, Message
6
+ from .utils import wait_until
7
+ from .settings import DEFAULT_SERVICE_URL
4
8
 
5
- DEFAULT_INDEXIFY_URL = "http://localhost:8900"
6
9
 
7
- __all__ = ["Index", "Memory", "Message", "TextChunk", "Repository", "DEFAULT_INDEXIFY_URL"]
10
+ __all__ = ["Index", "Memory", "Repository", "AIndex", "AMemory", "ARepository",
11
+ "Message", "TextChunk", "DEFAULT_SERVICE_URL", "wait_until", "IndexifyMemory"]
indexify/client.py ADDED
@@ -0,0 +1,30 @@
1
+ from .extractor import Extractor, list_extractors
2
+ from .repository import Repository, create_repository, list_repositories
3
+ from .settings import DEFAULT_SERVICE_URL
4
+
5
+
6
+ class IndexifyClient:
7
+
8
+ def __init__(self, service_url: str = DEFAULT_SERVICE_URL):
9
+ self._service_url = service_url
10
+
11
+ def create_repository(self, name: str, extractors: list = [], metadata: dict = {}) -> dict:
12
+ return create_repository(name, extractors, metadata, self._service_url)
13
+
14
+ @property
15
+ def extractors(self) -> list[Extractor]:
16
+ return [Extractor(**extractor) for extractor in list_extractors(self._service_url)]
17
+
18
+ def get_or_create_repository(self, name: str) -> Repository:
19
+ return Repository(name=name, service_url=self._service_url)
20
+
21
+ def list_extractors(self) -> list[dict]:
22
+ return list_extractors(base_url=self._service_url)
23
+
24
+ def list_repositories(self) -> list[dict]:
25
+ return list_repositories(service_url=self._service_url)
26
+
27
+ @property
28
+ def repositories(self) -> list[Repository]:
29
+ # TODO: implement this
30
+ pass
@@ -1,5 +1,5 @@
1
1
  from enum import Enum
2
- from typing import Optional, List
2
+ from typing import List
3
3
  from dataclasses import dataclass, field
4
4
 
5
5
 
@@ -11,6 +11,7 @@ class TextSplitter(str, Enum):
11
11
  def __str__(self) -> str:
12
12
  return self.value.lower()
13
13
 
14
+
14
15
  @dataclass
15
16
  class TextChunk:
16
17
  text: str
@@ -19,6 +20,7 @@ class TextChunk:
19
20
  def to_dict(self):
20
21
  return {"text": self.text, "metadata": self.metadata}
21
22
 
23
+
22
24
  @dataclass
23
25
  class Message:
24
26
  role: str
@@ -28,6 +30,7 @@ class Message:
28
30
  def to_dict(self):
29
31
  return {"role": self.role, "text": self.text, "metadata": self.metadata}
30
32
 
33
+
31
34
  @dataclass
32
35
  class SearchChunk:
33
36
  index: str
@@ -37,6 +40,7 @@ class SearchChunk:
37
40
  def to_dict(self):
38
41
  return {"index": self.index, "query": self.query, "k": self.k}
39
42
 
43
+
40
44
  @dataclass
41
45
  class SearchResult:
42
46
  results: List[TextChunk]
indexify/extractor.py ADDED
@@ -0,0 +1,25 @@
1
+ import requests
2
+
3
+ from .settings import DEFAULT_SERVICE_URL
4
+
5
+
6
+ def list_extractors(base_url: str = DEFAULT_SERVICE_URL) -> list[dict]:
7
+ response = requests.get(f"{base_url}/extractors")
8
+ response.raise_for_status()
9
+ return response.json()['extractors']
10
+
11
+
12
+ # TODO: consider naming this IndexifyExtractor
13
+ # TODO: consider making this a dataclass
14
+ class Extractor:
15
+
16
+ def __init__(self, name: str, description: str, extractor_type : dict):
17
+ self.name = name
18
+ self.description = description
19
+ self.extractor_type = extractor_type
20
+
21
+ def __repr__(self) -> str:
22
+ return f"Extractor(name={self.name}, description={self.description})"
23
+
24
+ def __str__(self) -> str:
25
+ return self.__repr__()
indexify/index.py CHANGED
@@ -1,20 +1,31 @@
1
- import requests
1
+ import aiohttp
2
2
 
3
- from .data_containers import *
4
- from .utils import _get_payload
3
+ from .data_containers import SearchChunk, TextChunk
4
+ from .utils import _get_payload, wait_until
5
5
 
6
6
 
7
- class Index:
7
+ class AIndex:
8
8
 
9
- def __init__(self, url, index):
9
+ def __init__(self, url: str, index: str = "default/default"):
10
10
  self._url = url
11
11
  self._index = index
12
12
 
13
- def search(self, query: str, top_k: int) -> list[TextChunk]:
13
+ async def search(self, query: str, top_k: int) -> list[TextChunk]:
14
14
  req = SearchChunk(index=self._index, query=query, k=top_k)
15
- resp = requests.get(f"{self._url}/index/search", json=req.to_dict())
16
- payload = _get_payload(resp)
17
- result = []
18
- for res in payload["results"]:
19
- result.append(TextChunk(text=res["text"], metadata=res["metadata"]))
20
- return result
15
+ async with aiohttp.ClientSession() as session:
16
+ async with session.get(f"{self._url}/index/search", json=req.to_dict()) as resp:
17
+ payload = await _get_payload(resp)
18
+ result = []
19
+ for res in payload["results"]:
20
+ result.append(TextChunk(text=res["text"], metadata=res["metadata"]))
21
+ return result
22
+
23
+
24
+ class Index(AIndex):
25
+
26
+ def __init__(self, url, index):
27
+ AIndex.__init__(self, url, index)
28
+
29
+ def search(self, query: str, top_k: int) -> list[TextChunk]:
30
+ return wait_until(AIndex.search(self, query, top_k))
31
+
indexify/memory.py CHANGED
@@ -1,39 +1,53 @@
1
- import requests
1
+ import aiohttp
2
2
 
3
3
  from .data_containers import *
4
- from .utils import _get_payload
4
+ from .utils import _get_payload, wait_until
5
5
 
6
6
 
7
- class Memory:
7
+ class AMemory:
8
8
 
9
- def __init__(self, url, index):
9
+ def __init__(self, url, repository="default"):
10
+ self._session_id = None
10
11
  self._url = url
11
- self._index = index
12
+ self._repo = repository
12
13
 
13
- def create(self) -> str:
14
- resp = requests.post(f"{self._url}/memory/create", json={})
15
- self.session_id = _get_payload(resp)["session_id"]
16
- return self.session_id
14
+ async def create(self) -> str:
15
+ async with aiohttp.ClientSession() as session:
16
+ async with session.post(f"{self._url}/memory/create", json={"repository": self._repo}) as resp:
17
+ resp = await _get_payload(resp)
18
+ self._session_id = resp["session_id"]
19
+ return self._session_id
17
20
 
18
- def add(self, *messages: Message) -> None:
21
+ async def add(self, *messages: Message) -> None:
19
22
  parsed_messages = []
20
23
  for message in messages:
21
24
  parsed_messages.append(message.to_dict())
22
25
 
23
- req = {"session_id": self.session_id, "messages": parsed_messages}
24
- resp = requests.post(f"{self._url}/memory/add", json=req)
25
- if resp.status_code == 200:
26
- return
27
- _get_payload(resp)
26
+ req = {"session_id": self._session_id, "repository": self._repo, "messages": parsed_messages}
27
+ async with aiohttp.ClientSession() as session:
28
+ async with session.post(f"{self._url}/memory/add", json=req) as resp:
29
+ return await _get_payload(resp)
30
+
31
+ async def all(self) -> list[Message]:
32
+ req = {"session_id": self._session_id, "repository": self._repo}
33
+ async with aiohttp.ClientSession() as session:
34
+ async with session.get(f"{self._url}/memory/get", json=req) as resp:
35
+ payload = await _get_payload(resp)
36
+ messages = []
37
+ for raw_message in payload["messages"]:
38
+ messages.append(Message(raw_message["role"], raw_message["text"], raw_message["metadata"]))
39
+ return messages
40
+
41
+
42
+ class Memory(AMemory):
43
+ def __init__(self, url, repository="default"):
44
+ AMemory.__init__(self, url, repository)
28
45
 
29
- def all(self) -> list[Message]:
30
- req = {"session_id": self.session_id}
31
- resp = requests.get(f"{self._url}/memory/get", json=req)
32
- if resp.status_code == 200:
33
- payload = _get_payload(resp)
34
- messages = []
35
- for raw_message in payload["messages"]:
36
- messages.append(Message(raw_message["role"], raw_message["text"], raw_message["metadata"]))
37
- return messages
38
- _get_payload(resp)
46
+ def create(self) -> str:
47
+ return wait_until(AMemory.create(self))
39
48
 
49
+ def add(self, *messages: Message) -> None:
50
+ wait_until(AMemory.add(self, *messages))
51
+
52
+ def all(self) -> list[Message]:
53
+ return wait_until(AMemory.all(self))
indexify/repository.py CHANGED
@@ -1,20 +1,161 @@
1
+ import aiohttp
1
2
  import requests
2
3
 
3
- from .data_containers import *
4
- from .utils import _get_payload
4
+ from .index import Index
5
+ from .data_containers import TextChunk
6
+ from .settings import DEFAULT_SERVICE_URL
7
+ from .utils import _get_payload, wait_until
5
8
 
6
- class Repository:
7
9
 
8
- def __init__(self, url, name):
9
- self._url = url
10
- self._name = name
10
+ def create_repository(name: str, extractors: list = (), metadata: dict = {},
11
+ service_url: str = DEFAULT_SERVICE_URL) -> dict:
12
+ req = {"name": name, "extractors": extractors, "metadata": metadata}
13
+ response = requests.post(f"{service_url}/repositories", json=req)
14
+ response.raise_for_status()
15
+ return response.json()
11
16
 
12
- def add(self, *chunks: TextChunk) -> None:
13
- parsed_chunks = []
14
- for chunk in chunks:
15
- parsed_chunks.append(chunk.to_dict())
16
- req = {"documents": parsed_chunks}
17
- resp = requests.post(f"{self._url}/repository/add_texts", json=req)
18
- if resp.status_code == 200:
19
- return
20
- _get_payload(resp)
17
+
18
+ def list_repositories(service_url: str = DEFAULT_SERVICE_URL) -> list[dict]:
19
+ response = requests.get(f"{service_url}/repositories")
20
+ response.raise_for_status()
21
+ return response.json()['repositories']
22
+
23
+
24
+ # TODO: consider tying this back to IndexifyExtractor
25
+ class ExtractorBinding:
26
+
27
+ def __init__(self, extractor_name: str, index_name: str, filters: dict, input_params: dict):
28
+ self.extractor_name = extractor_name
29
+ self.index_name = index_name
30
+ self.filters = filters
31
+ self.input_params = input_params
32
+
33
+ def __repr__(self) -> str:
34
+ return f"ExtractorBinding(extractor_name={self.extractor_name}, index_name={self.index_name})"
35
+
36
+ def __str__(self) -> str:
37
+ return self.__repr__()
38
+
39
+
40
+ class ARepository:
41
+
42
+ def __init__(self, name: str, service_url: str):
43
+ self.name = name
44
+ self._service_url = service_url
45
+ self.url = f"{self._service_url}/repositories/{self.name}"
46
+
47
+ async def run_extractors(self) -> dict:
48
+ async with aiohttp.ClientSession() as session:
49
+ async with session.post(f"{self.url}/run_extractors") as resp:
50
+ return await _get_payload(resp)
51
+
52
+ async def add_documents(self, *documents: dict) -> None:
53
+ if isinstance(documents[0], dict):
54
+ documents = [documents[0]] # single document passed
55
+ else:
56
+ documents = documents[0] # list of documents passed
57
+ for doc in documents:
58
+ if "metadata" not in doc:
59
+ doc.update({"metadata": {}})
60
+ req = {"documents": documents}
61
+ async with aiohttp.ClientSession() as session:
62
+ async with session.post(f"{self.url}/add_texts", json=req) as resp:
63
+ return await _get_payload(resp)
64
+
65
+
66
+ class Repository(ARepository):
67
+
68
+ def __init__(self, name: str = "default", service_url: str = DEFAULT_SERVICE_URL):
69
+ super().__init__(name, service_url)
70
+ if not self._name_exists():
71
+ print(f"creating repo {self.name}")
72
+ create_repository(name=self.name, service_url=self._service_url)
73
+
74
+ def add_documents(self, *documents: dict) -> None:
75
+ return wait_until(ARepository.add_documents(self, *documents))
76
+
77
+ def bind_extractor(self, extractor_name: str, index_name: str,
78
+ include: dict | None = None,
79
+ exclude: dict | None = None) -> dict:
80
+ """Bind an extractor to this repository
81
+
82
+ Args:
83
+ extractor_name (str): Name of extractor
84
+ index_name (str): Name of corresponding index
85
+ include (dict | None, optional): Conditions that must be true
86
+ for an extractor to run on a document in the repository.
87
+ Defaults to None.
88
+ exclude (dict | None, optional): Conditions that must be false
89
+ for an extractor to run on a document in the repository.
90
+ Defaults to None.
91
+
92
+ Returns:
93
+ dict: response payload
94
+
95
+ Examples:
96
+ >>> repo.bind_extractor("EfficientNet", "png_embeddings",
97
+ include={"file_ext": "png"})
98
+
99
+ >>> repo.bind_extractor("MiniLML6", "non_english",
100
+ exclude={"language": "en"})
101
+
102
+ """
103
+ filters = []
104
+ if include is not None:
105
+ filters.extend([{'eq': {k: v}} for k, v in include.items()])
106
+ if exclude is not None:
107
+ filters.extend([{'ne': {k: v}} for k, v in exclude.items()])
108
+ req = {"extractor_name": extractor_name,
109
+ "index_name": index_name,
110
+ "filters": filters}
111
+ response = requests.post(f"{self.url}/extractor_bindings", json=req)
112
+ response.raise_for_status()
113
+ return response.json()
114
+
115
+ @property
116
+ def extractor_bindings(self) -> list[ExtractorBinding]:
117
+ return [ExtractorBinding(**e) for e in self._get_repository_info()['extractor_bindings']]
118
+
119
+ @property
120
+ def indexes(self) -> list[Index]:
121
+ # TODO: implement this - can take from extractors but not correct
122
+ pass
123
+
124
+ # FIXME: query type should depend on index type
125
+ def query_attribute(self, index_name: str, content_id: str = None) -> dict:
126
+ # TODO: this should be async
127
+ params = {"index": index_name}
128
+ if content_id:
129
+ params.update({"content_id": content_id})
130
+ response = requests.get(f"{self.url}/attributes", params=params)
131
+ response.raise_for_status()
132
+ return response.json()['attributes']
133
+
134
+ def unbind_extractor(self, name) -> dict:
135
+ # TODO: implement this
136
+ pass
137
+
138
+ def run_extractors(self) -> dict:
139
+ return wait_until(ARepository.run_extractors(self))
140
+
141
+ # TODO: this should move to index
142
+ def search_index(self, index_name: str, query: str, top_k: int) -> list[TextChunk]:
143
+ # TODO: this should be async
144
+ req = {"index": index_name, "query": query, "k": top_k}
145
+ response = requests.post(f"{self.url}/search", json=req)
146
+ response.raise_for_status()
147
+ return response.json()['results']
148
+
149
+ def _get_repository_info(self) -> dict:
150
+ response = requests.get(f"{self.url}")
151
+ response.raise_for_status()
152
+ return response.json()['repository']
153
+
154
+ def _name_exists(self) -> bool:
155
+ return self.name in [r['name'] for r in list_repositories(self._service_url)]
156
+
157
+ def __repr__(self) -> str:
158
+ return f"Repository(name={self.name})"
159
+
160
+ def __str__(self) -> str:
161
+ return self.__repr__()
indexify/settings.py ADDED
@@ -0,0 +1 @@
1
+ DEFAULT_SERVICE_URL = "http://localhost:8900"
indexify/utils.py CHANGED
@@ -1,5 +1,6 @@
1
+ import asyncio
2
+ from enum import Enum
1
3
  import json
2
- from .data_containers import *
3
4
 
4
5
 
5
6
  class ApiException(Exception):
@@ -16,13 +17,24 @@ class Metric(str, Enum):
16
17
  return self.name.lower()
17
18
 
18
19
 
19
- def _get_payload(response):
20
- payload = {"errors": []}
21
- try:
22
- payload = json.loads(response.text)
23
- except:
24
- raise ApiException(response.text)
25
- if "errors" in payload.keys() and len(payload["errors"]) > 0:
26
- raise ApiException(f"Failed to create index: {payload['errors']}")
20
+ async def _get_payload(response):
21
+ response.raise_for_status()
22
+ resp = await response.text()
23
+ return json.loads(resp)
27
24
 
28
- return payload
25
+
26
+ def wait_until(functions):
27
+ single_result = False
28
+ if not isinstance(functions, list):
29
+ single_result = True
30
+ functions = [functions]
31
+ holder = []
32
+
33
+ async def run_and_capture_result():
34
+ holder.append(await asyncio.gather(*functions))
35
+
36
+ asyncio.run(run_and_capture_result())
37
+ if single_result:
38
+ return holder[0][0] # single result
39
+ else:
40
+ return holder[0] # list of results
@@ -0,0 +1,53 @@
1
+ Metadata-Version: 2.1
2
+ Name: indexify
3
+ Version: 0.0.3
4
+ Summary: Python Client for Indexify
5
+ Home-page: https://github.com/diptanu/indexify
6
+ License: Apache 2.0
7
+ Author: Diptanu Gon Choudhury
8
+ Author-email: diptanuc@gmail.com
9
+ Requires-Python: >=3.10.0,<4.0.0
10
+ Classifier: License :: Other/Proprietary License
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3.10
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Requires-Dist: aiohttp (>=3.4,<4.0)
15
+ Project-URL: Repository, https://github.com/diptanu/indexify
16
+ Description-Content-Type: text/markdown
17
+
18
+ # Indexify Python Client
19
+
20
+ ## Installation
21
+
22
+ This is the Python client for interacting with the Indexify service.
23
+
24
+ To install it, simply run:
25
+
26
+ ```shell
27
+ pip install indexify
28
+ ```
29
+
30
+ ## Usage
31
+
32
+ See the [getting started](https://getindexify.com/getting_started/) guide for examples of how to use the client.
33
+
34
+ ## Development
35
+
36
+ For first time setup, follow the steps [here](https://getindexify.com/develop/).
37
+
38
+ ### Steps for restarting dev server after updating server code
39
+
40
+ ```shell
41
+ ./install_python_deps.sh
42
+ # use `-e`` if you're developing extractors
43
+ (cd extractors && pip install -e .)
44
+ # use `-e`` if you're developing sdk-py
45
+ (cd sdk-py && pip install -e .)
46
+
47
+ cargo build
48
+ make local-dev
49
+
50
+ # start the server
51
+ ./target/debug/indexify start-server -d -c local_config.yaml
52
+ ```
53
+
@@ -0,0 +1,13 @@
1
+ indexify/__init__.py,sha256=bqinywn8AgiujXVYQiS7_5HXUYPzaJW-MpkzQAc6z3k,482
2
+ indexify/client.py,sha256=o8e9vElIfeAqTzoqQ9pXZiaXudKQR4LfRlN1DdFwnT0,1081
3
+ indexify/data_containers.py,sha256=db5qJRN7Wm3yLzxhovxrr71Cx-yNHTOzi05xO8kjvkU,887
4
+ indexify/extractor.py,sha256=dM8cCQpv9lAxUUSM9NyAaobU3bP1tS8Vw-WzUMlUhWM,725
5
+ indexify/index.py,sha256=GIgmgfQPZhAAfPH2uUfbi57PiOEBLisgz3Fx-Umt-Rg,1002
6
+ indexify/memory.py,sha256=FlV73TM4egAfniPeW7XqqEFo6ybV-nAElgU0PuuHt68,1965
7
+ indexify/repository.py,sha256=0N2cMELrkhgWHHQ5Ahlk0N4Gp_zKd25Vbzo_z2JP-8w,6070
8
+ indexify/settings.py,sha256=yzWAEZkrTjykSMj3hrFU7l_jUoUCOUsgPVW1nU-qzJQ,46
9
+ indexify/utils.py,sha256=HBnm2a7F3ML3BcfverDe8lwkZs4xihjN0wDXr3GTt-c,881
10
+ indexify-0.0.3.dist-info/LICENSE.txt,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
11
+ indexify-0.0.3.dist-info/METADATA,sha256=6kv-vvTqZeBvXYxH-L1fOB1tlky5u3JdALJLbR6aOMI,1350
12
+ indexify-0.0.3.dist-info/WHEEL,sha256=WGfLGfLX43Ei_YORXSnT54hxFygu34kMpcQdmgmEwCQ,88
13
+ indexify-0.0.3.dist-info/RECORD,,
@@ -1,18 +0,0 @@
1
- Metadata-Version: 2.1
2
- Name: indexify
3
- Version: 0.0.2
4
- Summary: Python Client for Indexify
5
- Home-page: https://github.com/diptanu/indexify
6
- License: Apache 2.0
7
- Author: Diptanu Gon Choudhury
8
- Author-email: diptanuc@gmail.com
9
- Requires-Python: >=3.10.0,<4.0.0
10
- Classifier: License :: Other/Proprietary License
11
- Classifier: Programming Language :: Python :: 3
12
- Classifier: Programming Language :: Python :: 3.10
13
- Classifier: Programming Language :: Python :: 3.11
14
- Requires-Dist: requests (>=2.28.2,<3.0.0,!=2.30.0)
15
- Project-URL: Repository, https://github.com/diptanu/indexify
16
- Description-Content-Type: text/x-rst
17
-
18
- # Indexify Python Client
@@ -1,10 +0,0 @@
1
- indexify/__init__.py,sha256=51zytHqEftHkSk1kFV3BXhaXqVMvjSTDqkLbWcoDRdk,248
2
- indexify/data_containers.py,sha256=9R_yvKXl3U17QH7PwKf08fPKKoG7D2uttORLv9fiasQ,893
3
- indexify/index.py,sha256=igpAO70SNlsFTva30zWUGNuJOL5y7gfrkeCojhus1d0,593
4
- indexify/memory.py,sha256=B_Xr7X8LJZmruYOuS5KhisVUpaEkAQS4aUGLEfH2kzI,1228
5
- indexify/repository.py,sha256=HzGtsW_JJfz2XeXWuIS9k1iQvRnkR2sBJl1enHkSVDg,540
6
- indexify/utils.py,sha256=StVuI6A-gtfhmISmefGxzsezARC_0PRXSjlwbLN05is,640
7
- indexify-0.0.2.dist-info/LICENSE.txt,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
8
- indexify-0.0.2.dist-info/METADATA,sha256=Ycm3ZjW-BBjPXYCi20yBzBNkK9ELfQKzXALL6h-ykXk,625
9
- indexify-0.0.2.dist-info/WHEEL,sha256=WGfLGfLX43Ei_YORXSnT54hxFygu34kMpcQdmgmEwCQ,88
10
- indexify-0.0.2.dist-info/RECORD,,