indexify 0.0.4__tar.gz → 0.0.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {indexify-0.0.4 → indexify-0.0.6}/PKG-INFO +15 -20
- {indexify-0.0.4 → indexify-0.0.6}/README.md +11 -16
- {indexify-0.0.4 → indexify-0.0.6}/indexify/__init__.py +2 -4
- indexify-0.0.6/indexify/client.py +406 -0
- {indexify-0.0.4 → indexify-0.0.6}/indexify/data_containers.py +1 -0
- {indexify-0.0.4 → indexify-0.0.6}/indexify/extractor.py +6 -23
- indexify-0.0.6/indexify/extractor_binding.py +27 -0
- {indexify-0.0.4 → indexify-0.0.6}/indexify/index.py +4 -3
- {indexify-0.0.4 → indexify-0.0.6}/pyproject.toml +4 -4
- indexify-0.0.4/indexify/client.py +0 -178
- indexify-0.0.4/indexify/repository.py +0 -196
- {indexify-0.0.4 → indexify-0.0.6}/LICENSE.txt +0 -0
- {indexify-0.0.4 → indexify-0.0.6}/indexify/exceptions.py +0 -0
- {indexify-0.0.4 → indexify-0.0.6}/indexify/settings.py +0 -0
- {indexify-0.0.4 → indexify-0.0.6}/indexify/utils.py +0 -0
@@ -1,8 +1,8 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: indexify
|
3
|
-
Version: 0.0.4
|
3
|
+
Version: 0.0.6
|
4
4
|
Summary: Python Client for Indexify
|
5
|
-
Home-page: https://github.com/
|
5
|
+
Home-page: https://github.com/tensorlakeai/indexify
|
6
6
|
License: Apache 2.0
|
7
7
|
Author: Diptanu Gon Choudhury
|
8
8
|
Author-email: diptanuc@gmail.com
|
@@ -12,12 +12,15 @@ Classifier: Programming Language :: Python :: 3
|
|
12
12
|
Classifier: Programming Language :: Python :: 3.10
|
13
13
|
Classifier: Programming Language :: Python :: 3.11
|
14
14
|
Classifier: Programming Language :: Python :: 3.12
|
15
|
-
Requires-Dist: httpx[http2] (>=0.
|
16
|
-
Project-URL: Repository, https://github.com/
|
15
|
+
Requires-Dist: httpx[http2] (>=0.26,<0.27)
|
16
|
+
Project-URL: Repository, https://github.com/tensorlakeai/indexify
|
17
17
|
Description-Content-Type: text/markdown
|
18
18
|
|
19
19
|
# Indexify Python Client
|
20
20
|
|
21
|
+
|
22
|
+
[![PyPI version](https://badge.fury.io/py/indexify.svg)](https://badge.fury.io/py/indexify)
|
23
|
+
|
21
24
|
## Installation
|
22
25
|
|
23
26
|
This is the Python client for interacting with the Indexify service.
|
@@ -31,10 +34,17 @@ pip install indexify
|
|
31
34
|
## Usage
|
32
35
|
|
33
36
|
See the [getting started](https://getindexify.com/getting_started/) guide for examples of how to use the client.
|
34
|
-
Look at the [
|
37
|
+
Look at the [examples](examples) directory for more examples.
|
35
38
|
|
36
39
|
## Development
|
37
40
|
|
41
|
+
To install the client from this repository for development:
|
42
|
+
|
43
|
+
```shell
|
44
|
+
cd "path to this repository"
|
45
|
+
pip install -e .
|
46
|
+
```
|
47
|
+
|
38
48
|
Install and run the `poetry` package manager:
|
39
49
|
|
40
50
|
```shell
|
@@ -45,21 +55,6 @@ poetry install
|
|
45
55
|
More information at [https://python-poetry.org/docs/](https://python-poetry.org/docs/).
|
46
56
|
|
47
57
|
|
48
|
-
### Steps for restarting dev server after updating server code
|
49
|
-
|
50
|
-
```shell
|
51
|
-
./install_python_deps.sh
|
52
|
-
# use `-e`` if you're developing extractors
|
53
|
-
(cd extractors && pip install -e .)
|
54
|
-
# use `-e`` if you're developing sdk-py
|
55
|
-
(cd sdk-py && pip install -e .)
|
56
|
-
|
57
|
-
cargo build
|
58
|
-
make local-dev
|
59
|
-
|
60
|
-
# start the server
|
61
|
-
./target/debug/indexify start-server -d -c local_config.yaml
|
62
|
-
```
|
63
58
|
|
64
59
|
### Environment Variables
|
65
60
|
|
@@ -1,5 +1,8 @@
|
|
1
1
|
# Indexify Python Client
|
2
2
|
|
3
|
+
|
4
|
+
[![PyPI version](https://badge.fury.io/py/indexify.svg)](https://badge.fury.io/py/indexify)
|
5
|
+
|
3
6
|
## Installation
|
4
7
|
|
5
8
|
This is the Python client for interacting with the Indexify service.
|
@@ -13,10 +16,17 @@ pip install indexify
|
|
13
16
|
## Usage
|
14
17
|
|
15
18
|
See the [getting started](https://getindexify.com/getting_started/) guide for examples of how to use the client.
|
16
|
-
Look at the [
|
19
|
+
Look at the [examples](examples) directory for more examples.
|
17
20
|
|
18
21
|
## Development
|
19
22
|
|
23
|
+
To install the client from this repository for development:
|
24
|
+
|
25
|
+
```shell
|
26
|
+
cd "path to this repository"
|
27
|
+
pip install -e .
|
28
|
+
```
|
29
|
+
|
20
30
|
Install and run the `poetry` package manager:
|
21
31
|
|
22
32
|
```shell
|
@@ -27,21 +37,6 @@ poetry install
|
|
27
37
|
More information at [https://python-poetry.org/docs/](https://python-poetry.org/docs/).
|
28
38
|
|
29
39
|
|
30
|
-
### Steps for restarting dev server after updating server code
|
31
|
-
|
32
|
-
```shell
|
33
|
-
./install_python_deps.sh
|
34
|
-
# use `-e`` if you're developing extractors
|
35
|
-
(cd extractors && pip install -e .)
|
36
|
-
# use `-e`` if you're developing sdk-py
|
37
|
-
(cd sdk-py && pip install -e .)
|
38
|
-
|
39
|
-
cargo build
|
40
|
-
make local-dev
|
41
|
-
|
42
|
-
# start the server
|
43
|
-
./target/debug/indexify start-server -d -c local_config.yaml
|
44
|
-
```
|
45
40
|
|
46
41
|
### Environment Variables
|
47
42
|
|
@@ -1,14 +1,12 @@
|
|
1
1
|
from .index import Index
|
2
2
|
from .client import IndexifyClient
|
3
|
-
from .
|
3
|
+
from .extractor_binding import ExtractorBinding
|
4
4
|
from .data_containers import TextChunk
|
5
5
|
from .settings import DEFAULT_SERVICE_URL
|
6
6
|
|
7
7
|
__all__ = [
|
8
|
-
"Filter",
|
9
|
-
"FilterBuilder",
|
10
8
|
"Index",
|
11
9
|
"IndexifyClient",
|
12
|
-
"
|
10
|
+
"ExtractorBinding",
|
13
11
|
"DEFAULT_SERVICE_URL",
|
14
12
|
]
|
@@ -0,0 +1,406 @@
|
|
1
|
+
import httpx
|
2
|
+
import json
|
3
|
+
from collections import namedtuple
|
4
|
+
from .settings import DEFAULT_SERVICE_URL
|
5
|
+
from .extractor import Extractor
|
6
|
+
from .extractor_binding import ExtractorBinding
|
7
|
+
from .index import Index
|
8
|
+
from .utils import json_set_default
|
9
|
+
from .data_containers import TextChunk
|
10
|
+
from indexify.exceptions import ApiException
|
11
|
+
|
12
|
+
from typing import List, Optional, Union
|
13
|
+
|
14
|
+
# Lightweight record for one document passed to IndexifyClient.add_documents:
# `text` is the document body and `labels` its labels (add_documents wraps a
# bare string as Document(text, {})).
Document = namedtuple("Document", ["text", "labels"])
|
15
|
+
|
16
|
+
|
17
|
+
class IndexifyClient:
    """
    IndexifyClient is the main entry point for the SDK.

    Each client is bound to one namespace of one Indexify service and wraps
    an ``httpx.Client``. For the full list of transport features, see the
    [httpx Client documentation](https://www.python-httpx.org/api/#client).

    :param service_url: The URL of the Indexify service to connect to.
    :param namespace: Name of the namespace this client operates on.
    :param args: Arguments to pass to the httpx.Client constructor
    :param kwargs: Keyword arguments to pass to the httpx.Client constructor

    Example usage:
    ```
    from indexify import IndexifyClient

    client = IndexifyClient()
    assert client.heartbeat() == True
    ```
    """

    def __init__(
        self,
        service_url: str = DEFAULT_SERVICE_URL,
        namespace: str = "default",
        *args,
        **kwargs,
    ):
        """
        Open the HTTP client and load the namespace's extractor bindings.

        Construction performs a GET on ``namespaces/{namespace}``; any error
        status from that request propagates as ``httpx.HTTPStatusError``.
        """
        self.namespace: str = namespace
        self.extractor_bindings: List[ExtractorBinding] = []
        self.labels: dict = {}
        self._service_url = service_url
        self._client = httpx.Client(*args, **kwargs)

        try:
            # Populates self.extractor_bindings from the service.
            self.get_extractor_bindings()
        except Exception:
            # Do not leak the connection pool when the initial lookup fails;
            # the caller never receives an object it could close().
            self._client.close()
            raise

    @classmethod
    def with_mtls(
        cls,
        cert_path: str,
        key_path: str,
        ca_bundle_path: Optional[str] = None,
        service_url: str = DEFAULT_SERVICE_URL,
        *args,
        **kwargs,
    ) -> "IndexifyClient":
        """
        Create a client with mutual TLS authentication. Also enables HTTP/2.
        NOTE: mTLS must be enabled on the Indexify service for this to work.

        :param cert_path: Path to the client certificate. Resolution handled by httpx.
        :param key_path: Path to the client key. Resolution handled by httpx.
        :param ca_bundle_path: Optional CA bundle passed to httpx as ``verify``;
            when omitted, ``verify=True`` is used.
        :param service_url: The URL of the Indexify service to connect to.
        :param args: Arguments to pass to the httpx.Client constructor
        :param kwargs: Keyword arguments to pass to the httpx.Client constructor
            (a ``namespace`` keyword is routed to the client itself)
        :return: A client with mTLS authentication
        :raises ValueError: if ``cert_path`` or ``key_path`` is empty

        Example usage:
        ```
        from indexify import IndexifyClient

        client = IndexifyClient.with_mtls(
            cert_path="/path/to/cert.pem",
            key_path="/path/to/key.pem",
        )
        assert client.heartbeat() == True
        ```
        """
        if not (cert_path and key_path):
            raise ValueError("Both cert and key must be provided for mTLS")

        # service_url and namespace are the first two positional parameters of
        # __init__, so they are passed positionally; otherwise any extra *args
        # would collide with the service_url keyword.
        namespace = kwargs.pop("namespace", "default")
        return cls(
            service_url,
            namespace,
            *args,
            http2=True,
            cert=(cert_path, key_path),
            verify=ca_bundle_path if ca_bundle_path else True,
            **kwargs,
        )

    def _request(self, method: str, **kwargs) -> httpx.Response:
        """
        Send a request through the underlying httpx client.

        Error statuses are printed together with the response body and then
        re-raised as ``httpx.HTTPStatusError``, so callers of get()/post()
        never see an unsuccessful response object.
        """
        response = self._client.request(method, **kwargs)
        try:
            response.raise_for_status()
        except httpx.HTTPStatusError as exc:
            print(f"exception: {exc}, response text: {response.text}")
            raise
        return response

    def get(self, endpoint: str, **kwargs) -> httpx.Response:
        """
        Make a GET request to the Indexify service.

        :param endpoint: The endpoint to make the request to.
        :param kwargs: Extra keyword arguments forwarded to httpx's request call
        :raises httpx.HTTPStatusError: on a 4xx/5xx response

        Example usage:
        ```
        from indexify import IndexifyClient

        client = IndexifyClient()
        response = client.get("namespaces")
        print(response.json())
        ```
        """
        return self._request("GET", url=f"{self._service_url}/{endpoint}", **kwargs)

    def post(self, endpoint: str, **kwargs) -> httpx.Response:
        """
        Make a POST request to the Indexify service.

        :param endpoint: The endpoint to make the request to.
        :param kwargs: Extra keyword arguments forwarded to httpx's request call
        :raises httpx.HTTPStatusError: on a 4xx/5xx response

        Example usage:

        ```
        from indexify import IndexifyClient

        client = IndexifyClient()
        response = client.post("namespaces", json={"name": "my-repo"})
        print(response.json())
        ```
        """
        return self._request("POST", url=f"{self._service_url}/{endpoint}", **kwargs)

    def put(self, endpoint: str, **kwargs) -> httpx.Response:
        """PUT is not implemented; always raises NotImplementedError."""
        raise NotImplementedError

    def delete(self, endpoint: str, **kwargs) -> httpx.Response:
        """DELETE is not implemented; always raises NotImplementedError."""
        raise NotImplementedError

    def close(self):
        """
        Close the underlying httpx.Client.
        """
        self._client.close()

    # __enter__ and __exit__ allow the client to be used as a context manager
    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def heartbeat(self, heartbeat_response="Indexify Server") -> bool:
        """
        Check if the Indexify service is alive.

        GETs the service root and compares the response text with
        ``heartbeat_response`` (the server responds with "Indexify Server").
        """
        return self.get("").text == heartbeat_response

    def namespaces(self) -> list[str]:
        """
        Get the names of all namespaces.
        """
        listing = self.get("namespaces").json()["namespaces"]
        return [entry["name"] for entry in listing]

    @classmethod
    def create_namespace(
        cls,
        namespace: str,
        extractor_bindings: Optional[list] = None,
        labels: Optional[dict] = None,
    ) -> "IndexifyClient":
        """
        Create a new namespace on the default service URL.

        Args:
            - namespace (str): name of the namespace to create
            - extractor_bindings (list): ExtractorBinding objects or plain
              dicts; defaults to no bindings
            - labels (dict): labels for the namespace; defaults to none

        Returns:
            IndexifyClient: a new client with the given namespace

        Raises:
            httpx.HTTPStatusError: if the service rejects the request
        """
        bindings = [
            bd.to_dict() if isinstance(bd, ExtractorBinding) else bd
            for bd in (extractor_bindings or [])
        ]
        req = {
            "name": namespace,
            "extractor_bindings": bindings,
            "labels": labels if labels is not None else {},
        }

        # The namespace has to exist before a client can be bound to it:
        # __init__ looks the namespace up and raises on an error status.
        # So create it with a short-lived plain HTTP client first.
        with httpx.Client() as bootstrap:
            response = bootstrap.post(f"{DEFAULT_SERVICE_URL}/namespaces", json=req)
            response.raise_for_status()
        return cls(namespace=namespace)

    def indexes(self) -> List[Index]:
        """
        Get the indexes of the current namespace.

        Returns:
            The "indexes" list exactly as decoded from the JSON response
            (entries are not wrapped in Index objects here).
        """
        # No extra raise_for_status(): get() already raises on error statuses.
        return self.get(f"namespaces/{self.namespace}/indexes").json()["indexes"]

    def extractors(self) -> List[Extractor]:
        """
        Get a list of all extractors.

        Returns:
            List[Extractor]: list of extractors
        """
        described = self.get("extractors").json()["extractors"]
        return [Extractor.from_dict(ed) for ed in described]

    def get_extractor_bindings(self):
        """
        Retrieve and update the list of extractor bindings for the current namespace.

        Returns:
            List[ExtractorBinding]: the refreshed list, which is also stored
            on ``self.extractor_bindings``
        """
        payload = self.get(f"namespaces/{self.namespace}").json()
        self.extractor_bindings = [
            ExtractorBinding.from_dict(eb)
            for eb in payload["namespace"]["extractor_bindings"]
        ]
        return self.extractor_bindings

    def bind_extractor(
        self,
        extractor: str,
        name: str,
        input_params: Optional[dict] = None,
        labels_eq: Optional[str] = None,
    ) -> None:
        """Bind an extractor to the current namespace.

        Args:
            - extractor (str): Name of the extractor
            - name (str): Name for this instance
            - input_params (dict): Dictionary containing extractor input params
            - labels_eq (str): Optional label filter, sent to the service as
              "filters_eq"

        Returns:
            None. ``self.extractor_bindings`` is refreshed on success.

        Raises:
            httpx.HTTPStatusError: if the service rejects the binding

        Examples:
            >>> client.bind_extractor("EfficientNet", "efficientnet")

            >>> client.bind_extractor("MiniLML6", "minilm")

        """
        req = {
            "extractor": extractor,
            "name": name,
            "input_params": input_params if input_params is not None else {},
        }
        if labels_eq is not None:
            req["filters_eq"] = labels_eq

        # Serialized by hand so json_set_default can handle values the
        # default encoder rejects; raw text bodies go through `content=`.
        request_body = json.dumps(req, default=json_set_default)
        # NOTE(review): post() already raises httpx.HTTPStatusError on error
        # statuses, so the former ApiException translation after this call
        # could never run; the exception type callers see is kept as it was.
        self.post(
            f"namespaces/{self.namespace}/extractor_bindings",
            content=request_body,
            headers={"Content-Type": "application/json"},
        )

        # update self.extractor_bindings
        self.get_extractor_bindings()

    def get_content(
        self,
        parent_id: Optional[str] = None,
        labels_eq: Optional[str] = None,
    ):
        """
        Get list of content from current namespace.

        Args:
            - parent_id (str): Optional filter for parent id
            - labels_eq (str): Optional filter for labels

        Returns:
            The "content_list" value of the JSON response.
        """
        # Only non-empty filters are sent as query parameters.
        params = {}
        if parent_id:
            params["parent_id"] = parent_id
        if labels_eq:
            params["labels_eq"] = labels_eq

        response = self.get(f"namespaces/{self.namespace}/content", params=params)
        return response.json()["content_list"]

    def add_documents(
        self, documents: Union[Document, str, List[Union[Document, str]]]
    ) -> None:
        """
        Add documents to current namespace.

        Args:
            - documents (Union[Document, str, List[Union[Document, str]]]): this can be a list of strings, list of Documents or a mix of both

        Raises:
            ValueError: if a list item is neither a Document nor a string
            TypeError: if ``documents`` is not a Document, a string or a list
        """
        # Document is checked before str/list: a single Document must be
        # wrapped in a list, not treated as a sequence of items.
        if isinstance(documents, Document):
            batch = [documents]
        elif isinstance(documents, str):
            batch = [Document(documents, {})]
        elif isinstance(documents, list):
            batch = []
            for item in documents:
                if isinstance(item, Document):
                    batch.append(item)
                elif isinstance(item, str):
                    # Bare strings become documents with no labels.
                    batch.append(Document(item, {}))
                else:
                    raise ValueError(
                        "List items must be either Document instances or strings."
                    )
        else:
            raise TypeError(
                "Invalid type for documents. Expected Document, str, or list of these."
            )

        self.post(
            f"namespaces/{self.namespace}/add_texts",
            json={"documents": batch},
            headers={"Content-Type": "application/json"},
        )

    def query_metadata(self, index_name: str, content_id: str) -> dict:
        """
        Query metadata for a specific content ID in a given index.

        Args:
            - index_name (str): index to query
            - content_id (str): content id to query

        Returns:
            The "attributes" value of the JSON response.
        """
        params = {"index": index_name, "content_id": content_id}
        response = self.get(f"namespaces/{self.namespace}/metadata", params=params)
        return response.json()["attributes"]

    def search_index(self, name: str, query: str, top_k: int) -> list[TextChunk]:
        """
        Search index in the current namespace.

        Args:
            - name (str): name of index to search
            - query (str): query string
            - top_k (int): top k nearest neighbors to be returned

        Returns:
            The "results" list exactly as decoded from the JSON response.
        """
        req = {"index": name, "query": query, "k": top_k}
        response = self.post(
            f"namespaces/{self.namespace}/search",
            json=req,
            headers={"Content-Type": "application/json"},
        )
        return response.json()["results"]

    def upload_file(self, path: str):
        """
        Upload a file.

        Args:
            - path (str): relative path to the file to be uploaded
        """
        # The handle stays open for the duration of the multipart upload.
        with open(path, "rb") as f:
            self.post(
                f"namespaces/{self.namespace}/upload_file",
                files={"file": f},
            )
|
@@ -1,27 +1,10 @@
|
|
1
|
-
import
|
1
|
+
import httpx
|
2
2
|
from dataclasses import dataclass
|
3
3
|
from typing import Union
|
4
4
|
|
5
5
|
from .settings import DEFAULT_SERVICE_URL
|
6
6
|
|
7
7
|
|
8
|
-
def list_extractors(base_url: str = DEFAULT_SERVICE_URL) -> list[dict]:
|
9
|
-
response = requests.get(f"{base_url}/extractors")
|
10
|
-
response.raise_for_status()
|
11
|
-
return response.json()["extractors"]
|
12
|
-
|
13
|
-
|
14
|
-
@dataclass
|
15
|
-
class EmbeddingExtractor:
|
16
|
-
dim: int
|
17
|
-
distance: str
|
18
|
-
|
19
|
-
|
20
|
-
@dataclass
|
21
|
-
class AttributeExtractor:
|
22
|
-
json_schema: str
|
23
|
-
|
24
|
-
|
25
8
|
@dataclass
|
26
9
|
class EmbeddingSchema:
|
27
10
|
distance: str
|
@@ -38,17 +21,17 @@ class Extractor:
|
|
38
21
|
name: str
|
39
22
|
description: str
|
40
23
|
input_params: dict
|
41
|
-
|
24
|
+
outputs: ExtractorSchema
|
42
25
|
|
43
26
|
|
44
27
|
class Extractor:
|
45
28
|
def __init__(
|
46
|
-
self, name: str, description: str, input_params: dict,
|
29
|
+
self, name: str, description: str, input_params: dict, outputs: ExtractorSchema
|
47
30
|
):
|
48
31
|
self.name = name
|
49
32
|
self.description = description
|
50
33
|
self.input_params = input_params
|
51
|
-
self.
|
34
|
+
self.outputs = outputs
|
52
35
|
|
53
36
|
@classmethod
|
54
37
|
def from_dict(cls, data):
|
@@ -56,11 +39,11 @@ class Extractor:
|
|
56
39
|
name=data["name"],
|
57
40
|
description=data["description"],
|
58
41
|
input_params=data["input_params"],
|
59
|
-
|
42
|
+
outputs=data["outputs"],
|
60
43
|
)
|
61
44
|
|
62
45
|
def __repr__(self) -> str:
|
63
|
-
return f"Extractor(name={self.name}, description={self.description})"
|
46
|
+
return f"Extractor(name={self.name}, description={self.description}, input_params={self.input_params}, outputs={self.outputs})"
|
64
47
|
|
65
48
|
def __str__(self) -> str:
|
66
49
|
return self.__repr__()
|
@@ -0,0 +1,27 @@
|
|
1
|
+
from dataclasses import dataclass, asdict
|
2
|
+
from typing import Optional
|
3
|
+
|
4
|
+
|
5
|
+
@dataclass
class ExtractorBinding:
    """
    Binding of an extractor to a namespace, as exchanged with the service.

    Instances are normally built from service JSON via from_dict() and
    turned back into request payloads via to_dict().
    """

    # Name of the extractor being bound.
    extractor: str
    # Name given to this binding.
    name: str
    # NOTE(review): presumably identifies where the content to extract from
    # originates; the meaning is defined by the service, not by this module.
    content_source: str
    # Input parameters handed to the extractor.
    input_params: dict
    # Optional label filter; incoming JSON may call this field "filters_eq".
    labels_eq: Optional[str] = None

    def __repr__(self) -> str:
        return f"ExtractorBinding(name={self.name} extractor={self.extractor})"

    def __str__(self) -> str:
        return self.__repr__()

    def to_dict(self) -> dict:
        """Return the fields as a dict, leaving out those that are None."""
        return {k: v for k, v in asdict(self).items() if v is not None}

    @classmethod
    def from_dict(cls, json: dict):
        """
        Build a binding from a decoded JSON object.

        A "filters_eq" key is accepted as the spelling of ``labels_eq``.
        The caller's dict is left untouched.
        """
        # Work on a shallow copy: renaming the key in place would silently
        # alter the payload the caller still holds.
        fields = dict(json)
        if "filters_eq" in fields:
            fields["labels_eq"] = fields.pop("filters_eq")
        return cls(**fields)
|
@@ -1,7 +1,8 @@
|
|
1
|
-
import
|
1
|
+
import httpx
|
2
2
|
|
3
3
|
from .data_containers import SearchChunk, TextChunk
|
4
4
|
|
5
|
+
|
5
6
|
class Index:
|
6
7
|
def __init__(self, service_url, index):
|
7
8
|
self._service_url = service_url
|
@@ -9,8 +10,8 @@ class Index:
|
|
9
10
|
|
10
11
|
def search(self, query: str, top_k: int) -> list[TextChunk]:
|
11
12
|
req = {"index": self._index, "query": query, "k": top_k}
|
12
|
-
response =
|
13
|
+
response = httpx.post(
|
13
14
|
f"{self._service_url}/indexes/{self._index}/search", json=req
|
14
15
|
)
|
15
16
|
response.raise_for_status()
|
16
|
-
return response.json()["results"]
|
17
|
+
return response.json()["results"]
|
@@ -1,16 +1,16 @@
|
|
1
1
|
[tool.poetry]
|
2
2
|
name = "indexify"
|
3
|
-
version = "0.0.4"
|
3
|
+
version = "0.0.6"
|
4
4
|
description = "Python Client for Indexify"
|
5
5
|
authors = ["Diptanu Gon Choudhury <diptanuc@gmail.com>", "Vijay Parthasarathy <vijay2win@gmail.com>"]
|
6
6
|
license = "Apache 2.0"
|
7
7
|
readme = "README.md"
|
8
|
-
homepage = "https://github.com/
|
9
|
-
repository = "https://github.com/
|
8
|
+
homepage = "https://github.com/tensorlakeai/indexify"
|
9
|
+
repository = "https://github.com/tensorlakeai/indexify"
|
10
10
|
|
11
11
|
[tool.poetry.dependencies]
|
12
12
|
python = "^3.10.0"
|
13
|
-
httpx = { version = "^0.
|
13
|
+
httpx = { version = "^0.26", extras = ["http2"] }
|
14
14
|
|
15
15
|
[tool.poetry.dev-dependencies]
|
16
16
|
black = "^22.3.0"
|
@@ -1,178 +0,0 @@
|
|
1
|
-
import httpx
|
2
|
-
from .repository import Repository
|
3
|
-
from .settings import DEFAULT_SERVICE_URL
|
4
|
-
from .extractor import Extractor
|
5
|
-
|
6
|
-
from typing import List, Optional
|
7
|
-
|
8
|
-
|
9
|
-
class IndexifyClient:
|
10
|
-
"""
|
11
|
-
IndexifyClient is the main entry point for the SDK.
|
12
|
-
For the full list of client features, see the
|
13
|
-
[httpx Client documentation](https://www.python-httpx.org/api/#client).
|
14
|
-
|
15
|
-
:param service_url: The URL of the Indexify service to connect to.
|
16
|
-
:param args: Arguments to pass to the httpx.Client constructor
|
17
|
-
:param kwargs: Keyword arguments to pass to the httpx.Client constructor
|
18
|
-
|
19
|
-
Example usage:
|
20
|
-
```
|
21
|
-
from indexify import IndexifyClient
|
22
|
-
|
23
|
-
client = IndexifyClient()
|
24
|
-
assert client.heartbeat() == True
|
25
|
-
```
|
26
|
-
"""
|
27
|
-
def __init__(self,
|
28
|
-
service_url: str = DEFAULT_SERVICE_URL,
|
29
|
-
*args,
|
30
|
-
**kwargs
|
31
|
-
):
|
32
|
-
self._service_url = service_url
|
33
|
-
self._client = httpx.Client(*args, **kwargs)
|
34
|
-
|
35
|
-
@classmethod
|
36
|
-
def with_mtls(cls, cert_path: str, key_path: str, ca_bundle_path: Optional[str] = None,
|
37
|
-
service_url: str = DEFAULT_SERVICE_URL, *args, **kwargs) -> "IndexifyClient":
|
38
|
-
"""
|
39
|
-
Create a client with mutual TLS authentication. Also enables HTTP/2,
|
40
|
-
which is required for mTLS.
|
41
|
-
NOTE: mTLS must be enabled on the Indexify service for this to work.
|
42
|
-
|
43
|
-
:param cert_path: Path to the client certificate. Resolution handled by httpx.
|
44
|
-
:param key_path: Path to the client key. Resolution handled by httpx.
|
45
|
-
:param args: Arguments to pass to the httpx.Client constructor
|
46
|
-
:param kwargs: Keyword arguments to pass to the httpx.Client constructor
|
47
|
-
:return: A client with mTLS authentication
|
48
|
-
|
49
|
-
Example usage:
|
50
|
-
```
|
51
|
-
from indexify import IndexifyClient
|
52
|
-
|
53
|
-
client = IndexifyClient.with_mtls(
|
54
|
-
cert_path="/path/to/cert.pem",
|
55
|
-
key_path="/path/to/key.pem",
|
56
|
-
)
|
57
|
-
assert client.heartbeat() == True
|
58
|
-
```
|
59
|
-
"""
|
60
|
-
if not (cert_path and key_path):
|
61
|
-
raise ValueError("Both cert and key must be provided for mTLS")
|
62
|
-
|
63
|
-
client_certs = (cert_path, key_path)
|
64
|
-
verify_option = ca_bundle_path if ca_bundle_path else True
|
65
|
-
client = IndexifyClient(*args, **kwargs, service_url=service_url, http2=True, cert=client_certs, verify=verify_option)
|
66
|
-
return client
|
67
|
-
|
68
|
-
def _request(self, method: str, **kwargs) -> httpx.Response:
|
69
|
-
response = self._client.request(method, **kwargs)
|
70
|
-
response.raise_for_status()
|
71
|
-
return response
|
72
|
-
|
73
|
-
def get(self, endpoint: str, **kwargs) -> httpx.Response:
|
74
|
-
"""
|
75
|
-
Make a GET request to the Indexify service.
|
76
|
-
|
77
|
-
:param endpoint: The endpoint to make the request to.
|
78
|
-
|
79
|
-
Example usage:
|
80
|
-
```
|
81
|
-
from indexify import IndexifyClient
|
82
|
-
|
83
|
-
client = IndexifyClient()
|
84
|
-
response = client.get("repositories")
|
85
|
-
print(response.json())
|
86
|
-
```
|
87
|
-
"""
|
88
|
-
return self._request("GET", url=f"{self._service_url}/{endpoint}", **kwargs)
|
89
|
-
|
90
|
-
def post(self, endpoint: str, **kwargs) -> httpx.Response:
|
91
|
-
"""
|
92
|
-
Make a POST request to the Indexify service.
|
93
|
-
|
94
|
-
:param endpoint: The endpoint to make the request to.
|
95
|
-
|
96
|
-
Example usage:
|
97
|
-
|
98
|
-
```
|
99
|
-
from indexify import IndexifyClient
|
100
|
-
|
101
|
-
client = IndexifyClient()
|
102
|
-
response = client.post("repositories", json={"name": "my-repo"})
|
103
|
-
print(response.json())
|
104
|
-
```
|
105
|
-
"""
|
106
|
-
return self._request("POST", url=f"{self._service_url}/{endpoint}", **kwargs)
|
107
|
-
|
108
|
-
def put(self, endpoint: str, **kwargs) -> httpx.Response:
|
109
|
-
# Not Implemented
|
110
|
-
raise NotImplementedError
|
111
|
-
|
112
|
-
def delete(self, endpoint: str, **kwargs) -> httpx.Response:
|
113
|
-
# Not Implemented
|
114
|
-
raise NotImplementedError
|
115
|
-
|
116
|
-
def close(self):
|
117
|
-
"""
|
118
|
-
Close the underlying httpx.Client.
|
119
|
-
"""
|
120
|
-
self._client.close()
|
121
|
-
|
122
|
-
# __enter__ and __exit__ allow the client to be used as a context manager
|
123
|
-
def __enter__(self):
|
124
|
-
return self
|
125
|
-
|
126
|
-
def __exit__(self, exc_type, exc_value, traceback):
|
127
|
-
self.close()
|
128
|
-
|
129
|
-
def heartbeat(self, heartbeat_response="Indexify Server") -> bool:
|
130
|
-
"""
|
131
|
-
Check if the Indexify service is alive.
|
132
|
-
"""
|
133
|
-
response = self.get(f"")
|
134
|
-
# Server responds with text: "Indexify Server"
|
135
|
-
return response.text == heartbeat_response
|
136
|
-
|
137
|
-
def repositories(self) -> list[Repository]:
|
138
|
-
"""
|
139
|
-
Get a list of all repositories.
|
140
|
-
"""
|
141
|
-
response = self.get(f"repositories")
|
142
|
-
repositories_dict = response.json()["repositories"]
|
143
|
-
repositories = []
|
144
|
-
for rd in repositories_dict:
|
145
|
-
repositories.append(Repository(rd["name"], self._service_url))
|
146
|
-
return repositories
|
147
|
-
|
148
|
-
def create_repository(
|
149
|
-
self, name: str, extractor_bindings: list = [], metadata: dict = {}
|
150
|
-
) -> Repository:
|
151
|
-
"""
|
152
|
-
Create a new repository.
|
153
|
-
"""
|
154
|
-
req = {
|
155
|
-
"name": name,
|
156
|
-
"extractor_bindings": extractor_bindings,
|
157
|
-
"metadata": metadata,
|
158
|
-
}
|
159
|
-
response = self.post(f"repositories", json=req)
|
160
|
-
return Repository(name, self._service_url)
|
161
|
-
|
162
|
-
def get_repository(self, name: str) -> Repository:
|
163
|
-
"""
|
164
|
-
Get a repository by name.
|
165
|
-
"""
|
166
|
-
return Repository(name, self._service_url)
|
167
|
-
|
168
|
-
def extractors(self) -> List[Extractor]:
|
169
|
-
"""
|
170
|
-
Get a list of all extractors.
|
171
|
-
"""
|
172
|
-
response = self.get(f"extractors")
|
173
|
-
extractors_dict = response.json()["extractors"]
|
174
|
-
extractors = []
|
175
|
-
for ed in extractors_dict:
|
176
|
-
extractors.append(Extractor.from_dict(ed))
|
177
|
-
return extractors
|
178
|
-
|
@@ -1,196 +0,0 @@
|
|
1
|
-
import httpx
|
2
|
-
import json
|
3
|
-
|
4
|
-
from dataclasses import dataclass
|
5
|
-
from collections import namedtuple
|
6
|
-
|
7
|
-
from .data_containers import TextChunk
|
8
|
-
from .settings import DEFAULT_SERVICE_URL
|
9
|
-
from typing import List
|
10
|
-
from .utils import json_set_default
|
11
|
-
from indexify.exceptions import ApiException
|
12
|
-
from .index import Index
|
13
|
-
|
14
|
-
Document = namedtuple("Document", ["text", "metadata"])
|
15
|
-
|
16
|
-
|
17
|
-
@dataclass
|
18
|
-
class Filter:
|
19
|
-
includes: dict[str, str]
|
20
|
-
excludes: dict[str, str]
|
21
|
-
|
22
|
-
@classmethod
|
23
|
-
def from_dict(cls, json: dict):
|
24
|
-
includes = json.get("eq", {})
|
25
|
-
excludes = json.get("ne", {})
|
26
|
-
return Filter(includes=includes, excludes=excludes)
|
27
|
-
|
28
|
-
def json(self):
|
29
|
-
filters = []
|
30
|
-
for k, v in self.includes.items():
|
31
|
-
filters.append({"eq": {k: v}})
|
32
|
-
for k, v in self.excludes.items():
|
33
|
-
filters.append({"neq": {k: v}})
|
34
|
-
return filters
|
35
|
-
|
36
|
-
|
37
|
-
class FilterBuilder:
|
38
|
-
def __init__(self) -> None:
|
39
|
-
self._filter = Filter(includes={}, excludes={})
|
40
|
-
|
41
|
-
def include(self, key: str, value: str) -> "FilterBuilder":
|
42
|
-
self._filter.includes[key] = value
|
43
|
-
return self
|
44
|
-
|
45
|
-
def exclude(self, key: str, value: str) -> "FilterBuilder":
|
46
|
-
self._filter.excludes[key] = value
|
47
|
-
return self
|
48
|
-
|
49
|
-
def build(self) -> Filter:
|
50
|
-
return self._filter
|
51
|
-
|
52
|
-
|
53
|
-
@dataclass
|
54
|
-
class ExtractorBinding:
|
55
|
-
extractor_name: str
|
56
|
-
index_name: str
|
57
|
-
filters: list[Filter]
|
58
|
-
input_params: dict
|
59
|
-
|
60
|
-
def __repr__(self) -> str:
|
61
|
-
return f"ExtractorBinding(extractor_name={self.extractor_name}, index_name={self.index_name})"
|
62
|
-
|
63
|
-
def __str__(self) -> str:
|
64
|
-
return self.__repr__()
|
65
|
-
|
66
|
-
@classmethod
|
67
|
-
def from_dict(cls, json: dict):
|
68
|
-
filters_dict = json["filters"]
|
69
|
-
filters = []
|
70
|
-
for filter_dict in filters_dict:
|
71
|
-
filters.append(Filter.from_dict(filter_dict))
|
72
|
-
json["filters"] = filters
|
73
|
-
return ExtractorBinding(**json)
|
74
|
-
|
75
|
-
|
76
|
-
class Repository:
|
77
|
-
def __init__(
|
78
|
-
self,
|
79
|
-
name: str,
|
80
|
-
service_url: str,
|
81
|
-
extractor_bindings: List[ExtractorBinding] = None,
|
82
|
-
metadata: dict = None,
|
83
|
-
) -> None:
|
84
|
-
self.name = name
|
85
|
-
self._service_url = service_url
|
86
|
-
self.extractor_bindings = extractor_bindings
|
87
|
-
self.metadata = metadata
|
88
|
-
|
89
|
-
async def run_extractors(self) -> dict:
|
90
|
-
response = httpx.post(f"{self._service_url}/run_extractors")
|
91
|
-
response.raise_for_status()
|
92
|
-
|
93
|
-
def add_documents(self, documents: List[Document]) -> None:
|
94
|
-
if isinstance(documents, Document):
|
95
|
-
documents = [documents]
|
96
|
-
req = {"documents": documents}
|
97
|
-
response = httpx.post(
|
98
|
-
f"{self._service_url}/repositories/{self.name}/add_texts",
|
99
|
-
json=req,
|
100
|
-
headers={"Content-Type": "application/json"},
|
101
|
-
)
|
102
|
-
response.raise_for_status()
|
103
|
-
|
104
|
-
def bind_extractor(
|
105
|
-
self,
|
106
|
-
extractor: str,
|
107
|
-
name: str,
|
108
|
-
input_params: dict = {},
|
109
|
-
filter: Filter = None,
|
110
|
-
) -> dict:
|
111
|
-
"""Bind an extractor to this repository
|
112
|
-
|
113
|
-
Args:
|
114
|
-
- extractor (str): Name of the extractor
|
115
|
-
- name (str): Name for this instance
|
116
|
-
- input_params (dict): Dictionary containing extractor input params
|
117
|
-
- filter (Filter): Optional filter for this extractor
|
118
|
-
|
119
|
-
Returns:
|
120
|
-
dict: response payload
|
121
|
-
|
122
|
-
Examples:
|
123
|
-
>>> repo.bind_extractor("EfficientNet", "efficientnet")
|
124
|
-
|
125
|
-
>>> repo.bind_extractor("MiniLML6", "minilm")
|
126
|
-
|
127
|
-
"""
|
128
|
-
req = {
|
129
|
-
"extractor": extractor,
|
130
|
-
"name": name,
|
131
|
-
"input_params": input_params,
|
132
|
-
"filters": filter.json() if filter else [],
|
133
|
-
}
|
134
|
-
|
135
|
-
request_body = json.dumps(req, default=json_set_default)
|
136
|
-
response = httpx.post(
|
137
|
-
f"{self._service_url}/repositories/{self.name}/extractor_bindings",
|
138
|
-
data=request_body,
|
139
|
-
headers={"Content-Type": "application/json"},
|
140
|
-
)
|
141
|
-
try:
|
142
|
-
response.raise_for_status()
|
143
|
-
except httpx.HTTPStatusError as exc:
|
144
|
-
raise ApiException(exc.response.text)
|
145
|
-
return
|
146
|
-
|
147
|
-
def indexes(self) -> List[Index]:
|
148
|
-
response = httpx.get(f"{self._service_url}/repositories/{self.name}/indexes")
|
149
|
-
response.raise_for_status()
|
150
|
-
return response.json()["indexes"]
|
151
|
-
|
152
|
-
@classmethod
|
153
|
-
def get(cls, name: str, service_url: str = DEFAULT_SERVICE_URL) -> "Repository":
|
154
|
-
response = httpx.get(f"{service_url}/repositories/{name}")
|
155
|
-
response.raise_for_status()
|
156
|
-
repository_json = response.json()["repository"]
|
157
|
-
return Repository._from_json(repository_json)
|
158
|
-
|
159
|
-
@classmethod
|
160
|
-
def _from_json(cls, service_url: str, repository_json: dict):
|
161
|
-
extractor_bindings = []
|
162
|
-
for eb in repository_json["repository"]["extractor_bindings"]:
|
163
|
-
extractor_bindings.append(ExtractorBinding.from_dict(eb))
|
164
|
-
metadata = repository_json["repository"]["metadata"]
|
165
|
-
return Repository(
|
166
|
-
name=repository_json["repository"]["name"],
|
167
|
-
service_url=service_url,
|
168
|
-
extractor_bindings=extractor_bindings,
|
169
|
-
metadata=metadata,
|
170
|
-
)
|
171
|
-
|
172
|
-
def query_attribute(self, index_name: str, content_id: str = None) -> dict:
|
173
|
-
params = {"index": index_name}
|
174
|
-
if content_id:
|
175
|
-
params.update({"content_id": content_id})
|
176
|
-
response = httpx.get(
|
177
|
-
f"{self._service_url}/repositories/{self.name}/attributes", params=params
|
178
|
-
)
|
179
|
-
response.raise_for_status()
|
180
|
-
return response.json()["attributes"]
|
181
|
-
|
182
|
-
def search_index(self, name: str, query: str, top_k: int) -> list[TextChunk]:
|
183
|
-
req = {"index": name, "query": query, "k": top_k}
|
184
|
-
response = httpx.post(
|
185
|
-
f"{self._service_url}/repositories/{self.name}/search",
|
186
|
-
json=req,
|
187
|
-
headers={"Content-Type": "application/json"},
|
188
|
-
)
|
189
|
-
response.raise_for_status()
|
190
|
-
return response.json()["results"]
|
191
|
-
|
192
|
-
def __repr__(self) -> str:
|
193
|
-
return f"Repository(name={self.name})"
|
194
|
-
|
195
|
-
def __str__(self) -> str:
|
196
|
-
return self.__repr__()
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|