indexify-0.0.4-py3-none-any.whl → indexify-0.0.6-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- indexify/__init__.py +2 -4
- indexify/client.py +261 -33
- indexify/data_containers.py +1 -0
- indexify/extractor.py +6 -23
- indexify/extractor_binding.py +27 -0
- indexify/index.py +4 -3
- {indexify-0.0.4.dist-info → indexify-0.0.6.dist-info}/METADATA +15 -20
- indexify-0.0.6.dist-info/RECORD +13 -0
- indexify/repository.py +0 -196
- indexify-0.0.4.dist-info/RECORD +0 -13
- {indexify-0.0.4.dist-info → indexify-0.0.6.dist-info}/LICENSE.txt +0 -0
- {indexify-0.0.4.dist-info → indexify-0.0.6.dist-info}/WHEEL +0 -0
indexify/__init__.py
CHANGED
@@ -1,14 +1,12 @@
 from .index import Index
 from .client import IndexifyClient
-from .
+from .extractor_binding import ExtractorBinding
 from .data_containers import TextChunk
 from .settings import DEFAULT_SERVICE_URL

 __all__ = [
-    "Filter",
-    "FilterBuilder",
     "Index",
     "IndexifyClient",
-    "
+    "ExtractorBinding",
     "DEFAULT_SERVICE_URL",
 ]
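For orientation, a minimal sketch of the top-level imports after this change, assuming indexify 0.0.6 is installed; the old `Filter`/`FilterBuilder` exports (and the truncated export they replaced) are gone, and `ExtractorBinding` takes their place.

```python
# Sketch only: the 0.0.6 public surface as re-exported by indexify/__init__.py.
from indexify import (
    Index,
    IndexifyClient,
    ExtractorBinding,     # replaces the removed Filter/FilterBuilder-era exports
    DEFAULT_SERVICE_URL,
)

print(DEFAULT_SERVICE_URL)  # base URL the client targets by default
```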
indexify/client.py
CHANGED
@@ -1,9 +1,17 @@
 import httpx
-
+import json
+from collections import namedtuple
 from .settings import DEFAULT_SERVICE_URL
 from .extractor import Extractor
+from .extractor_binding import ExtractorBinding
+from .index import Index
+from .utils import json_set_default
+from .data_containers import TextChunk
+from indexify.exceptions import ApiException

-from typing import List, Optional
+from typing import List, Optional, Union
+
+Document = namedtuple("Document", ["text", "labels"])


 class IndexifyClient:
@@ -24,17 +32,38 @@ class IndexifyClient:
        assert client.heartbeat() == True
        ```
    """
-
-
-
-
-
+
+    def __init__(
+        self,
+        service_url: str = DEFAULT_SERVICE_URL,
+        namespace: str = "default",
+        *args,
+        **kwargs,
+    ):
+        self.namespace: str = namespace
+        self.extractor_bindings: List[ExtractorBinding] = []
+        self.labels: dict = {}
         self._service_url = service_url
         self._client = httpx.Client(*args, **kwargs)

+        # get namespace data
+        response = self.get(f"namespaces/{self.namespace}")
+        response.raise_for_status()
+        resp_json = response.json()
+        # initialize extractor_bindings
+        for eb in resp_json["namespace"]["extractor_bindings"]:
+            self.extractor_bindings.append(ExtractorBinding.from_dict(eb))
+
     @classmethod
-    def with_mtls(
-
+    def with_mtls(
+        cls,
+        cert_path: str,
+        key_path: str,
+        ca_bundle_path: Optional[str] = None,
+        service_url: str = DEFAULT_SERVICE_URL,
+        *args,
+        **kwargs,
+    ) -> "IndexifyClient":
        """
        Create a client with mutual TLS authentication. Also enables HTTP/2,
        which is required for mTLS.
@@ -62,12 +91,23 @@ class IndexifyClient:

        client_certs = (cert_path, key_path)
        verify_option = ca_bundle_path if ca_bundle_path else True
-        client = IndexifyClient(
+        client = IndexifyClient(
+            *args,
+            **kwargs,
+            service_url=service_url,
+            http2=True,
+            cert=client_certs,
+            verify=verify_option,
+        )
        return client

    def _request(self, method: str, **kwargs) -> httpx.Response:
        response = self._client.request(method, **kwargs)
-
+        try:
+            response.raise_for_status()
+        except httpx.HTTPStatusError as exc:
+            print(f"exception: {exc}, response text: {response.text}")
+            raise exc
        return response

    def get(self, endpoint: str, **kwargs) -> httpx.Response:
@@ -81,7 +121,7 @@ class IndexifyClient:
            from indexify import IndexifyClient

            client = IndexifyClient()
-            response = client.get("
+            response = client.get("namespaces")
            print(response.json())
            ```
        """
@@ -99,7 +139,7 @@ class IndexifyClient:
            from indexify import IndexifyClient

            client = IndexifyClient()
-            response = client.post("
+            response = client.post("namespaces", json={"name": "my-repo"})
            print(response.json())
            ```
        """
@@ -134,40 +174,63 @@ class IndexifyClient:
        # Server responds with text: "Indexify Server"
        return response.text == heartbeat_response

-    def
+    def namespaces(self) -> list[str]:
        """
-        Get a list of all
+        Get a list of all namespaces.
        """
-        response = self.get(f"
-
-
-        for
-
-        return
+        response = self.get(f"namespaces")
+        namespaces_dict = response.json()["namespaces"]
+        namespaces = []
+        for item in namespaces_dict:
+            namespaces.append(item["name"])
+        return namespaces

-
-
-
+    @classmethod
+    def create_namespace(
+        self,
+        namespace: str,
+        extractor_bindings: list = [],
+        labels: dict = {},
+    ) -> "IndexifyClient":
        """
-        Create a new
+        Create a new namespace.
+
+        Returns:
+            IndexifyClient: a new client with the given namespace
        """
+        bindings = []
+        for bd in extractor_bindings:
+            if isinstance(bd, ExtractorBinding):
+                bindings.append(bd.to_dict())
+            else:
+                bindings.append(bd)
        req = {
-            "name":
-            "extractor_bindings":
-            "
+            "name": namespace,
+            "extractor_bindings": bindings,
+            "labels": labels,
        }
-        response = self.post(f"repositories", json=req)
-        return Repository(name, self._service_url)

-
+        client = IndexifyClient(namespace=namespace)
+        client.post(f"namespaces", json=req)
+        return client
+
+    def indexes(self) -> List[Index]:
        """
-        Get
+        Get the indexes of the current namespace.
+
+        Returns:
+            List[Index]: list of indexes in the current namespace
        """
-
+        response = self.get(f"namespaces/{self.namespace}/indexes")
+        response.raise_for_status()
+        return response.json()["indexes"]

    def extractors(self) -> List[Extractor]:
        """
        Get a list of all extractors.
+
+        Returns:
+            List[Extractor]: list of extractors
        """
        response = self.get(f"extractors")
        extractors_dict = response.json()["extractors"]
@@ -176,3 +239,168 @@ class IndexifyClient:
            extractors.append(Extractor.from_dict(ed))
        return extractors

+    def get_extractor_bindings(self):
+        """
+        Retrieve and update the list of extractor bindings for the current namespace.
+        """
+        response = self.get(f"namespaces/{self.namespace}")
+        response.raise_for_status()
+
+        self.extractor_bindings = []
+        for eb in response.json()["namespace"]["extractor_bindings"]:
+            self.extractor_bindings.append(ExtractorBinding.from_dict(eb))
+        return self.extractor_bindings
+
+    def bind_extractor(
+        self,
+        extractor: str,
+        name: str,
+        input_params: dict = {},
+        labels_eq: str = None,
+    ) -> dict:
+        """Bind an extractor.
+
+        Args:
+            - extractor (str): Name of the extractor
+            - name (str): Name for this instance
+            - input_params (dict): Dictionary containing extractor input params
+            - filter (Filter): Optional filter for this extractor
+
+        Returns:
+            dict: response payload
+
+        Examples:
+            >>> repo.bind_extractor("EfficientNet", "efficientnet")
+
+            >>> repo.bind_extractor("MiniLML6", "minilm")
+
+        """
+        req = {
+            "extractor": extractor,
+            "name": name,
+            "input_params": input_params,
+            "filters_eq": labels_eq,
+        }
+        if req["filters_eq"] == None:
+            del req["filters_eq"]
+
+        request_body = json.dumps(req, default=json_set_default)
+        response = self.post(
+            f"namespaces/{self.namespace}/extractor_bindings",
+            data=request_body,
+            headers={"Content-Type": "application/json"},
+        )
+
+        # update self.extractor_bindings
+        self.get_extractor_bindings()
+
+        try:
+            response.raise_for_status()
+        except httpx.HTTPStatusError as exc:
+            raise ApiException(exc.response.text)
+        return
+
+    def get_content(
+        self,
+        parent_id: str = None,
+        labels_eq: str = None,
+    ):
+        """
+        Get list of content from current namespace.
+
+        Args:
+            - parent_id (str): Optional filter for parent id
+            - labels_eq (str): Optional filter for labels
+        """
+        params = {}
+        if parent_id:
+            params.update({"parent_id": parent_id})
+        if labels_eq:
+            params.update({"labels_eq": labels_eq})
+
+        response = self.get(f"namespaces/{self.namespace}/content", params=params)
+        response.raise_for_status()
+        return response.json()["content_list"]
+
+    def add_documents(
+        self, documents: Union[Document, str, List[Union[Document, str]]]
+    ) -> None:
+        """
+        Add documents to current namespace.
+
+        Args:
+            - documents (Union[Document, str, List[Union[Document, str]]]): this can be a list of strings, list of Documents or a mix of both
+        """
+        if isinstance(documents, Document):
+            documents = [documents]
+        elif isinstance(documents, str):
+            documents = [Document(documents, {})]
+        elif isinstance(documents, list):
+            new_documents = []
+            for item in documents:
+                if isinstance(item, Document):
+                    new_documents.append(item)
+                elif isinstance(item, str):
+                    new_documents.append(Document(item, {}))
+                else:
+                    raise ValueError(
+                        "List items must be either Document instances or strings."
+                    )
+            documents = new_documents
+        else:
+            raise TypeError(
+                "Invalid type for documents. Expected Document, str, or list of these."
+            )
+
+        req = {"documents": documents}
+        response = self.post(
+            f"namespaces/{self.namespace}/add_texts",
+            json=req,
+            headers={"Content-Type": "application/json"},
+        )
+        response.raise_for_status()
+
+    def query_metadata(self, index_name: str, content_id: str) -> dict:
+        """
+        Query metadata for a specific content ID in a given index.
+
+        Args:
+            - index_name (str): index to query
+            - content_id (str): content id to query
+        """
+        params = {"index": index_name, "content_id": content_id}
+        response = self.get(f"namespaces/{self.namespace}/metadata", params=params)
+        response.raise_for_status()
+        return response.json()["attributes"]
+
+    def search_index(self, name: str, query: str, top_k: int) -> list[TextChunk]:
+        """
+        Search index in the current namespace.
+
+        Args:
+            - name (str): name of index to search
+            - query (str): query string
+            - top_k (int): top k nearest neighbors to be returned
+        """
+        req = {"index": name, "query": query, "k": top_k}
+        response = self.post(
+            f"namespaces/{self.namespace}/search",
+            json=req,
+            headers={"Content-Type": "application/json"},
+        )
+        response.raise_for_status()
+        return response.json()["results"]
+
+    def upload_file(self, path: str):
+        """
+        Upload a file.
+
+        Args:
+            - path (str): relative path to the file to be uploaded
+        """
+        with open(path, "rb") as f:
+            response = self.post(
+                f"namespaces/{self.namespace}/upload_file",
+                files={"file": f},
+            )
+            response.raise_for_status()
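To make the new namespace-oriented surface easier to follow, here is a minimal usage sketch of the methods added above. It assumes a local Indexify server is reachable at the default service URL, that an extractor named "MiniLML6" (the name used in the docstring examples) is installed, and that the binding and index names are illustrative placeholders.

```python
from indexify import IndexifyClient
from indexify.client import Document

# Connects to the "default" namespace; the new constructor also fetches the
# namespace's extractor bindings up front.
client = IndexifyClient()
print(client.namespaces())   # names of all namespaces on the server
print(client.extractors())   # extractors the server exposes

# Bind an extractor to the current namespace. "MiniLML6"/"minilm" come from
# the docstring examples above; adjust to whatever your server actually has.
client.bind_extractor("MiniLML6", "minilm")
print(client.extractor_bindings)

# Add text content: plain strings and Document(text, labels) can be mixed.
client.add_documents(
    ["Indexify is a retrieval service.", Document("A labeled document", {"source": "notes"})]
)
print(client.get_content())

# Search an index produced by the binding. The index name below is a
# placeholder; use client.indexes() to discover the real names.
results = client.search_index("minilm-embedding", "what is indexify?", top_k=3)
print(results)
```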
indexify/data_containers.py
CHANGED
indexify/extractor.py
CHANGED
@@ -1,27 +1,10 @@
-import
+import httpx
 from dataclasses import dataclass
 from typing import Union

 from .settings import DEFAULT_SERVICE_URL


-def list_extractors(base_url: str = DEFAULT_SERVICE_URL) -> list[dict]:
-    response = requests.get(f"{base_url}/extractors")
-    response.raise_for_status()
-    return response.json()["extractors"]
-
-
-@dataclass
-class EmbeddingExtractor:
-    dim: int
-    distance: str
-
-
-@dataclass
-class AttributeExtractor:
-    json_schema: str
-
-
 @dataclass
 class EmbeddingSchema:
     distance: str
@@ -38,17 +21,17 @@ class Extractor:
     name: str
     description: str
     input_params: dict
-
+    outputs: ExtractorSchema


 class Extractor:
     def __init__(
-        self, name: str, description: str, input_params: dict,
+        self, name: str, description: str, input_params: dict, outputs: ExtractorSchema
     ):
         self.name = name
         self.description = description
         self.input_params = input_params
-        self.
+        self.outputs = outputs

     @classmethod
     def from_dict(cls, data):
@@ -56,11 +39,11 @@ class Extractor:
             name=data["name"],
             description=data["description"],
             input_params=data["input_params"],
-
+            outputs=data["outputs"],
         )

     def __repr__(self) -> str:
-        return f"Extractor(name={self.name}, description={self.description})"
+        return f"Extractor(name={self.name}, description={self.description}, input_params={self.input_params}, outputs={self.outputs})"

     def __str__(self) -> str:
         return self.__repr__()
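A short sketch of the reshaped extractor metadata, assuming the dict form that `Extractor.from_dict` consumes above; the `outputs` payload shown here is only an illustration of a possible shape, not the server's exact schema.

```python
from indexify.extractor import Extractor

# Illustrative payload in the shape Extractor.from_dict() expects after this
# change; real values come from the server's /extractors endpoint.
extractor_dict = {
    "name": "MiniLML6",
    "description": "Sentence embedding extractor",
    "input_params": {},
    "outputs": {"embedding": {"dim": 384, "distance": "cosine"}},  # assumed shape
}

extractor = Extractor.from_dict(extractor_dict)
# __repr__ now includes input_params and outputs as well.
print(extractor)
```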
indexify/extractor_binding.py
ADDED
@@ -0,0 +1,27 @@
+from dataclasses import dataclass, asdict
+from typing import Optional
+
+
+@dataclass
+class ExtractorBinding:
+    extractor: str
+    name: str
+    content_source: str
+    input_params: dict
+    labels_eq: Optional[str] = None
+
+    def __repr__(self) -> str:
+        return f"ExtractorBinding(name={self.name} extractor={self.extractor})"
+
+    def __str__(self) -> str:
+        return self.__repr__()
+
+    def to_dict(self) -> dict:
+        filtered_dict = {k: v for k, v in asdict(self).items() if v is not None}
+        return filtered_dict
+
+    @classmethod
+    def from_dict(cls, json: dict):
+        if "filters_eq" in json:
+            json["labels_eq"] = json.pop("filters_eq")
+        return ExtractorBinding(**json)
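A self-contained sketch of how the new dataclass round-trips server payloads, including the `filters_eq` → `labels_eq` rename handled in `from_dict`; the field values are made up for illustration and no server is needed to run it.

```python
from indexify.extractor_binding import ExtractorBinding

# Server-style payload; "filters_eq" is the wire name that from_dict() maps
# onto the labels_eq field (values here are illustrative).
payload = {
    "extractor": "MiniLML6",
    "name": "minilm",
    "content_source": "ingestion",
    "input_params": {},
    "filters_eq": "source:notes",
}

binding = ExtractorBinding.from_dict(payload)
print(binding)            # ExtractorBinding(name=minilm extractor=MiniLML6)
print(binding.labels_eq)  # "source:notes"

# to_dict() drops None-valued fields, so an unfiltered binding serializes
# without a labels_eq key at all.
unfiltered = ExtractorBinding("MiniLML6", "minilm", "ingestion", {})
print(unfiltered.to_dict())
```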
indexify/index.py
CHANGED
@@ -1,7 +1,8 @@
-import
+import httpx

 from .data_containers import SearchChunk, TextChunk

+
 class Index:
     def __init__(self, service_url, index):
         self._service_url = service_url
@@ -9,8 +10,8 @@ class Index:

     def search(self, query: str, top_k: int) -> list[TextChunk]:
         req = {"index": self._index, "query": query, "k": top_k}
-        response =
+        response = httpx.post(
             f"{self._service_url}/indexes/{self._index}/search", json=req
         )
         response.raise_for_status()
-        return response.json()["results"]
+        return response.json()["results"]
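A minimal sketch of driving this helper directly: it posts to `{service_url}/indexes/{index}/search` as written above, so it assumes a reachable Indexify server at the default URL and an existing index; the index name is a placeholder.

```python
from indexify import Index, DEFAULT_SERVICE_URL

# The index name is a placeholder; discover real names via IndexifyClient.indexes().
index = Index(DEFAULT_SERVICE_URL, "minilm-embedding")
for chunk in index.search("what is indexify?", top_k=3):
    print(chunk)
```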
{indexify-0.0.4.dist-info → indexify-0.0.6.dist-info}/METADATA
CHANGED
@@ -1,8 +1,8 @@
 Metadata-Version: 2.1
 Name: indexify
-Version: 0.0.
+Version: 0.0.6
 Summary: Python Client for Indexify
-Home-page: https://github.com/
+Home-page: https://github.com/tensorlakeai/indexify
 License: Apache 2.0
 Author: Diptanu Gon Choudhury
 Author-email: diptanuc@gmail.com
@@ -12,12 +12,15 @@ Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
-Requires-Dist: httpx[http2] (>=0.
-Project-URL: Repository, https://github.com/
+Requires-Dist: httpx[http2] (>=0.26,<0.27)
+Project-URL: Repository, https://github.com/tensorlakeai/indexify
 Description-Content-Type: text/markdown

 # Indexify Python Client

+
+[![PyPI version](https://badge.fury.io/py/indexify.svg)](https://badge.fury.io/py/indexify)
+
 ## Installation

 This is the Python client for interacting with the Indexify service.
@@ -31,10 +34,17 @@ pip install indexify
 ## Usage

 See the [getting started](https://getindexify.com/getting_started/) guide for examples of how to use the client.
-Look at the [
+Look at the [examples](examples) directory for more examples.

 ## Development

+To install the client from this repository for development:
+
+```shell
+cd "path to this repository"
+pip install -e .
+```
+
 Install and run the `poetry` package manager:

 ```shell
@@ -45,21 +55,6 @@ poetry install
 More information at [https://python-poetry.org/docs/](https://python-poetry.org/docs/).


-### Steps for restarting dev server after updating server code
-
-```shell
-./install_python_deps.sh
-# use `-e`` if you're developing extractors
-(cd extractors && pip install -e .)
-# use `-e`` if you're developing sdk-py
-(cd sdk-py && pip install -e .)
-
-cargo build
-make local-dev
-
-# start the server
-./target/debug/indexify start-server -d -c local_config.yaml
-```

 ### Environment Variables

indexify-0.0.6.dist-info/RECORD
ADDED
@@ -0,0 +1,13 @@
+indexify/__init__.py,sha256=rNHNCfTl0zQf--3miNHf1RdXhYs3i446LpBhFYoXjrw,290
+indexify/client.py,sha256=DYFkroiNIZWTTOFcIyAzg8GzWlBBFswCaQ4RNtQGAxU,12828
+indexify/data_containers.py,sha256=--KSx_T5EsoBOemFPKSLVcKtvy3OELgPnLp4kCESHkI,690
+indexify/exceptions.py,sha256=vjd5SPPNFIEW35GorSIodsqvm9RKHQm9kdp8t9gv-WM,111
+indexify/extractor.py,sha256=FPWVd93m4edg4T6vVMl5MGtxsfLi5Vws9n9vLgLCSE8,1130
+indexify/extractor_binding.py,sha256=jVu1lAvstllNz13GxEN-IlLx3XEjzz8QCBk1SsMwsCE,721
+indexify/index.py,sha256=kELxdTnS9ddkEaxqbeZdsbYLEfZ9qi4HiqTPaznOoAo,517
+indexify/settings.py,sha256=yzWAEZkrTjykSMj3hrFU7l_jUoUCOUsgPVW1nU-qzJQ,46
+indexify/utils.py,sha256=wvQB9VpS07iZPOrpmt2i3VIncU6YBkHdpu6rXegDT3Y,282
+indexify-0.0.6.dist-info/LICENSE.txt,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+indexify-0.0.6.dist-info/METADATA,sha256=f6LGiyItVhg-Rb-r_GDLKY3txqMxJZZaZXr8TPhrrdM,1713
+indexify-0.0.6.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
+indexify-0.0.6.dist-info/RECORD,,
indexify/repository.py
DELETED
@@ -1,196 +0,0 @@
-import httpx
-import json
-
-from dataclasses import dataclass
-from collections import namedtuple
-
-from .data_containers import TextChunk
-from .settings import DEFAULT_SERVICE_URL
-from typing import List
-from .utils import json_set_default
-from indexify.exceptions import ApiException
-from .index import Index
-
-Document = namedtuple("Document", ["text", "metadata"])
-
-
-@dataclass
-class Filter:
-    includes: dict[str, str]
-    excludes: dict[str, str]
-
-    @classmethod
-    def from_dict(cls, json: dict):
-        includes = json.get("eq", {})
-        excludes = json.get("ne", {})
-        return Filter(includes=includes, excludes=excludes)
-
-    def json(self):
-        filters = []
-        for k, v in self.includes.items():
-            filters.append({"eq": {k: v}})
-        for k, v in self.excludes.items():
-            filters.append({"neq": {k: v}})
-        return filters
-
-
-class FilterBuilder:
-    def __init__(self) -> None:
-        self._filter = Filter(includes={}, excludes={})
-
-    def include(self, key: str, value: str) -> "FilterBuilder":
-        self._filter.includes[key] = value
-        return self
-
-    def exclude(self, key: str, value: str) -> "FilterBuilder":
-        self._filter.excludes[key] = value
-        return self
-
-    def build(self) -> Filter:
-        return self._filter
-
-
-@dataclass
-class ExtractorBinding:
-    extractor_name: str
-    index_name: str
-    filters: list[Filter]
-    input_params: dict
-
-    def __repr__(self) -> str:
-        return f"ExtractorBinding(extractor_name={self.extractor_name}, index_name={self.index_name})"
-
-    def __str__(self) -> str:
-        return self.__repr__()
-
-    @classmethod
-    def from_dict(cls, json: dict):
-        filters_dict = json["filters"]
-        filters = []
-        for filter_dict in filters_dict:
-            filters.append(Filter.from_dict(filter_dict))
-        json["filters"] = filters
-        return ExtractorBinding(**json)
-
-
-class Repository:
-    def __init__(
-        self,
-        name: str,
-        service_url: str,
-        extractor_bindings: List[ExtractorBinding] = None,
-        metadata: dict = None,
-    ) -> None:
-        self.name = name
-        self._service_url = service_url
-        self.extractor_bindings = extractor_bindings
-        self.metadata = metadata
-
-    async def run_extractors(self) -> dict:
-        response = httpx.post(f"{self._service_url}/run_extractors")
-        response.raise_for_status()
-
-    def add_documents(self, documents: List[Document]) -> None:
-        if isinstance(documents, Document):
-            documents = [documents]
-        req = {"documents": documents}
-        response = httpx.post(
-            f"{self._service_url}/repositories/{self.name}/add_texts",
-            json=req,
-            headers={"Content-Type": "application/json"},
-        )
-        response.raise_for_status()
-
-    def bind_extractor(
-        self,
-        extractor: str,
-        name: str,
-        input_params: dict = {},
-        filter: Filter = None,
-    ) -> dict:
-        """Bind an extractor to this repository
-
-        Args:
-            - extractor (str): Name of the extractor
-            - name (str): Name for this instance
-            - input_params (dict): Dictionary containing extractor input params
-            - filter (Filter): Optional filter for this extractor
-
-        Returns:
-            dict: response payload
-
-        Examples:
-            >>> repo.bind_extractor("EfficientNet", "efficientnet")
-
-            >>> repo.bind_extractor("MiniLML6", "minilm")
-
-        """
-        req = {
-            "extractor": extractor,
-            "name": name,
-            "input_params": input_params,
-            "filters": filter.json() if filter else [],
-        }
-
-        request_body = json.dumps(req, default=json_set_default)
-        response = httpx.post(
-            f"{self._service_url}/repositories/{self.name}/extractor_bindings",
-            data=request_body,
-            headers={"Content-Type": "application/json"},
-        )
-        try:
-            response.raise_for_status()
-        except httpx.HTTPStatusError as exc:
-            raise ApiException(exc.response.text)
-        return
-
-    def indexes(self) -> List[Index]:
-        response = httpx.get(f"{self._service_url}/repositories/{self.name}/indexes")
-        response.raise_for_status()
-        return response.json()["indexes"]
-
-    @classmethod
-    def get(cls, name: str, service_url: str = DEFAULT_SERVICE_URL) -> "Repository":
-        response = httpx.get(f"{service_url}/repositories/{name}")
-        response.raise_for_status()
-        repository_json = response.json()["repository"]
-        return Repository._from_json(repository_json)
-
-    @classmethod
-    def _from_json(cls, service_url: str, repository_json: dict):
-        extractor_bindings = []
-        for eb in repository_json["repository"]["extractor_bindings"]:
-            extractor_bindings.append(ExtractorBinding.from_dict(eb))
-        metadata = repository_json["repository"]["metadata"]
-        return Repository(
-            name=repository_json["repository"]["name"],
-            service_url=service_url,
-            extractor_bindings=extractor_bindings,
-            metadata=metadata,
-        )
-
-    def query_attribute(self, index_name: str, content_id: str = None) -> dict:
-        params = {"index": index_name}
-        if content_id:
-            params.update({"content_id": content_id})
-        response = httpx.get(
-            f"{self._service_url}/repositories/{self.name}/attributes", params=params
-        )
-        response.raise_for_status()
-        return response.json()["attributes"]
-
-    def search_index(self, name: str, query: str, top_k: int) -> list[TextChunk]:
-        req = {"index": name, "query": query, "k": top_k}
-        response = httpx.post(
-            f"{self._service_url}/repositories/{self.name}/search",
-            json=req,
-            headers={"Content-Type": "application/json"},
-        )
-        response.raise_for_status()
-        return response.json()["results"]
-
-    def __repr__(self) -> str:
-        return f"Repository(name={self.name})"
-
-    def __str__(self) -> str:
-        return self.__repr__()
indexify-0.0.4.dist-info/RECORD
DELETED
@@ -1,13 +0,0 @@
-indexify/__init__.py,sha256=0MDmCLdex7F2RXx-2F1lLqTYQl5Gs4tVryA7j4TXCak,329
-indexify/client.py,sha256=8IaX_1KYs-JxXH1lIlE_bFE7Aqe2-nt_RRbneHEXNp0,5798
-indexify/data_containers.py,sha256=n9YlKWUfptDkCR3NL5ldtbngRZC87xmXkoX6smxr47I,689
-indexify/exceptions.py,sha256=vjd5SPPNFIEW35GorSIodsqvm9RKHQm9kdp8t9gv-WM,111
-indexify/extractor.py,sha256=cBzF7Q3fu4jr7cIeSJXwLf9z45Z81MmwkuY4-zG8HBY,1406
-indexify/index.py,sha256=r911vtnvzQg3_Hrc3cbPDATNtAMiSbH9OovGKcYG2g8,519
-indexify/repository.py,sha256=YcJg8P2vvMzqK0AUwESMkA6YizaNBqGOfVnfRrGmMLg,6144
-indexify/settings.py,sha256=yzWAEZkrTjykSMj3hrFU7l_jUoUCOUsgPVW1nU-qzJQ,46
-indexify/utils.py,sha256=wvQB9VpS07iZPOrpmt2i3VIncU6YBkHdpu6rXegDT3Y,282
-indexify-0.0.4.dist-info/LICENSE.txt,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-indexify-0.0.4.dist-info/METADATA,sha256=_bQVRWubUKjdMkDmBjhH6DGw5nLGpRPKjhDyzBJnB_4,1862
-indexify-0.0.4.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
-indexify-0.0.4.dist-info/RECORD,,
{indexify-0.0.4.dist-info → indexify-0.0.6.dist-info}/LICENSE.txt
File without changes
{indexify-0.0.4.dist-info → indexify-0.0.6.dist-info}/WHEEL
File without changes