indexify 0.0.8__tar.gz → 0.0.10__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {indexify-0.0.8 → indexify-0.0.10}/PKG-INFO +1 -1
- {indexify-0.0.8 → indexify-0.0.10}/indexify/__init__.py +4 -3
- {indexify-0.0.8 → indexify-0.0.10}/indexify/client.py +25 -24
- {indexify-0.0.8 → indexify-0.0.10}/indexify/data_containers.py +0 -19
- indexify-0.0.8/indexify/extractor_binding.py → indexify-0.0.10/indexify/extraction_policy.py +3 -3
- {indexify-0.0.8 → indexify-0.0.10}/indexify/extractor.py +4 -10
- {indexify-0.0.8 → indexify-0.0.10}/indexify/index.py +1 -1
- indexify-0.0.10/indexify/utils.py +7 -0
- {indexify-0.0.8 → indexify-0.0.10}/pyproject.toml +1 -1
- indexify-0.0.8/indexify/utils.py +0 -16
- {indexify-0.0.8 → indexify-0.0.10}/LICENSE.txt +0 -0
- {indexify-0.0.8 → indexify-0.0.10}/README.md +0 -0
- {indexify-0.0.8 → indexify-0.0.10}/indexify/exceptions.py +0 -0
- {indexify-0.0.8 → indexify-0.0.10}/indexify/settings.py +0 -0
@@ -1,12 +1,13 @@
|
|
1
1
|
from .index import Index
|
2
2
|
from .client import IndexifyClient
|
3
|
-
from .
|
4
|
-
from .
|
3
|
+
from .extraction_policy import ExtractionPolicy
|
4
|
+
from .client import IndexifyClient, Document
|
5
5
|
from .settings import DEFAULT_SERVICE_URL
|
6
6
|
|
7
7
|
__all__ = [
|
8
8
|
"Index",
|
9
|
+
"Document",
|
9
10
|
"IndexifyClient",
|
10
|
-
"
|
11
|
+
"ExtractionPolicy",
|
11
12
|
"DEFAULT_SERVICE_URL",
|
12
13
|
]
|
@@ -3,7 +3,7 @@ import json
|
|
3
3
|
from collections import namedtuple
|
4
4
|
from .settings import DEFAULT_SERVICE_URL
|
5
5
|
from .extractor import Extractor
|
6
|
-
from .
|
6
|
+
from .extraction_policy import ExtractionPolicy
|
7
7
|
from .index import Index
|
8
8
|
from .utils import json_set_default
|
9
9
|
from .data_containers import TextChunk
|
@@ -41,7 +41,7 @@ class IndexifyClient:
|
|
41
41
|
**kwargs,
|
42
42
|
):
|
43
43
|
self.namespace: str = namespace
|
44
|
-
self.
|
44
|
+
self.extraction_policies: List[ExtractionPolicy] = []
|
45
45
|
self.labels: dict = {}
|
46
46
|
self._service_url = service_url
|
47
47
|
self._client = httpx.Client(*args, **kwargs)
|
@@ -50,9 +50,9 @@ class IndexifyClient:
|
|
50
50
|
response = self.get(f"namespaces/{self.namespace}")
|
51
51
|
response.raise_for_status()
|
52
52
|
resp_json = response.json()
|
53
|
-
# initialize
|
54
|
-
for eb in resp_json["namespace"]["
|
55
|
-
self.
|
53
|
+
# initialize extraction_policies
|
54
|
+
for eb in resp_json["namespace"]["extraction_policies"]:
|
55
|
+
self.extraction_policies.append(ExtractionPolicy.from_dict(eb))
|
56
56
|
|
57
57
|
@classmethod
|
58
58
|
def with_mtls(
|
@@ -189,7 +189,7 @@ class IndexifyClient:
|
|
189
189
|
def create_namespace(
|
190
190
|
self,
|
191
191
|
namespace: str,
|
192
|
-
|
192
|
+
extraction_policies: list = [],
|
193
193
|
labels: dict = {},
|
194
194
|
) -> "IndexifyClient":
|
195
195
|
"""
|
@@ -198,15 +198,15 @@ class IndexifyClient:
|
|
198
198
|
Returns:
|
199
199
|
IndexifyClient: a new client with the given namespace
|
200
200
|
"""
|
201
|
-
|
202
|
-
for bd in
|
203
|
-
if isinstance(bd,
|
204
|
-
|
201
|
+
extraction_policies = []
|
202
|
+
for bd in extraction_policies:
|
203
|
+
if isinstance(bd, ExtractionPolicy):
|
204
|
+
extraction_policies.append(bd.to_dict())
|
205
205
|
else:
|
206
|
-
|
206
|
+
extraction_policies.append(bd)
|
207
207
|
req = {
|
208
208
|
"name": namespace,
|
209
|
-
"
|
209
|
+
"extraction_policies": extraction_policies,
|
210
210
|
"labels": labels,
|
211
211
|
}
|
212
212
|
|
@@ -239,19 +239,19 @@ class IndexifyClient:
|
|
239
239
|
extractors.append(Extractor.from_dict(ed))
|
240
240
|
return extractors
|
241
241
|
|
242
|
-
def
|
242
|
+
def get_extraction_policies(self):
|
243
243
|
"""
|
244
|
-
Retrieve and update the list of
|
244
|
+
Retrieve and update the list of extraction policies for the current namespace.
|
245
245
|
"""
|
246
246
|
response = self.get(f"namespaces/{self.namespace}")
|
247
247
|
response.raise_for_status()
|
248
248
|
|
249
|
-
self.
|
250
|
-
for eb in response.json()["namespace"]["
|
251
|
-
self.
|
252
|
-
return self.
|
249
|
+
self.extraction_policies = []
|
250
|
+
for eb in response.json()["namespace"]["extraction_policies"]:
|
251
|
+
self.extraction_policies.append(ExtractionPolicy.from_dict(eb))
|
252
|
+
return self.extraction_policies
|
253
253
|
|
254
|
-
def
|
254
|
+
def add_extraction_policy(
|
255
255
|
self,
|
256
256
|
extractor: str,
|
257
257
|
name: str,
|
@@ -259,7 +259,7 @@ class IndexifyClient:
|
|
259
259
|
labels_eq: str = None,
|
260
260
|
content_source="ingestion",
|
261
261
|
) -> dict:
|
262
|
-
"""
|
262
|
+
"""Add a new extraction policy.
|
263
263
|
|
264
264
|
Args:
|
265
265
|
- extractor (str): Name of the extractor
|
@@ -271,9 +271,9 @@ class IndexifyClient:
|
|
271
271
|
dict: response payload
|
272
272
|
|
273
273
|
Examples:
|
274
|
-
>>> repo.
|
274
|
+
>>> repo.add_extraction_policy("EfficientNet", "efficientnet")
|
275
275
|
|
276
|
-
>>> repo.
|
276
|
+
>>> repo.add_extraction_policy("MiniLML6", "minilm")
|
277
277
|
|
278
278
|
"""
|
279
279
|
req = {
|
@@ -288,13 +288,13 @@ class IndexifyClient:
|
|
288
288
|
|
289
289
|
request_body = json.dumps(req, default=json_set_default)
|
290
290
|
response = self.post(
|
291
|
-
f"namespaces/{self.namespace}/
|
291
|
+
f"namespaces/{self.namespace}/extraction_policies",
|
292
292
|
data=request_body,
|
293
293
|
headers={"Content-Type": "application/json"},
|
294
294
|
)
|
295
295
|
|
296
296
|
# update self.extractor_bindings
|
297
|
-
self.
|
297
|
+
self.get_extraction_policies()
|
298
298
|
|
299
299
|
try:
|
300
300
|
response.raise_for_status()
|
@@ -404,5 +404,6 @@ class IndexifyClient:
|
|
404
404
|
response = self.post(
|
405
405
|
f"namespaces/{self.namespace}/upload_file",
|
406
406
|
files={"file": f},
|
407
|
+
timeout=None,
|
407
408
|
)
|
408
409
|
response.raise_for_status()
|
@@ -3,15 +3,6 @@ from typing import List
|
|
3
3
|
from dataclasses import dataclass, field
|
4
4
|
|
5
5
|
|
6
|
-
class TextSplitter(str, Enum):
|
7
|
-
NEWLINE = "new_line"
|
8
|
-
REGEX = "regex"
|
9
|
-
NOOP = "noop"
|
10
|
-
|
11
|
-
def __str__(self) -> str:
|
12
|
-
return self.value.lower()
|
13
|
-
|
14
|
-
|
15
6
|
@dataclass
|
16
7
|
class TextChunk:
|
17
8
|
text: str
|
@@ -22,16 +13,6 @@ class TextChunk:
|
|
22
13
|
return {"text": self.text, "metadata": self.metadata}
|
23
14
|
|
24
15
|
|
25
|
-
@dataclass
|
26
|
-
class SearchChunk:
|
27
|
-
index: str
|
28
|
-
query: str
|
29
|
-
k: int
|
30
|
-
|
31
|
-
def to_dict(self):
|
32
|
-
return {"index": self.index, "query": self.query, "k": self.k}
|
33
|
-
|
34
|
-
|
35
16
|
@dataclass
|
36
17
|
class SearchResult:
|
37
18
|
results: List[TextChunk]
|
indexify-0.0.8/indexify/extractor_binding.py → indexify-0.0.10/indexify/extraction_policy.py
RENAMED
@@ -3,7 +3,7 @@ from typing import Optional
|
|
3
3
|
|
4
4
|
|
5
5
|
@dataclass
|
6
|
-
class
|
6
|
+
class ExtractionPolicy:
|
7
7
|
extractor: str
|
8
8
|
name: str
|
9
9
|
content_source: str
|
@@ -11,7 +11,7 @@ class ExtractorBinding:
|
|
11
11
|
labels_eq: Optional[str] = None
|
12
12
|
|
13
13
|
def __repr__(self) -> str:
|
14
|
-
return f"
|
14
|
+
return f"ExtractionPolicy(name={self.name} extractor={self.extractor})"
|
15
15
|
|
16
16
|
def __str__(self) -> str:
|
17
17
|
return self.__repr__()
|
@@ -24,4 +24,4 @@ class ExtractorBinding:
|
|
24
24
|
def from_dict(cls, json: dict):
|
25
25
|
if "filters_eq" in json:
|
26
26
|
json["labels_eq"] = json.pop("filters_eq")
|
27
|
-
return
|
27
|
+
return ExtractionPolicy(**json)
|
@@ -16,22 +16,15 @@ class ExtractorSchema:
|
|
16
16
|
outputs: dict[str, Union[EmbeddingSchema, dict]]
|
17
17
|
|
18
18
|
|
19
|
-
@dataclass
|
20
|
-
class Extractor:
|
21
|
-
name: str
|
22
|
-
description: str
|
23
|
-
input_params: dict
|
24
|
-
outputs: ExtractorSchema
|
25
|
-
|
26
|
-
|
27
19
|
class Extractor:
|
28
20
|
def __init__(
|
29
|
-
self, name: str, description: str, input_params: dict, outputs: ExtractorSchema
|
21
|
+
self, name: str, description: str, input_params: dict, outputs: ExtractorSchema, input_mime_types: list[str]
|
30
22
|
):
|
31
23
|
self.name = name
|
32
24
|
self.description = description
|
33
25
|
self.input_params = input_params
|
34
26
|
self.outputs = outputs
|
27
|
+
self.input_mime_types = input_mime_types
|
35
28
|
|
36
29
|
@classmethod
|
37
30
|
def from_dict(cls, data):
|
@@ -39,11 +32,12 @@ class Extractor:
|
|
39
32
|
name=data["name"],
|
40
33
|
description=data["description"],
|
41
34
|
input_params=data["input_params"],
|
35
|
+
input_mime_types=data["input_mime_types"],
|
42
36
|
outputs=data["outputs"],
|
43
37
|
)
|
44
38
|
|
45
39
|
def __repr__(self) -> str:
|
46
|
-
return f"Extractor(name={self.name}, description={self.description}, input_params={self.input_params}, outputs={self.outputs})"
|
40
|
+
return f"Extractor(name={self.name}, description={self.description}, input_params={self.input_params}, input_mime_types={self.input_mime_types}, outputs={self.outputs})"
|
47
41
|
|
48
42
|
def __str__(self) -> str:
|
49
43
|
return self.__repr__()
|
indexify-0.0.8/indexify/utils.py
DELETED
@@ -1,16 +0,0 @@
|
|
1
|
-
from enum import Enum
|
2
|
-
|
3
|
-
|
4
|
-
def json_set_default(obj):
|
5
|
-
if isinstance(obj, set):
|
6
|
-
return list(obj)
|
7
|
-
raise TypeError
|
8
|
-
|
9
|
-
|
10
|
-
class Metric(str, Enum):
|
11
|
-
COSINE = "cosine"
|
12
|
-
DOT = "dot"
|
13
|
-
EUCLIDEAN = "euclidean"
|
14
|
-
|
15
|
-
def __str__(self) -> str:
|
16
|
-
return self.name.lower()
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|