indexify 0.0.36__py3-none-any.whl → 0.0.37__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- indexify/client.py +2 -2
- indexify/data.py +1 -1
- indexify/extractor.py +9 -7
- indexify/graph.py +39 -8
- indexify/local_runner.py +19 -7
- {indexify-0.0.36.dist-info → indexify-0.0.37.dist-info}/METADATA +1 -1
- {indexify-0.0.36.dist-info → indexify-0.0.37.dist-info}/RECORD +9 -9
- {indexify-0.0.36.dist-info → indexify-0.0.37.dist-info}/LICENSE.txt +0 -0
- {indexify-0.0.36.dist-info → indexify-0.0.37.dist-info}/WHEEL +0 -0
indexify/client.py
CHANGED
@@ -549,7 +549,7 @@ class IndexifyClient:
|
|
549
549
|
labels_filter: List[str] = [],
|
550
550
|
start_id: str = "",
|
551
551
|
limit: int = 10,
|
552
|
-
) -> List[
|
552
|
+
) -> List[ContentMetadata]:
|
553
553
|
"""
|
554
554
|
List content in the current namespace.
|
555
555
|
|
@@ -572,7 +572,7 @@ class IndexifyClient:
|
|
572
572
|
content_list = response.json()["content_list"]
|
573
573
|
content = []
|
574
574
|
for item in content_list:
|
575
|
-
content.append(
|
575
|
+
content.append(ContentMetadata.from_dict(item))
|
576
576
|
return content
|
577
577
|
|
578
578
|
def upload_file(
|
indexify/data.py
CHANGED
indexify/extractor.py
CHANGED
@@ -22,8 +22,8 @@ class Extractor(ABC):
|
|
22
22
|
input_mime_types = ["text/plain"]
|
23
23
|
|
24
24
|
def extract(
|
25
|
-
self,
|
26
|
-
) -> List[Union[Feature,
|
25
|
+
self, input: Type[BaseModel], params: Type[BaseModel] = None
|
26
|
+
) -> List[Union[Feature, Type[BaseModel]]]:
|
27
27
|
"""
|
28
28
|
Extracts information from the content. Returns a list of features to add
|
29
29
|
to the content.
|
@@ -33,8 +33,8 @@ class Extractor(ABC):
|
|
33
33
|
pass
|
34
34
|
|
35
35
|
def extract_batch(
|
36
|
-
self,
|
37
|
-
) -> List[List[Union[Feature,
|
36
|
+
self, input_list: List[Type[BaseModel]], params: List[Type[BaseModel]] = None
|
37
|
+
) -> List[List[Union[Feature, Type[BaseModel]]]]:
|
38
38
|
"""
|
39
39
|
Extracts information from the content. Returns a list of features to add
|
40
40
|
to the content.
|
@@ -99,13 +99,13 @@ def extractor(
|
|
99
99
|
|
100
100
|
class DecoratedFn(Extractor):
|
101
101
|
@classmethod
|
102
|
-
def extract(cls,
|
102
|
+
def extract(cls, input: Type[BaseModel], params: Type[BaseModel]=None) -> List[Content]: # type: ignore
|
103
103
|
# TODO we can force all the functions to take in a parms object
|
104
104
|
# or check if someone adds a params
|
105
105
|
if params is None:
|
106
|
-
return fn(
|
106
|
+
return fn(input)
|
107
107
|
else:
|
108
|
-
return fn(
|
108
|
+
return fn(input, params)
|
109
109
|
|
110
110
|
def sample_input(self) -> Content:
|
111
111
|
return sample_content() if sample_content else self.sample_text()
|
@@ -115,6 +115,8 @@ def extractor(
|
|
115
115
|
|
116
116
|
return DecoratedFn
|
117
117
|
|
118
|
+
wrapper._extractor_name = fn.__name__
|
119
|
+
|
118
120
|
return wrapper
|
119
121
|
|
120
122
|
return construct
|
indexify/graph.py
CHANGED
@@ -2,7 +2,9 @@ from indexify import Content, extractor
|
|
2
2
|
from indexify.extractor import Extractor
|
3
3
|
|
4
4
|
from collections import defaultdict
|
5
|
-
from typing import Any, Callable, Dict, List, Optional
|
5
|
+
from typing import Any, Callable, Dict, List, Optional, Self
|
6
|
+
|
7
|
+
import itertools
|
6
8
|
|
7
9
|
|
8
10
|
@extractor(description="id function")
|
@@ -28,20 +30,49 @@ class Graph:
|
|
28
30
|
|
29
31
|
self._start_node = None
|
30
32
|
|
31
|
-
def
|
33
|
+
def _node(self, extractor: Extractor, params: Any = None) -> Self:
|
34
|
+
name = extractor._extractor_name
|
35
|
+
|
36
|
+
# if you've already inserted a node just ignore the new insertion.
|
32
37
|
if name in self.nodes:
|
33
|
-
|
38
|
+
return
|
34
39
|
|
35
|
-
self.nodes[name] =
|
36
|
-
self.params[name] = params
|
40
|
+
self.nodes[name] = extractor
|
41
|
+
self.params[name] = extractor.__dict__.get('params', None)
|
37
42
|
|
38
43
|
# assign each node a rank of 1 to init the graph
|
39
44
|
self._topo_counter[name] = 1
|
40
45
|
|
41
|
-
|
42
|
-
|
46
|
+
return self
|
47
|
+
|
48
|
+
def step(self,
|
49
|
+
from_node: extractor,
|
50
|
+
to_node: extractor,
|
51
|
+
prefilter_predicates: Optional[str] = None
|
52
|
+
) -> Self:
|
53
|
+
|
54
|
+
self._node(from_node)
|
55
|
+
self._node(to_node)
|
56
|
+
|
57
|
+
from_node_name = from_node._extractor_name
|
58
|
+
to_node_name = to_node._extractor_name
|
59
|
+
|
60
|
+
self.edges[from_node_name].append((to_node_name, prefilter_predicates))
|
61
|
+
|
62
|
+
self._topo_counter[to_node_name] += 1
|
63
|
+
|
64
|
+
return self
|
65
|
+
|
66
|
+
"""
|
67
|
+
Connect nodes as a fan out from one `from_node` to multiple `to_nodes` and respective `prefilter_predicates`.
|
68
|
+
Note: The user has to match the sizes of the lists to make sure they line up otherwise a None is used as a default.
|
69
|
+
"""
|
70
|
+
def steps(self, from_node: extractor, to_nodes: List[extractor], prefilter_predicates: List[str] = []) -> Self:
|
71
|
+
print(f'{to_nodes}, {prefilter_predicates}, {prefilter_predicates}')
|
72
|
+
for t_n, p in itertools.zip_longest(to_nodes, prefilter_predicates, fillvalue=None):
|
73
|
+
self.step(from_node=from_node, to_node=t_n, prefilter_predicates=p)
|
43
74
|
|
44
|
-
self
|
75
|
+
return self
|
45
76
|
|
46
77
|
def _assign_start_node(self):
|
47
78
|
# this method should be called before a graph can be run
|
indexify/local_runner.py
CHANGED
@@ -1,8 +1,10 @@
|
|
1
|
-
from indexify import Content
|
1
|
+
from indexify import Content, Extractor
|
2
2
|
|
3
3
|
from collections import defaultdict
|
4
4
|
from typing import Any, Callable, Dict, Optional
|
5
5
|
|
6
|
+
import json
|
7
|
+
|
6
8
|
class LocalRunner:
|
7
9
|
def __init__(self):
|
8
10
|
self.results: Dict[str, Any] = defaultdict(list) # TODO should the Any be Content?
|
@@ -15,6 +17,8 @@ class LocalRunner:
|
|
15
17
|
extractor_construct: Callable = g.nodes[node_name]
|
16
18
|
params = g.params.get(node_name, None)
|
17
19
|
|
20
|
+
print(f"----Starting {node_name}")
|
21
|
+
|
18
22
|
res = extractor_construct().extract(content=content, params=params)
|
19
23
|
|
20
24
|
self.results[node_name].extend(res)
|
@@ -27,27 +31,35 @@ class LocalRunner:
|
|
27
31
|
|
28
32
|
self._run(g, content=r, node_name=out_edge)
|
29
33
|
|
34
|
+
"""
|
35
|
+
Returns True if content should be filtered
|
36
|
+
"""
|
30
37
|
def _prefilter_content(self, content: Content, prefilter_predicate: Optional[str]) -> bool:
|
31
38
|
if prefilter_predicate is None:
|
32
39
|
return False
|
33
40
|
|
34
41
|
atoms = prefilter_predicate.split('and')
|
35
|
-
if len(atoms) == 0
|
42
|
+
if len(atoms) == 0:
|
36
43
|
return False
|
37
44
|
|
38
45
|
# TODO For now only support `and` and `=` and `string values`
|
39
46
|
bools = []
|
40
47
|
for feature in content.features:
|
41
48
|
if feature.feature_type == 'metadata':
|
42
|
-
|
49
|
+
predicates = json.loads(feature.value)
|
50
|
+
|
51
|
+
print(f"predicates {predicates}")
|
43
52
|
|
44
|
-
print(f'{prefilter_predicate, atoms}')
|
45
53
|
for atom in atoms:
|
46
54
|
l, r = atom.split('=')
|
47
|
-
if l in
|
48
|
-
|
55
|
+
if l in predicates:
|
56
|
+
print(f'predicates[l], r: {predicates[l], r}')
|
57
|
+
bools.append(predicates[l] != r)
|
58
|
+
|
59
|
+
print(bools)
|
49
60
|
|
50
61
|
return all(bools)
|
51
62
|
|
52
|
-
def get_result(self,
|
63
|
+
def get_result(self, node: Extractor) -> Content:
|
64
|
+
node_name = node._extractor_name
|
53
65
|
return self.results[node_name]
|
@@ -1,18 +1,18 @@
|
|
1
1
|
indexify/__init__.py,sha256=W58FqmnKHIx-gHKTBDQa1QI49Gi8f1rw90yDg31jwgQ,743
|
2
|
-
indexify/client.py,sha256=
|
3
|
-
indexify/data.py,sha256=
|
2
|
+
indexify/client.py,sha256=faGiWAtdXkL4Vmx6xr0iHJLIBwhS2XZbQ6ld_7sMsBc,25874
|
3
|
+
indexify/data.py,sha256=91We7J2QAKBOTu1yF3ApTl4yl4C-nDL2WSXhBdekLWg,2334
|
4
4
|
indexify/data_loaders/__init__.py,sha256=EiYemxCP4zRfDWnDKiX6-SFwXVmv1TSdcXHBQRbE_Uw,1309
|
5
5
|
indexify/data_loaders/local_directory_loader.py,sha256=kF7VwkuOJFBrhKrR7IOOdZ4TDAItw_CyUOfcuej1CKI,1080
|
6
6
|
indexify/error.py,sha256=3umTeYb0ugtUyehV1ibfvaeACxAONPyWPc-1HRN4d1M,856
|
7
7
|
indexify/exceptions.py,sha256=vjd5SPPNFIEW35GorSIodsqvm9RKHQm9kdp8t9gv-WM,111
|
8
8
|
indexify/extraction_policy.py,sha256=awNDqwCz0tr4jTQmGf7s8_s6vcEuxMb0xynEl7b7iPI,2076
|
9
|
-
indexify/extractor.py,sha256=
|
9
|
+
indexify/extractor.py,sha256=HnLot4DQv7aVI3FwFNH83LzKjq7DlSR1-wmpcVC89tE,3930
|
10
10
|
indexify/extractor_utils.py,sha256=68V5vZB9GYx648dyyVKAia0M4pG_R31QPqUQz3ZZ1FQ,6593
|
11
|
-
indexify/graph.py,sha256=
|
12
|
-
indexify/local_runner.py,sha256=
|
11
|
+
indexify/graph.py,sha256=hUGTpaI3ale54sQ90u5P3-RJCwsSlEJg1V1R0rmCZE0,2576
|
12
|
+
indexify/local_runner.py,sha256=VV4Ff_ctibw0ZL4u1wVA7drRx4zLTgNmT_qLX3Cq2SY,2167
|
13
13
|
indexify/settings.py,sha256=LSaWZ0ADIVmUv6o6dHWRC3-Ry5uLbCw2sBSg1e_U7UM,99
|
14
14
|
indexify/utils.py,sha256=rDN2lrsAs9noJEIjfx6ukmC2SAIyrlUt7QU-kaBjujM,125
|
15
|
-
indexify-0.0.
|
16
|
-
indexify-0.0.
|
17
|
-
indexify-0.0.
|
18
|
-
indexify-0.0.
|
15
|
+
indexify-0.0.37.dist-info/LICENSE.txt,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
16
|
+
indexify-0.0.37.dist-info/METADATA,sha256=_3uThIPuUiPQ9BBVoqoEEo5Prqp_LHx59jHrZ2CpSgk,1891
|
17
|
+
indexify-0.0.37.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
18
|
+
indexify-0.0.37.dist-info/RECORD,,
|
File without changes
|
File without changes
|