indexify 0.0.36__py3-none-any.whl → 0.0.37__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
indexify/client.py CHANGED
@@ -549,7 +549,7 @@ class IndexifyClient:
549
549
  labels_filter: List[str] = [],
550
550
  start_id: str = "",
551
551
  limit: int = 10,
552
- ) -> List[Content]:
552
+ ) -> List[ContentMetadata]:
553
553
  """
554
554
  List content in the current namespace.
555
555
 
@@ -572,7 +572,7 @@ class IndexifyClient:
572
572
  content_list = response.json()["content_list"]
573
573
  content = []
574
574
  for item in content_list:
575
- content.append(Content.from_dict(item))
575
+ content.append(ContentMetadata.from_dict(item))
576
576
  return content
577
577
 
578
578
  def upload_file(
indexify/data.py CHANGED
@@ -58,7 +58,7 @@ class Content(BaseModel):
58
58
 
59
59
  m, _ = mimetypes.guess_type(path)
60
60
  with open(path, "rb") as f:
61
- return cls(content_type=m, data=f.read())
61
+ return cls(id="none-for-now", content_type=m, data=f.read())
62
62
 
63
63
 
64
64
  class ContentMetadata(BaseModel):
indexify/extractor.py CHANGED
@@ -22,8 +22,8 @@ class Extractor(ABC):
22
22
  input_mime_types = ["text/plain"]
23
23
 
24
24
  def extract(
25
- self, content: Content, params: Type[BaseModel] = None
26
- ) -> List[Union[Feature, Content]]:
25
+ self, input: Type[BaseModel], params: Type[BaseModel] = None
26
+ ) -> List[Union[Feature, Type[BaseModel]]]:
27
27
  """
28
28
  Extracts information from the content. Returns a list of features to add
29
29
  to the content.
@@ -33,8 +33,8 @@ class Extractor(ABC):
33
33
  pass
34
34
 
35
35
  def extract_batch(
36
- self, content_list: List[Content], params: List[Type[BaseModel]] = None
37
- ) -> List[List[Union[Feature, Content]]]:
36
+ self, input_list: List[Type[BaseModel]], params: List[Type[BaseModel]] = None
37
+ ) -> List[List[Union[Feature, Type[BaseModel]]]]:
38
38
  """
39
39
  Extracts information from the content. Returns a list of features to add
40
40
  to the content.
@@ -99,13 +99,13 @@ def extractor(
99
99
 
100
100
  class DecoratedFn(Extractor):
101
101
  @classmethod
102
- def extract(cls, content: Content, params: hint) -> List[Content]: # type: ignore
102
+ def extract(cls, input: Type[BaseModel], params: Type[BaseModel]=None) -> List[Content]: # type: ignore
103
103
  # TODO we can force all the functions to take in a parms object
104
104
  # or check if someone adds a params
105
105
  if params is None:
106
- return fn(content)
106
+ return fn(input)
107
107
  else:
108
- return fn(content, params)
108
+ return fn(input, params)
109
109
 
110
110
  def sample_input(self) -> Content:
111
111
  return sample_content() if sample_content else self.sample_text()
@@ -115,6 +115,8 @@ def extractor(
115
115
 
116
116
  return DecoratedFn
117
117
 
118
+ wrapper._extractor_name = fn.__name__
119
+
118
120
  return wrapper
119
121
 
120
122
  return construct
indexify/graph.py CHANGED
@@ -2,7 +2,9 @@ from indexify import Content, extractor
2
2
  from indexify.extractor import Extractor
3
3
 
4
4
  from collections import defaultdict
5
- from typing import Any, Callable, Dict, List, Optional
5
+ from typing import Any, Callable, Dict, List, Optional, Self
6
+
7
+ import itertools
6
8
 
7
9
 
8
10
  @extractor(description="id function")
@@ -28,20 +30,49 @@ class Graph:
28
30
 
29
31
  self._start_node = None
30
32
 
31
- def node(self, name: str, closure: Extractor, params: Any = None) -> None:
33
+ def _node(self, extractor: Extractor, params: Any = None) -> Self:
34
+ name = extractor._extractor_name
35
+
36
+ # if you've already inserted a node just ignore the new insertion.
32
37
  if name in self.nodes:
33
- raise Exception(f"Cannot insert node, node with name: `{name}` already exists")
38
+ return
34
39
 
35
- self.nodes[name] = closure
36
- self.params[name] = params
40
+ self.nodes[name] = extractor
41
+ self.params[name] = extractor.__dict__.get('params', None)
37
42
 
38
43
  # assign each node a rank of 1 to init the graph
39
44
  self._topo_counter[name] = 1
40
45
 
41
- def edge(self, from_node: str, to_node: str, prefilter_predicates: Optional[str] = None) -> None:
42
- self.edges[from_node].append((to_node, prefilter_predicates))
46
+ return self
47
+
48
+ def step(self,
49
+ from_node: extractor,
50
+ to_node: extractor,
51
+ prefilter_predicates: Optional[str] = None
52
+ ) -> Self:
53
+
54
+ self._node(from_node)
55
+ self._node(to_node)
56
+
57
+ from_node_name = from_node._extractor_name
58
+ to_node_name = to_node._extractor_name
59
+
60
+ self.edges[from_node_name].append((to_node_name, prefilter_predicates))
61
+
62
+ self._topo_counter[to_node_name] += 1
63
+
64
+ return self
65
+
66
+ """
67
+ Connect nodes as a fan out from one `from_node` to multiple `to_nodes` and respective `prefilter_predicates`.
68
+ Note: The user has to match the sizes of the lists to make sure they line up otherwise a None is used as a default.
69
+ """
70
+ def steps(self, from_node: extractor, to_nodes: List[extractor], prefilter_predicates: List[str] = []) -> Self:
71
+ print(f'{to_nodes}, {prefilter_predicates}, {prefilter_predicates}')
72
+ for t_n, p in itertools.zip_longest(to_nodes, prefilter_predicates, fillvalue=None):
73
+ self.step(from_node=from_node, to_node=t_n, prefilter_predicates=p)
43
74
 
44
- self._topo_counter[to_node] += 1
75
+ return self
45
76
 
46
77
  def _assign_start_node(self):
47
78
  # this method should be called before a graph can be run
indexify/local_runner.py CHANGED
@@ -1,8 +1,10 @@
1
- from indexify import Content
1
+ from indexify import Content, Extractor
2
2
 
3
3
  from collections import defaultdict
4
4
  from typing import Any, Callable, Dict, Optional
5
5
 
6
+ import json
7
+
6
8
  class LocalRunner:
7
9
  def __init__(self):
8
10
  self.results: Dict[str, Any] = defaultdict(list) # TODO should the Any be Content?
@@ -15,6 +17,8 @@ class LocalRunner:
15
17
  extractor_construct: Callable = g.nodes[node_name]
16
18
  params = g.params.get(node_name, None)
17
19
 
20
+ print(f"----Starting {node_name}")
21
+
18
22
  res = extractor_construct().extract(content=content, params=params)
19
23
 
20
24
  self.results[node_name].extend(res)
@@ -27,27 +31,35 @@ class LocalRunner:
27
31
 
28
32
  self._run(g, content=r, node_name=out_edge)
29
33
 
34
+ """
35
+ Returns True if content should be filtered
36
+ """
30
37
  def _prefilter_content(self, content: Content, prefilter_predicate: Optional[str]) -> bool:
31
38
  if prefilter_predicate is None:
32
39
  return False
33
40
 
34
41
  atoms = prefilter_predicate.split('and')
35
- if len(atoms) == 0 or len(atoms) == 1:
42
+ if len(atoms) == 0:
36
43
  return False
37
44
 
38
45
  # TODO For now only support `and` and `=` and `string values`
39
46
  bools = []
40
47
  for feature in content.features:
41
48
  if feature.feature_type == 'metadata':
42
- values = feature.value
49
+ predicates = json.loads(feature.value)
50
+
51
+ print(f"predicates {predicates}")
43
52
 
44
- print(f'{prefilter_predicate, atoms}')
45
53
  for atom in atoms:
46
54
  l, r = atom.split('=')
47
- if l in values:
48
- bools.append(values[l] == r)
55
+ if l in predicates:
56
+ print(f'predicates[l], r: {predicates[l], r}')
57
+ bools.append(predicates[l] != r)
58
+
59
+ print(bools)
49
60
 
50
61
  return all(bools)
51
62
 
52
- def get_result(self, node_name: str) -> Content:
63
+ def get_result(self, node: Extractor) -> Content:
64
+ node_name = node._extractor_name
53
65
  return self.results[node_name]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: indexify
3
- Version: 0.0.36
3
+ Version: 0.0.37
4
4
  Summary: Python Client for Indexify
5
5
  Home-page: https://github.com/tensorlakeai/indexify
6
6
  License: Apache 2.0
@@ -1,18 +1,18 @@
1
1
  indexify/__init__.py,sha256=W58FqmnKHIx-gHKTBDQa1QI49Gi8f1rw90yDg31jwgQ,743
2
- indexify/client.py,sha256=czMeUoAMMiEH3txysdRTCu84mwWj9Ec_NjiXy6oc9Vw,25858
3
- indexify/data.py,sha256=XWs5_rW2ZGldgwtqN62VwZF15ot1POBkf_X5ByVmfiI,2315
2
+ indexify/client.py,sha256=faGiWAtdXkL4Vmx6xr0iHJLIBwhS2XZbQ6ld_7sMsBc,25874
3
+ indexify/data.py,sha256=91We7J2QAKBOTu1yF3ApTl4yl4C-nDL2WSXhBdekLWg,2334
4
4
  indexify/data_loaders/__init__.py,sha256=EiYemxCP4zRfDWnDKiX6-SFwXVmv1TSdcXHBQRbE_Uw,1309
5
5
  indexify/data_loaders/local_directory_loader.py,sha256=kF7VwkuOJFBrhKrR7IOOdZ4TDAItw_CyUOfcuej1CKI,1080
6
6
  indexify/error.py,sha256=3umTeYb0ugtUyehV1ibfvaeACxAONPyWPc-1HRN4d1M,856
7
7
  indexify/exceptions.py,sha256=vjd5SPPNFIEW35GorSIodsqvm9RKHQm9kdp8t9gv-WM,111
8
8
  indexify/extraction_policy.py,sha256=awNDqwCz0tr4jTQmGf7s8_s6vcEuxMb0xynEl7b7iPI,2076
9
- indexify/extractor.py,sha256=Pzcn9gZET5XRz3OMGQ_k9XjFT8UVeyaynOT86_C08yY,3837
9
+ indexify/extractor.py,sha256=HnLot4DQv7aVI3FwFNH83LzKjq7DlSR1-wmpcVC89tE,3930
10
10
  indexify/extractor_utils.py,sha256=68V5vZB9GYx648dyyVKAia0M4pG_R31QPqUQz3ZZ1FQ,6593
11
- indexify/graph.py,sha256=5WQphl30vd606MHz_IZ23oZVQot9dPN79cksbhjUncA,1572
12
- indexify/local_runner.py,sha256=yXKH2HrfhXdsx3vtrk3Q4LJGcWoAJazNfRhDp_s6Kx0,1900
11
+ indexify/graph.py,sha256=hUGTpaI3ale54sQ90u5P3-RJCwsSlEJg1V1R0rmCZE0,2576
12
+ indexify/local_runner.py,sha256=VV4Ff_ctibw0ZL4u1wVA7drRx4zLTgNmT_qLX3Cq2SY,2167
13
13
  indexify/settings.py,sha256=LSaWZ0ADIVmUv6o6dHWRC3-Ry5uLbCw2sBSg1e_U7UM,99
14
14
  indexify/utils.py,sha256=rDN2lrsAs9noJEIjfx6ukmC2SAIyrlUt7QU-kaBjujM,125
15
- indexify-0.0.36.dist-info/LICENSE.txt,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
16
- indexify-0.0.36.dist-info/METADATA,sha256=6Kh0Ngr9iAQF0NPyRULy0KOE6n5i9XFQGaZiuyxP1ss,1891
17
- indexify-0.0.36.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
18
- indexify-0.0.36.dist-info/RECORD,,
15
+ indexify-0.0.37.dist-info/LICENSE.txt,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
16
+ indexify-0.0.37.dist-info/METADATA,sha256=_3uThIPuUiPQ9BBVoqoEEo5Prqp_LHx59jHrZ2CpSgk,1891
17
+ indexify-0.0.37.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
18
+ indexify-0.0.37.dist-info/RECORD,,