PyPI - indexify - Versions diffs - 0.0.36__py3-none-any.whl → 0.0.37__py3-none-any.whl - Mend

indexify 0.0.36__py3-none-any.whl → 0.0.37__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

indexify/client.py +2 -2
indexify/data.py +1 -1
indexify/extractor.py +9 -7
indexify/graph.py +39 -8
indexify/local_runner.py +19 -7
{indexify-0.0.36.dist-info → indexify-0.0.37.dist-info}/METADATA +1 -1
{indexify-0.0.36.dist-info → indexify-0.0.37.dist-info}/RECORD +9 -9
{indexify-0.0.36.dist-info → indexify-0.0.37.dist-info}/LICENSE.txt +0 -0
{indexify-0.0.36.dist-info → indexify-0.0.37.dist-info}/WHEEL +0 -0

indexify/client.py CHANGED Viewed

@@ -549,7 +549,7 @@ class IndexifyClient:
         labels_filter: List[str] = [],
         start_id: str = "",
         limit: int = 10,
-    ) -> List[Content]:
+    ) -> List[ContentMetadata]:
         """
         List content in the current namespace.
@@ -572,7 +572,7 @@ class IndexifyClient:
         content_list = response.json()["content_list"]
         content = []
         for item in content_list:
-            content.append(Content.from_dict(item))
+            content.append(ContentMetadata.from_dict(item))
         return content
     def upload_file(

indexify/data.py CHANGED Viewed

@@ -58,7 +58,7 @@ class Content(BaseModel):
         m, _ = mimetypes.guess_type(path)
         with open(path, "rb") as f:
-            return cls(content_type=m, data=f.read())
+            return cls(id="none-for-now", content_type=m, data=f.read())
 class ContentMetadata(BaseModel):

indexify/extractor.py CHANGED Viewed

@@ -22,8 +22,8 @@ class Extractor(ABC):
     input_mime_types = ["text/plain"]
     def extract(
-        self, content: Content, params: Type[BaseModel] = None
-    ) -> List[Union[Feature, Content]]:
+        self, input: Type[BaseModel], params: Type[BaseModel] = None
+    ) -> List[Union[Feature, Type[BaseModel]]]:
         """
         Extracts information from the content. Returns a list of features to add
         to the content.
@@ -33,8 +33,8 @@ class Extractor(ABC):
         pass
     def extract_batch(
-        self, content_list: List[Content], params: List[Type[BaseModel]] = None
-    ) -> List[List[Union[Feature, Content]]]:
+        self, input_list: List[Type[BaseModel]], params: List[Type[BaseModel]] = None
+    ) -> List[List[Union[Feature, Type[BaseModel]]]]:
         """
         Extracts information from the content. Returns a list of features to add
         to the content.
@@ -99,13 +99,13 @@ def extractor(
             class DecoratedFn(Extractor):
                 @classmethod
-                def extract(cls, content: Content, params: hint) -> List[Content]:  # type: ignore
+                def extract(cls, input: Type[BaseModel], params: Type[BaseModel]=None) -> List[Content]:  # type: ignore
                     # TODO we can force all the functions to take in a parms object
                     # or check if someone adds a params
                     if params is None:
-                        return fn(content)
+                        return fn(input)
                     else:
-                        return fn(content, params)
+                        return fn(input, params)
                 def sample_input(self) -> Content:
                     return sample_content() if sample_content else self.sample_text()
@@ -115,6 +115,8 @@ def extractor(
             return DecoratedFn
+        wrapper._extractor_name = fn.__name__
         return wrapper
     return construct

indexify/graph.py CHANGED Viewed

@@ -2,7 +2,9 @@ from indexify import Content, extractor
 from indexify.extractor import Extractor
 from collections import defaultdict
-from typing import Any, Callable, Dict, List, Optional
+from typing import Any, Callable, Dict, List, Optional, Self
+import itertools
 @extractor(description="id function")
@@ -28,20 +30,49 @@ class Graph:
         self._start_node = None
-    def node(self, name: str, closure: Extractor, params: Any = None) -> None:
+    def _node(self, extractor: Extractor, params: Any = None) -> Self:
+        name = extractor._extractor_name
+        # if you've already inserted a node just ignore the new insertion.
         if name in self.nodes:
-            raise Exception(f"Cannot insert node, node with name: `{name}` already exists")
+            return
-        self.nodes[name] = closure
-        self.params[name] = params
+        self.nodes[name] = extractor
+        self.params[name] = extractor.__dict__.get('params', None)
         # assign each node a rank of 1 to init the graph
         self._topo_counter[name] = 1
-    def edge(self, from_node: str, to_node: str, prefilter_predicates: Optional[str] = None) -> None:
-        self.edges[from_node].append((to_node, prefilter_predicates))
+        return self
+    def step(self,
+             from_node: extractor,
+             to_node: extractor,
+             prefilter_predicates: Optional[str] = None
+    ) -> Self:
+        self._node(from_node)
+        self._node(to_node)
+        from_node_name = from_node._extractor_name
+        to_node_name = to_node._extractor_name
+        self.edges[from_node_name].append((to_node_name, prefilter_predicates))
+        self._topo_counter[to_node_name] += 1
+        return self
+    """
+    Connect nodes as a fan out from one `from_node` to multiple `to_nodes` and respective `prefilter_predicates`.
+    Note: The user has to match the sizes of the lists to make sure they line up otherwise a None is used as a default.
+    """
+    def steps(self, from_node: extractor, to_nodes: List[extractor], prefilter_predicates: List[str] = []) -> Self:
+        print(f'{to_nodes}, {prefilter_predicates}, {prefilter_predicates}')
+        for t_n, p in itertools.zip_longest(to_nodes, prefilter_predicates, fillvalue=None):
+            self.step(from_node=from_node, to_node=t_n, prefilter_predicates=p)
-        self._topo_counter[to_node] += 1
+        return self
     def _assign_start_node(self):
         # this method should be called before a graph can be run

indexify/local_runner.py CHANGED Viewed

@@ -1,8 +1,10 @@
-from indexify import Content
+from indexify import Content, Extractor
 from collections import defaultdict
 from typing import Any, Callable, Dict, Optional
+import json
 class LocalRunner:
     def __init__(self):
         self.results: Dict[str, Any] = defaultdict(list) # TODO should the Any be Content?
@@ -15,6 +17,8 @@ class LocalRunner:
         extractor_construct: Callable = g.nodes[node_name]
         params = g.params.get(node_name, None)
+        print(f"----Starting {node_name}")
         res = extractor_construct().extract(content=content, params=params)
         self.results[node_name].extend(res)
@@ -27,27 +31,35 @@ class LocalRunner:
                 self._run(g, content=r, node_name=out_edge)
+    """
+    Returns True if content should be filtered
+    """
     def _prefilter_content(self, content: Content, prefilter_predicate: Optional[str]) -> bool:
         if prefilter_predicate is None:
             return False
         atoms = prefilter_predicate.split('and')
-        if len(atoms) == 0 or len(atoms) == 1:
+        if len(atoms) == 0:
             return False
         # TODO For now only support `and` and `=` and `string values`
         bools = []
         for feature in content.features:
             if feature.feature_type == 'metadata':
-                values = feature.value
+                predicates = json.loads(feature.value)
+                print(f"predicates {predicates}")
-                print(f'{prefilter_predicate, atoms}')
                 for atom in atoms:
                     l, r = atom.split('=')
-                    if l in values:
-                        bools.append(values[l] == r)
+                    if l in predicates:
+                        print(f'predicates[l], r: {predicates[l], r}')
+                        bools.append(predicates[l] != r)
+        print(bools)
         return all(bools)
-    def get_result(self, node_name: str) -> Content:
+    def get_result(self, node: Extractor) -> Content:
+        node_name = node._extractor_name
         return self.results[node_name]

{indexify-0.0.36.dist-info → indexify-0.0.37.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: indexify
-Version: 0.0.36
+Version: 0.0.37
 Summary: Python Client for Indexify
 Home-page: https://github.com/tensorlakeai/indexify
 License: Apache 2.0

{indexify-0.0.36.dist-info → indexify-0.0.37.dist-info}/RECORD RENAMED Viewed

@@ -1,18 +1,18 @@
 indexify/__init__.py,sha256=W58FqmnKHIx-gHKTBDQa1QI49Gi8f1rw90yDg31jwgQ,743
-indexify/client.py,sha256=czMeUoAMMiEH3txysdRTCu84mwWj9Ec_NjiXy6oc9Vw,25858
-indexify/data.py,sha256=XWs5_rW2ZGldgwtqN62VwZF15ot1POBkf_X5ByVmfiI,2315
+indexify/client.py,sha256=faGiWAtdXkL4Vmx6xr0iHJLIBwhS2XZbQ6ld_7sMsBc,25874
+indexify/data.py,sha256=91We7J2QAKBOTu1yF3ApTl4yl4C-nDL2WSXhBdekLWg,2334
 indexify/data_loaders/__init__.py,sha256=EiYemxCP4zRfDWnDKiX6-SFwXVmv1TSdcXHBQRbE_Uw,1309
 indexify/data_loaders/local_directory_loader.py,sha256=kF7VwkuOJFBrhKrR7IOOdZ4TDAItw_CyUOfcuej1CKI,1080
 indexify/error.py,sha256=3umTeYb0ugtUyehV1ibfvaeACxAONPyWPc-1HRN4d1M,856
 indexify/exceptions.py,sha256=vjd5SPPNFIEW35GorSIodsqvm9RKHQm9kdp8t9gv-WM,111
 indexify/extraction_policy.py,sha256=awNDqwCz0tr4jTQmGf7s8_s6vcEuxMb0xynEl7b7iPI,2076
-indexify/extractor.py,sha256=Pzcn9gZET5XRz3OMGQ_k9XjFT8UVeyaynOT86_C08yY,3837
+indexify/extractor.py,sha256=HnLot4DQv7aVI3FwFNH83LzKjq7DlSR1-wmpcVC89tE,3930
 indexify/extractor_utils.py,sha256=68V5vZB9GYx648dyyVKAia0M4pG_R31QPqUQz3ZZ1FQ,6593
-indexify/graph.py,sha256=5WQphl30vd606MHz_IZ23oZVQot9dPN79cksbhjUncA,1572
-indexify/local_runner.py,sha256=yXKH2HrfhXdsx3vtrk3Q4LJGcWoAJazNfRhDp_s6Kx0,1900
+indexify/graph.py,sha256=hUGTpaI3ale54sQ90u5P3-RJCwsSlEJg1V1R0rmCZE0,2576
+indexify/local_runner.py,sha256=VV4Ff_ctibw0ZL4u1wVA7drRx4zLTgNmT_qLX3Cq2SY,2167
 indexify/settings.py,sha256=LSaWZ0ADIVmUv6o6dHWRC3-Ry5uLbCw2sBSg1e_U7UM,99
 indexify/utils.py,sha256=rDN2lrsAs9noJEIjfx6ukmC2SAIyrlUt7QU-kaBjujM,125
-indexify-0.0.36.dist-info/LICENSE.txt,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-indexify-0.0.36.dist-info/METADATA,sha256=6Kh0Ngr9iAQF0NPyRULy0KOE6n5i9XFQGaZiuyxP1ss,1891
-indexify-0.0.36.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
-indexify-0.0.36.dist-info/RECORD,,
+indexify-0.0.37.dist-info/LICENSE.txt,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+indexify-0.0.37.dist-info/METADATA,sha256=_3uThIPuUiPQ9BBVoqoEEo5Prqp_LHx59jHrZ2CpSgk,1891
+indexify-0.0.37.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+indexify-0.0.37.dist-info/RECORD,,

{indexify-0.0.36.dist-info → indexify-0.0.37.dist-info}/LICENSE.txt RENAMED Viewed

File without changes

{indexify-0.0.36.dist-info → indexify-0.0.37.dist-info}/WHEEL RENAMED Viewed

File without changes

indexify 0.0.36__py3-none-any.whl → 0.0.37__py3-none-any.whl

indexify 0.0.36__py3-none-any.whl → 0.0.37__py3-none-any.whl