PyPI - indexify - Versions diffs - 0.0.36__tar.gz → 0.0.37__tar.gz - Mend

indexify 0.0.36.tar.gz → 0.0.37.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

{indexify-0.0.36 → indexify-0.0.37}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: indexify
-Version: 0.0.36
+Version: 0.0.37
 Summary: Python Client for Indexify
 Home-page: https://github.com/tensorlakeai/indexify
 License: Apache 2.0

{indexify-0.0.36 → indexify-0.0.37}/indexify/client.py RENAMED Viewed

@@ -549,7 +549,7 @@ class IndexifyClient:
         labels_filter: List[str] = [],
         start_id: str = "",
         limit: int = 10,
-    ) -> List[Content]:
+    ) -> List[ContentMetadata]:
         """
         List content in the current namespace.
@@ -572,7 +572,7 @@ class IndexifyClient:
         content_list = response.json()["content_list"]
         content = []
         for item in content_list:
-            content.append(Content.from_dict(item))
+            content.append(ContentMetadata.from_dict(item))
         return content
     def upload_file(

{indexify-0.0.36 → indexify-0.0.37}/indexify/data.py RENAMED Viewed

@@ -58,7 +58,7 @@ class Content(BaseModel):
         m, _ = mimetypes.guess_type(path)
         with open(path, "rb") as f:
-            return cls(content_type=m, data=f.read())
+            return cls(id="none-for-now", content_type=m, data=f.read())
 class ContentMetadata(BaseModel):

{indexify-0.0.36 → indexify-0.0.37}/indexify/extractor.py RENAMED Viewed

@@ -22,8 +22,8 @@ class Extractor(ABC):
     input_mime_types = ["text/plain"]
     def extract(
-        self, content: Content, params: Type[BaseModel] = None
-    ) -> List[Union[Feature, Content]]:
+        self, input: Type[BaseModel], params: Type[BaseModel] = None
+    ) -> List[Union[Feature, Type[BaseModel]]]:
         """
         Extracts information from the content. Returns a list of features to add
         to the content.
@@ -33,8 +33,8 @@ class Extractor(ABC):
         pass
     def extract_batch(
-        self, content_list: List[Content], params: List[Type[BaseModel]] = None
-    ) -> List[List[Union[Feature, Content]]]:
+        self, input_list: List[Type[BaseModel]], params: List[Type[BaseModel]] = None
+    ) -> List[List[Union[Feature, Type[BaseModel]]]]:
         """
         Extracts information from the content. Returns a list of features to add
         to the content.
@@ -99,13 +99,13 @@ def extractor(
             class DecoratedFn(Extractor):
                 @classmethod
-                def extract(cls, content: Content, params: hint) -> List[Content]:  # type: ignore
+                def extract(cls, input: Type[BaseModel], params: Type[BaseModel]=None) -> List[Content]:  # type: ignore
                     # TODO we can force all the functions to take in a parms object
                     # or check if someone adds a params
                     if params is None:
-                        return fn(content)
+                        return fn(input)
                     else:
-                        return fn(content, params)
+                        return fn(input, params)
                 def sample_input(self) -> Content:
                     return sample_content() if sample_content else self.sample_text()
@@ -115,6 +115,8 @@ def extractor(
             return DecoratedFn
+        wrapper._extractor_name = fn.__name__
         return wrapper
     return construct

indexify-0.0.37/indexify/graph.py ADDED Viewed

@@ -0,0 +1,80 @@
+from indexify import Content, extractor
+from indexify.extractor import Extractor
+from collections import defaultdict
+from typing import Any, Callable, Dict, List, Optional, Self
+import itertools
+@extractor(description="id function")
+def _id(content: Content) -> List[Content]:
+    return [content]
+class Graph:
+    def __init__(self, name: str):
+        # TODO check for cycles
+        self.name = name
+        self.nodes: Dict[str, Callable] = {}
+        self.params: Dict[str, Any] = {}
+        self.edges: Dict[str, List[(str, str)]] = defaultdict(list)
+        self.results: Dict[str, Any] = defaultdict(list) # TODO should the Any be Content?
+        self.nodes["start"] = _id
+        self.nodes["end"] = _id
+        self._topo_counter = defaultdict(int)
+        self._start_node = None
+    def _node(self, extractor: Extractor, params: Any = None) -> Self:
+        name = extractor._extractor_name
+        # if you've already inserted a node just ignore the new insertion.
+        if name in self.nodes:
+            return
+        self.nodes[name] = extractor
+        self.params[name] = extractor.__dict__.get('params', None)
+        # assign each node a rank of 1 to init the graph
+        self._topo_counter[name] = 1
+        return self
+    def step(self,
+             from_node: extractor,
+             to_node: extractor,
+             prefilter_predicates: Optional[str] = None
+    ) -> Self:
+        self._node(from_node)
+        self._node(to_node)
+        from_node_name = from_node._extractor_name
+        to_node_name = to_node._extractor_name
+        self.edges[from_node_name].append((to_node_name, prefilter_predicates))
+        self._topo_counter[to_node_name] += 1
+        return self
+    """
+    Connect nodes as a fan out from one `from_node` to multiple `to_nodes` and respective `prefilter_predicates`.
+    Note: The user has to match the sizes of the lists to make sure they line up otherwise a None is used as a default.
+    """
+    def steps(self, from_node: extractor, to_nodes: List[extractor], prefilter_predicates: List[str] = []) -> Self:
+        print(f'{to_nodes}, {prefilter_predicates}, {prefilter_predicates}')
+        for t_n, p in itertools.zip_longest(to_nodes, prefilter_predicates, fillvalue=None):
+            self.step(from_node=from_node, to_node=t_n, prefilter_predicates=p)
+        return self
+    def _assign_start_node(self):
+        # this method should be called before a graph can be run
+        nodes = sorted(self._topo_counter.items(), key=lambda x: x[1])
+        self._start_node = nodes[0][0]

{indexify-0.0.36 → indexify-0.0.37}/indexify/local_runner.py RENAMED Viewed

@@ -1,8 +1,10 @@
-from indexify import Content
+from indexify import Content, Extractor
 from collections import defaultdict
 from typing import Any, Callable, Dict, Optional
+import json
 class LocalRunner:
     def __init__(self):
         self.results: Dict[str, Any] = defaultdict(list) # TODO should the Any be Content?
@@ -15,6 +17,8 @@ class LocalRunner:
         extractor_construct: Callable = g.nodes[node_name]
         params = g.params.get(node_name, None)
+        print(f"----Starting {node_name}")
         res = extractor_construct().extract(content=content, params=params)
         self.results[node_name].extend(res)
@@ -27,27 +31,35 @@ class LocalRunner:
                 self._run(g, content=r, node_name=out_edge)
+    """
+    Returns True if content should be filtered
+    """
     def _prefilter_content(self, content: Content, prefilter_predicate: Optional[str]) -> bool:
         if prefilter_predicate is None:
             return False
         atoms = prefilter_predicate.split('and')
-        if len(atoms) == 0 or len(atoms) == 1:
+        if len(atoms) == 0:
             return False
         # TODO For now only support `and` and `=` and `string values`
         bools = []
         for feature in content.features:
             if feature.feature_type == 'metadata':
-                values = feature.value
+                predicates = json.loads(feature.value)
+                print(f"predicates {predicates}")
-                print(f'{prefilter_predicate, atoms}')
                 for atom in atoms:
                     l, r = atom.split('=')
-                    if l in values:
-                        bools.append(values[l] == r)
+                    if l in predicates:
+                        print(f'predicates[l], r: {predicates[l], r}')
+                        bools.append(predicates[l] != r)
+        print(bools)
         return all(bools)
-    def get_result(self, node_name: str) -> Content:
+    def get_result(self, node: Extractor) -> Content:
+        node_name = node._extractor_name
         return self.results[node_name]

{indexify-0.0.36 → indexify-0.0.37}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "indexify"
-version = "0.0.36"
+version = "0.0.37"
 description = "Python Client for Indexify"
 authors = ["Diptanu Gon Choudhury <diptanuc@gmail.com>", "Lucas Jackson <lucas@tensorlake.ai>", "Vijay Parthasarathy <vijay2win@gmail.com>"]
 license = "Apache 2.0"

indexify-0.0.36/indexify/graph.py DELETED Viewed

@@ -1,49 +0,0 @@
-from indexify import Content, extractor
-from indexify.extractor import Extractor
-from collections import defaultdict
-from typing import Any, Callable, Dict, List, Optional
-@extractor(description="id function")
-def _id(content: Content) -> List[Content]:
-    return [content]
-class Graph:
-    def __init__(self, name: str):
-        # TODO check for cycles
-        self.name = name
-        self.nodes: Dict[str, Callable] = {}
-        self.params: Dict[str, Any] = {}
-        self.edges: Dict[str, List[(str, str)]] = defaultdict(list)
-        self.results: Dict[str, Any] = defaultdict(list) # TODO should the Any be Content?
-        self.nodes["start"] = _id
-        self.nodes["end"] = _id
-        self._topo_counter = defaultdict(int)
-        self._start_node = None
-    def node(self, name: str, closure: Extractor, params: Any = None) -> None:
-        if name in self.nodes:
-            raise Exception(f"Cannot insert node, node with name: `{name}` already exists")
-        self.nodes[name] = closure
-        self.params[name] = params
-        # assign each node a rank of 1 to init the graph
-        self._topo_counter[name] = 1
-    def edge(self, from_node: str, to_node: str, prefilter_predicates: Optional[str] = None) -> None:
-        self.edges[from_node].append((to_node, prefilter_predicates))
-        self._topo_counter[to_node] += 1
-    def _assign_start_node(self):
-        # this method should be called before a graph can be run
-        nodes = sorted(self._topo_counter.items(), key=lambda x: x[1])
-        self._start_node = nodes[0][0]