PyPI - indexify - Versions diffs - 0.0.29__py3-none-any.whl → 0.0.32__py3-none-any.whl - Mend

indexify 0.0.29__py3-none-any.whl → 0.0.32__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

indexify/client.py CHANGED Viewed

@@ -154,7 +154,9 @@ class IndexifyClient:
             response = self._client.request(method, timeout=self._timeout, **kwargs)
             status_code = str(response.status_code)
             if status_code.startswith("4"):
-                raise ApiException("status code: " + status_code + " request args: " + str(kwargs))
+                raise ApiException(
+                    "status code: " + status_code + " request args: " + str(kwargs)
+                )
             if status_code.startswith("5"):
                 raise ApiException(response.text)
                 # error = Error.from_tonic_error_string(str(response.url), response.text)
@@ -342,11 +344,11 @@ class IndexifyClient:
         """
         Retrieve and update the list of extraction policies for the current namespace.
         """
-        response = self.get(f"namespaces/{self.namespace}")
+        response = self.get(f"namespaces/{self.namespace}/extraction_graphs")
         json = response.json()
         self.extraction_graphs = []
-        for graph in json["namespace"]["extraction_graphs"]:
+        for graph in json["extraction_graphs"]:
             self.extraction_graphs.append(ExtractionGraph.from_dict(graph))
         return self.extraction_graphs
@@ -368,6 +370,28 @@ class IndexifyClient:
         )
         return
+    def link_extraction_graphs(
+        self, source_graph: str, content_source: str, linked_graph: str
+    ):
+        """
+        Link an extraction graph to another extraction graph.
+        Args:
+            - source_graph (str): source extraction graph
+            - content_source (str): content source in source graph
+            - linked_graph (str): target extraction graph
+        """
+        req = {
+            "content_source": content_source,
+            "linked_graph_name": linked_graph,
+        }
+        response = self.post(
+            f"namespaces/{self.namespace}/extraction_graphs/{source_graph}/links",
+            json=req,
+            headers={"Content-Type": "application/json"},
+        )
+        return
     def get_content_metadata(self, content_id: str) -> dict:
         """
         Get metadata for a specific content ID in a given index.
@@ -375,17 +399,17 @@ class IndexifyClient:
         Args:
             - content_id (str): content id to query
         """
-        response = self.get(f"namespaces/{self.namespace}/content/{content_id}")
+        response = self.get(f"namespaces/{self.namespace}/content/{content_id}/metadata")
         return response.json()
-    def download_content(self, id: str) -> bytes:
+    def download_content(self, content_id: str) -> bytes:
         """
         Download content from id. Return bytes
         Args:
-            - id (str): id of content to download
+            - content_id (str): id of content to download
         """
-        response = self.get(f"namespaces/{self.namespace}/content/{id}/download")
+        response = self.get(f"namespaces/{self.namespace}/content/{content_id}/download")
         return response.content
     def add_documents(
@@ -424,21 +448,21 @@ class IndexifyClient:
             raise TypeError(
                 "Invalid type for documents. Expected Document, str, or list of these."
             )
-        req = {
-            "documents": [doc._asdict() for doc in documents],
-            "extraction_graph_names": extraction_graphs,
-        }
-        response = self.post(
-            f"namespaces/{self.namespace}/add_texts",
-            json=req,
-            headers={"Content-Type": "application/json"},
-        )
-        response.raise_for_status()
-        response_json = response.json()
-        content_ids = response_json["content_ids"]
-        if len(documents) == 1 and len(content_ids) == 1:
-            return content_ids[0]
+        for document in documents:
+            document.labels["mime_type"] = "text/plain"
+        content_ids = []
+        if isinstance(extraction_graphs, str):
+            extraction_graphs = [extraction_graphs]
+        for extraction_graph in extraction_graphs:
+            for document in documents:
+                response = self.post(
+                    f"namespaces/{self.namespace}/extraction_graphs/{extraction_graph}/extract",
+                    files={"file": document.text},
+                    data={"labels": json.dumps(document.labels)},
+                )
+                response_json = response.json()
+                content_id = response_json["content_id"]
+                content_ids.append(content_id)
         return content_ids
     def delete_documents(self, document_ids: List[str]) -> None:
@@ -506,15 +530,22 @@ class IndexifyClient:
             - top_k (int): top k nearest neighbors to be returned
             - filters (List[str]): list of filters to apply
         """
-        req = {"index": name, "query": query, "k": top_k, "filters": filters}
+        req = {"query": query, "k": top_k, "filters": filters}
         response = self.post(
-            f"namespaces/{self.namespace}/search",
+            f"namespaces/{self.namespace}/indexes/{name}/search",
             json=req,
             headers={"Content-Type": "application/json"},
         )
         return response.json()["results"]
-    def list_content(self, extraction_graph: str, extraction_policy: str = "", start_id: str="", limit: int=10) -> List[Content]:
+    def list_content(
+        self,
+        extraction_graph: str,
+        extraction_policy: str = "",
+        labels_filter: List[str] = [],
+        start_id: str = "",
+        limit: int = 10,
+    ) -> List[Content]:
         """
         List content in the current namespace.
@@ -528,6 +559,8 @@ class IndexifyClient:
             params["source"] = extraction_policy
         else:
             params["source"] = "ingestion"
+        if len(labels_filter) > 0:
+            params["labels_filter"] = labels_filter
         response = self.get(
             f"namespaces/{self.namespace}/content",
             params=params,
@@ -554,18 +587,20 @@ class IndexifyClient:
         """
         if isinstance(extraction_graphs, str):
             extraction_graphs = [extraction_graphs]
-        params = {"extraction_graph_names": extraction_graphs}
+        params = {}
         if id is not None:
             params["id"] = id
         with open(path, "rb") as f:
-            response = self.post(
-                f"namespaces/{self.namespace}/upload_file",
-                files={"file": f},
-                data={"labels": json.dumps(labels)},
-                params=params,
-            )
+            for extraction_graph in extraction_graphs:
+                response = self.post(
+                    f"namespaces/{self.namespace}/extraction_graphs/{extraction_graph}/extract",
+                    files={"file": f},
+                    data={"labels": json.dumps(labels)},
+                    params=params,
+                )
             response_json = response.json()
-            return response_json["content_id"]
+            content_id = response_json["content_id"]
+            return content_id
     def list_schemas(self) -> List[str]:
         """
@@ -574,35 +609,32 @@ class IndexifyClient:
         response = self.get(f"namespaces/{self.namespace}/schemas")
         return response.json()
-    def get_content_tree(self, content_id: str):
+    def get_extracted_content(
+        self, ingested_content_id: str, graph_name: str, extractor_name: str, blocking=False
+    ):
         """
-        Get content tree for a given content id
+        Get list of child for a given content id and their content up to the specified level.
         Args:
-            - content_id (str): id of content
+        - ingested_content_id (str): id of content
+        - graph_name (str): name of extraction graph
+        - extractor_name (str): name of extractor
+        - blocking (bool): wait for extraction to complete before returning (default: False)
         """
+        if blocking:
+            self.wait_for_extraction(ingested_content_id)
         response = self.get(
-            f"namespaces/{self.namespace}/content/{content_id}/content-tree"
+            f"namespaces/{self.namespace}/extraction_graphs/{graph_name}/extraction_policies/{extractor_name}/content/{ingested_content_id}"
         )
-        return response.json()
-    def get_extracted_content(self, content_id: str, graph_name: str, policy_name: str):
-        """
-        Get list of child for a given content id and their content up to the specified level.
-        Args:
-        - content_id (str): id of content
-        - level (int): depth of content retrieval (default: 0)
-        """
-        content_tree = self.get_content_tree(content_id)
+        content_tree = response.json()
         child_list = []
         for item in content_tree["content_tree_metadata"]:
             if (
                 graph_name in item["extraction_graph_names"]
-                and item["source"] == policy_name
+                and item["source"] == extractor_name
             ):
                 content = self.download_content(item["id"])
-                child_list.append({"id": item["id"], "content": content})
+                child_list.append({"id": item["id"], "mime_type": item["mime_type"], "content": content})
         return child_list
@@ -660,9 +692,13 @@ class IndexifyClient:
         """
         if type(content_ids) == str:
             content_ids = [content_ids]
-        print("Waiting for extraction to complete for content id: ", ",".join(content_ids))
+        print(
+            "Waiting for extraction to complete for content id: ", ",".join(content_ids)
+        )
         for content_id in content_ids:
-            response = self.get(f"namespaces/{self.namespace}/content/{content_id}/wait")
+            response = self.get(
+                f"namespaces/{self.namespace}/content/{content_id}/wait"
+            )
             print("Extraction completed for content id: ", content_id)
         response.raise_for_status()

{indexify-0.0.29.dist-info → indexify-0.0.32.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: indexify
-Version: 0.0.29
+Version: 0.0.32
 Summary: Python Client for Indexify
 Home-page: https://github.com/tensorlakeai/indexify
 License: Apache 2.0
@@ -35,8 +35,8 @@ pip install indexify
 ## Usage
-See the [getting started](https://getindexify.com/getting_started/) guide for examples of how to use the client.
-Look at the [examples](examples) directory for more examples.
+See the [getting started](https://docs.getindexify.com/getting_started/) guide for examples of how to use the client.
+Look at the [examples](https://github.com/tensorlakeai/indexify/tree/main/examples) directory for more examples.
 ## Development

{indexify-0.0.29.dist-info → indexify-0.0.32.dist-info}/RECORD RENAMED Viewed

@@ -1,5 +1,5 @@
 indexify/__init__.py,sha256=xqymbwqaiHiWXFpm7Cll2j-_V1lNQH2EEGlevtCTZK4,525
-indexify/client.py,sha256=14O63O_D1xatnRItS6ecs9juREooYQKelDz0Zc9TSrY,23197
+indexify/client.py,sha256=YkNhM1xDe0VcPx9Z3yLdl3y_msoOrGAj3ykefcItVhE,24653
 indexify/data_containers.py,sha256=fIX_rghpojrCUtmZ0grywoq_HWniDgN1mnR7yXDej-Y,874
 indexify/error.py,sha256=3umTeYb0ugtUyehV1ibfvaeACxAONPyWPc-1HRN4d1M,856
 indexify/exceptions.py,sha256=vjd5SPPNFIEW35GorSIodsqvm9RKHQm9kdp8t9gv-WM,111
@@ -8,7 +8,7 @@ indexify/extractor.py,sha256=sWFLlXHgEfWlmiKAXN6ytUt_uG7th-XGNHqz-TG39gs,1216
 indexify/index.py,sha256=RvxYhJXEth-GKvqzlMiz5PuN1eIbZk84pt20piA1Gsw,504
 indexify/settings.py,sha256=LSaWZ0ADIVmUv6o6dHWRC3-Ry5uLbCw2sBSg1e_U7UM,99
 indexify/utils.py,sha256=rDN2lrsAs9noJEIjfx6ukmC2SAIyrlUt7QU-kaBjujM,125
-indexify-0.0.29.dist-info/LICENSE.txt,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-indexify-0.0.29.dist-info/METADATA,sha256=c-2NDf8ayGuboLyMkc0tVXACfp9L30QbxQTCARfuFeA,1798
-indexify-0.0.29.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
-indexify-0.0.29.dist-info/RECORD,,
+indexify-0.0.32.dist-info/LICENSE.txt,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+indexify-0.0.32.dist-info/METADATA,sha256=Rj5F0dH8Ll6fRbwhiY0niW7JfcwWU-4F28pDRLp4w2s,1854
+indexify-0.0.32.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+indexify-0.0.32.dist-info/RECORD,,

{indexify-0.0.29.dist-info → indexify-0.0.32.dist-info}/LICENSE.txt RENAMED Viewed

File without changes

{indexify-0.0.29.dist-info → indexify-0.0.32.dist-info}/WHEEL RENAMED Viewed

File without changes

indexify 0.0.29__py3-none-any.whl → 0.0.32__py3-none-any.whl

indexify 0.0.29__py3-none-any.whl → 0.0.32__py3-none-any.whl