indexify 0.0.29__tar.gz → 0.0.31__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: indexify
-Version: 0.0.29
+Version: 0.0.31
 Summary: Python Client for Indexify
 Home-page: https://github.com/tensorlakeai/indexify
 License: Apache 2.0
@@ -35,8 +35,8 @@ pip install indexify
 
 ## Usage
 
-See the [getting started](https://getindexify.com/getting_started/) guide for examples of how to use the client.
-Look at the [examples](examples) directory for more examples.
+See the [getting started](https://docs.getindexify.com/getting_started/) guide for examples of how to use the client.
+Look at the [examples](https://github.com/tensorlakeai/indexify/tree/main/examples) directory for more examples.
 
 ## Development
 
@@ -15,8 +15,8 @@ pip install indexify
 
 ## Usage
 
-See the [getting started](https://getindexify.com/getting_started/) guide for examples of how to use the client.
-Look at the [examples](examples) directory for more examples.
+See the [getting started](https://docs.getindexify.com/getting_started/) guide for examples of how to use the client.
+Look at the [examples](https://github.com/tensorlakeai/indexify/tree/main/examples) directory for more examples.
 
 ## Development
 
@@ -154,7 +154,9 @@ class IndexifyClient:
         response = self._client.request(method, timeout=self._timeout, **kwargs)
         status_code = str(response.status_code)
         if status_code.startswith("4"):
-            raise ApiException("status code: " + status_code + " request args: " + str(kwargs))
+            raise ApiException(
+                "status code: " + status_code + " request args: " + str(kwargs)
+            )
         if status_code.startswith("5"):
             raise ApiException(response.text)
         # error = Error.from_tonic_error_string(str(response.url), response.text)
@@ -342,11 +344,11 @@ class IndexifyClient:
         """
         Retrieve and update the list of extraction policies for the current namespace.
         """
-        response = self.get(f"namespaces/{self.namespace}")
+        response = self.get(f"namespaces/{self.namespace}/extraction_graphs")
         json = response.json()
 
         self.extraction_graphs = []
-        for graph in json["namespace"]["extraction_graphs"]:
+        for graph in json["extraction_graphs"]:
             self.extraction_graphs.append(ExtractionGraph.from_dict(graph))
 
         return self.extraction_graphs
@@ -368,6 +370,28 @@ class IndexifyClient:
         )
         return
 
+    def link_extraction_graphs(
+        self, source_graph: str, content_source: str, linked_graph: str
+    ):
+        """
+        Link an extraction graph to another extraction graph.
+
+        Args:
+            - source_graph (str): source extraction graph
+            - content_source (str): content source in source graph
+            - linked_graph (str): target extraction graph
+        """
+        req = {
+            "content_source": content_source,
+            "linked_graph_name": linked_graph,
+        }
+        response = self.post(
+            f"namespaces/{self.namespace}/extraction_graphs/{source_graph}/links",
+            json=req,
+            headers={"Content-Type": "application/json"},
+        )
+        return
+
     def get_content_metadata(self, content_id: str) -> dict:
         """
         Get metadata for a specific content ID in a given index.
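
The new link_extraction_graphs method above chains two extraction graphs by routing the output of one policy into another graph. A minimal usage sketch, assuming the usual top-level import and a client pointed at a default local Indexify server; the graph and policy names below are illustrative:

    from indexify import IndexifyClient

    client = IndexifyClient()  # assumes a default local Indexify server
    # Route content produced by the hypothetical "chunker" policy of
    # "ingest-graph" into a second, hypothetical graph named "embed-graph".
    client.link_extraction_graphs(
        source_graph="ingest-graph",
        content_source="chunker",
        linked_graph="embed-graph",
    )
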
@@ -375,17 +399,17 @@ class IndexifyClient:
         Args:
             - content_id (str): content id to query
         """
-        response = self.get(f"namespaces/{self.namespace}/content/{content_id}")
+        response = self.get(f"namespaces/{self.namespace}/content/{content_id}/metadata")
         return response.json()
 
-    def download_content(self, id: str) -> bytes:
+    def download_content(self, content_id: str) -> bytes:
         """
         Download content from id. Return bytes
 
         Args:
-            - id (str): id of content to download
+            - content_id (str): id of content to download
         """
-        response = self.get(f"namespaces/{self.namespace}/content/{id}/download")
+        response = self.get(f"namespaces/{self.namespace}/content/{content_id}/download")
         return response.content
 
     def add_documents(
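
In this hunk get_content_metadata now hits the /metadata sub-path and download_content takes content_id instead of the builtin-shadowing id. A short sketch of the two calls, with a made-up content id:

    from indexify import IndexifyClient

    client = IndexifyClient()
    meta = client.get_content_metadata("content-123")  # GET .../content/content-123/metadata
    raw = client.download_content("content-123")       # GET .../content/content-123/download
    print(meta, len(raw))
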
@@ -424,21 +448,21 @@ class IndexifyClient:
             raise TypeError(
                 "Invalid type for documents. Expected Document, str, or list of these."
             )
-
-        req = {
-            "documents": [doc._asdict() for doc in documents],
-            "extraction_graph_names": extraction_graphs,
-        }
-        response = self.post(
-            f"namespaces/{self.namespace}/add_texts",
-            json=req,
-            headers={"Content-Type": "application/json"},
-        )
-        response.raise_for_status()
-        response_json = response.json()
-        content_ids = response_json["content_ids"]
-        if len(documents) == 1 and len(content_ids) == 1:
-            return content_ids[0]
+        for document in documents:
+            document.labels["mime_type"] = "text/plain"
+        content_ids = []
+        if isinstance(extraction_graphs, str):
+            extraction_graphs = [extraction_graphs]
+        for extraction_graph in extraction_graphs:
+            for document in documents:
+                response = self.post(
+                    f"namespaces/{self.namespace}/extraction_graphs/{extraction_graph}/extract",
+                    files={"file": document.text},
+                    data={"labels": json.dumps(document.labels)},
+                )
+                response_json = response.json()
+                content_id = response_json["content_id"]
+                content_ids.append(content_id)
         return content_ids
 
     def delete_documents(self, document_ids: List[str]) -> None:
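
In 0.0.31, add_documents tags each document with a text/plain mime_type label and posts it to every listed graph's extract endpoint, always returning a list of content ids (the old single-id shortcut for a lone document is gone). A sketch of the new call pattern, using keyword arguments because the full signature is not visible in this hunk; the parameter names are taken from the hunk body and the graph name is illustrative:

    from indexify import IndexifyClient

    client = IndexifyClient()
    # Plain strings are accepted per the TypeError message above
    # (presumably converted to Document objects earlier in the method).
    content_ids = client.add_documents(
        extraction_graphs="ingest-graph",
        documents=["first document text", "second document text"],
    )
    print(content_ids)  # one id per document per graph
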
@@ -506,15 +530,22 @@ class IndexifyClient:
             - top_k (int): top k nearest neighbors to be returned
             - filters (List[str]): list of filters to apply
         """
-        req = {"index": name, "query": query, "k": top_k, "filters": filters}
+        req = {"query": query, "k": top_k, "filters": filters}
         response = self.post(
-            f"namespaces/{self.namespace}/search",
+            f"namespaces/{self.namespace}/indexes/{name}/search",
             json=req,
             headers={"Content-Type": "application/json"},
         )
         return response.json()["results"]
-
-    def list_content(self, extraction_graph: str, extraction_policy: str = "", start_id: str="", limit: int=10) -> List[Content]:
+
+    def list_content(
+        self,
+        extraction_graph: str,
+        extraction_policy: str = "",
+        labels_filter: List[str] = [],
+        start_id: str = "",
+        limit: int = 10,
+    ) -> List[Content]:
         """
         List content in the current namespace.
 
@@ -528,6 +559,8 @@ class IndexifyClient:
             params["source"] = extraction_policy
         else:
             params["source"] = "ingestion"
+        if len(labels_filter) > 0:
+            params["labels_filter"] = labels_filter
         response = self.get(
             f"namespaces/{self.namespace}/content",
             params=params,
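
Alongside the per-index search route introduced two hunks above, list_content gains a labels_filter parameter that is forwarded as a query parameter only when non-empty. A sketch of listing ingested content filtered by a label, where the filter expression syntax is an assumption not confirmed by this diff:

    from indexify import IndexifyClient

    client = IndexifyClient()
    # Only content matching the label filter is returned.
    content = client.list_content(
        extraction_graph="ingest-graph",
        labels_filter=["source=web"],  # assumed filter syntax
        limit=5,
    )
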
@@ -554,18 +587,20 @@ class IndexifyClient:
         """
         if isinstance(extraction_graphs, str):
             extraction_graphs = [extraction_graphs]
-        params = {"extraction_graph_names": extraction_graphs}
+        params = {}
         if id is not None:
             params["id"] = id
         with open(path, "rb") as f:
-            response = self.post(
-                f"namespaces/{self.namespace}/upload_file",
-                files={"file": f},
-                data={"labels": json.dumps(labels)},
-                params=params,
-            )
+            for extraction_graph in extraction_graphs:
+                response = self.post(
+                    f"namespaces/{self.namespace}/extraction_graphs/{extraction_graph}/extract",
+                    files={"file": f},
+                    data={"labels": json.dumps(labels)},
+                    params=params,
+                )
         response_json = response.json()
-        return response_json["content_id"]
+        content_id = response_json["content_id"]
+        return content_id
 
     def list_schemas(self) -> List[str]:
         """
@@ -574,35 +609,32 @@ class IndexifyClient:
         response = self.get(f"namespaces/{self.namespace}/schemas")
         return response.json()
 
-    def get_content_tree(self, content_id: str):
+    def get_extracted_content(
+        self, ingested_content_id: str, graph_name: str, extractor_name: str, blocking=False
+    ):
         """
-        Get content tree for a given content id
+        Get list of child for a given content id and their content up to the specified level.
 
         Args:
-            - content_id (str): id of content
+            - ingested_content_id (str): id of content
+            - graph_name (str): name of extraction graph
+            - extractor_name (str): name of extractor
+            - blocking (bool): wait for extraction to complete before returning (default: False)
         """
+        if blocking:
+            self.wait_for_extraction(ingested_content_id)
         response = self.get(
-            f"namespaces/{self.namespace}/content/{content_id}/content-tree"
+            f"namespaces/{self.namespace}/extraction_graphs/{graph_name}/extraction_policies/{extractor_name}/content/{ingested_content_id}"
         )
-        return response.json()
-
-    def get_extracted_content(self, content_id: str, graph_name: str, policy_name: str):
-        """
-        Get list of child for a given content id and their content up to the specified level.
-
-        Args:
-            - content_id (str): id of content
-            - level (int): depth of content retrieval (default: 0)
-        """
-        content_tree = self.get_content_tree(content_id)
+        content_tree = response.json()
         child_list = []
         for item in content_tree["content_tree_metadata"]:
             if (
                 graph_name in item["extraction_graph_names"]
-                and item["source"] == policy_name
+                and item["source"] == extractor_name
             ):
                 content = self.download_content(item["id"])
-                child_list.append({"id": item["id"], "content": content})
+                child_list.append({"id": item["id"], "mime_type": item["mime_type"], "content": content})
 
         return child_list
 
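
With upload_file now posting once per extraction graph and get_extracted_content addressing a specific graph and policy, an end-to-end sketch looks like the following. The keyword names for upload_file are taken from variable names in the hunk body, and the file path, graph, and policy names are illustrative:

    from indexify import IndexifyClient

    client = IndexifyClient()
    content_id = client.upload_file(
        extraction_graphs="ingest-graph",
        path="/tmp/report.pdf",
        labels={"source": "local-disk"},
    )
    # blocking=True calls wait_for_extraction(content_id) before fetching children.
    children = client.get_extracted_content(
        ingested_content_id=content_id,
        graph_name="ingest-graph",
        extractor_name="chunker",
        blocking=True,
    )
    for child in children:
        print(child["id"], child["mime_type"], len(child["content"]))
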
@@ -660,9 +692,13 @@ class IndexifyClient:
         """
         if type(content_ids) == str:
             content_ids = [content_ids]
-        print("Waiting for extraction to complete for content id: ", ",".join(content_ids))
+        print(
+            "Waiting for extraction to complete for content id: ", ",".join(content_ids)
+        )
         for content_id in content_ids:
-            response = self.get(f"namespaces/{self.namespace}/content/{content_id}/wait")
+            response = self.get(
+                f"namespaces/{self.namespace}/content/{content_id}/wait"
+            )
             print("Extraction completed for content id: ", content_id)
             response.raise_for_status()
 
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "indexify"
-version = "0.0.29"
+version = "0.0.31"
 description = "Python Client for Indexify"
 authors = ["Diptanu Gon Choudhury <diptanuc@gmail.com>", "Lucas Jackson <lucas@tensorlake.ai>", "Vijay Parthasarathy <vijay2win@gmail.com>"]
 license = "Apache 2.0"