indexify 0.0.27__tar.gz → 0.0.29__tar.gz

This diff compares the contents of two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: indexify
3
- Version: 0.0.27
3
+ Version: 0.0.29
4
4
  Summary: Python Client for Indexify
5
5
  Home-page: https://github.com/tensorlakeai/indexify
6
6
  License: Apache 2.0
@@ -2,10 +2,12 @@ from .index import Index
2
2
  from .client import IndexifyClient
3
3
  from .extraction_policy import ExtractionGraph
4
4
  from .client import IndexifyClient, Document, generate_hash_from_string, generate_unique_hex_id
5
+ from .data_containers import Content
5
6
  from .settings import DEFAULT_SERVICE_URL
6
7
 
7
8
  __all__ = [
8
9
  "Index",
10
+ "Content",
9
11
  "Document",
10
12
  "IndexifyClient",
11
13
  "ExtractionGraph",
@@ -10,7 +10,7 @@ from .extraction_policy import ExtractionPolicy, ExtractionGraph
10
10
  from .index import Index
11
11
  from .utils import json_set_default
12
12
  from .error import Error
13
- from .data_containers import TextChunk
13
+ from .data_containers import TextChunk, Content
14
14
  from indexify.exceptions import ApiException
15
15
  from dataclasses import dataclass
16
16
  from typing import List, Optional, Union, Dict
@@ -153,7 +153,9 @@ class IndexifyClient:
153
153
  try:
154
154
  response = self._client.request(method, timeout=self._timeout, **kwargs)
155
155
  status_code = str(response.status_code)
156
- if status_code.startswith("4") or status_code.startswith("5"):
156
+ if status_code.startswith("4"):
157
+ raise ApiException("status code: " + status_code + " request args: " + str(kwargs))
158
+ if status_code.startswith("5"):
157
159
  raise ApiException(response.text)
158
160
  # error = Error.from_tonic_error_string(str(response.url), response.text)
159
161
  # self.__print_additional_error_context(error)
@@ -511,6 +513,30 @@ class IndexifyClient:
511
513
  headers={"Content-Type": "application/json"},
512
514
  )
513
515
  return response.json()["results"]
516
+
517
+ def list_content(self, extraction_graph: str, extraction_policy: str = "", start_id: str="", limit: int=10) -> List[Content]:
518
+ """
519
+ List content in the current namespace.
520
+
521
+ Args:
522
+ - extraction_graph (str): extraction graph name
523
+ - start_index (str): start index for pagination
524
+ - limit (int): number of items to return
525
+ """
526
+ params = {"graph": extraction_graph, "start_id": start_id, "limit": limit}
527
+ if extraction_policy:
528
+ params["source"] = extraction_policy
529
+ else:
530
+ params["source"] = "ingestion"
531
+ response = self.get(
532
+ f"namespaces/{self.namespace}/content",
533
+ params=params,
534
+ )
535
+ content_list = response.json()["content_list"]
536
+ content = []
537
+ for item in content_list:
538
+ content.append(Content.from_dict(item))
539
+ return content
514
540
 
515
541
  def upload_file(
516
542
  self,
@@ -622,7 +648,8 @@ class IndexifyClient:
622
648
  json=req,
623
649
  headers={"Content-Type": "application/json"},
624
650
  )
625
- return response.json()
651
+ response.raise_for_status()
652
+ return response.json()["content_id"]
626
653
 
627
654
  def wait_for_extraction(self, content_ids: Union[str, List[str]]):
628
655
  """
@@ -0,0 +1,37 @@
1
+ from enum import Enum
2
+ from typing import List
3
+ from dataclasses import dataclass, field
4
+
5
+ @dataclass
6
+ class Content:
7
+ id: str
8
+ parent_id: str
9
+ labels: dict[str, any]
10
+ extraction_graph_names: List[str]
11
+ extraction_policy: str
12
+ mime_type: str
13
+
14
+ @classmethod
15
+ def from_dict(cls, json: dict):
16
+ return Content(
17
+ id=json["id"],
18
+ parent_id=json["parent_id"],
19
+ labels=json["labels"],
20
+ extraction_graph_names=json["extraction_graph_names"],
21
+ extraction_policy=json["source"],
22
+ mime_type=json["mime_type"],
23
+ )
24
+
25
+ @dataclass
26
+ class TextChunk:
27
+ text: str
28
+ metadata: dict[str, any] = field(default_factory=dict)
29
+ score: float = 0.0
30
+
31
+ def to_dict(self):
32
+ return {"text": self.text, "metadata": self.metadata}
33
+
34
+
35
+ @dataclass
36
+ class SearchResult:
37
+ results: List[TextChunk]
@@ -33,11 +33,13 @@ class ExtractionPolicy:
33
33
  class ExtractionGraph:
34
34
  id: str
35
35
  name: str
36
+ description: str
36
37
  extraction_policies: List[ExtractionPolicy]
37
38
 
38
39
  @classmethod
39
40
  def from_dict(cls, json: dict):
40
41
  json["id"] = json.get("id", None)
42
+ json["description"] = json.get("description", None)
41
43
  if "namespace" in json.keys():
42
44
  json.pop("namespace")
43
45
  return ExtractionGraph(**json)
@@ -47,6 +49,11 @@ class ExtractionGraph:
47
49
  import yaml
48
50
 
49
51
  return ExtractionGraph.from_dict(yaml.load(spec, Loader=yaml.FullLoader))
52
+
53
+ @staticmethod
54
+ def from_yaml_file(path: str):
55
+ with open(path, "r") as f:
56
+ return ExtractionGraph.from_yaml(f.read())
50
57
 
51
58
  def to_dict(self) -> dict:
52
59
  filtered_dict = {k: v for k, v in asdict(self).items() if v is not None}
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "indexify"
3
- version = "0.0.27"
3
+ version = "0.0.29"
4
4
  description = "Python Client for Indexify"
5
5
  authors = ["Diptanu Gon Choudhury <diptanuc@gmail.com>", "Lucas Jackson <lucas@tensorlake.ai>", "Vijay Parthasarathy <vijay2win@gmail.com>"]
6
6
  license = "Apache 2.0"
@@ -1,18 +0,0 @@
1
- from enum import Enum
2
- from typing import List
3
- from dataclasses import dataclass, field
4
-
5
-
6
- @dataclass
7
- class TextChunk:
8
- text: str
9
- metadata: dict[str, any] = field(default_factory=dict)
10
- score: float = 0.0
11
-
12
- def to_dict(self):
13
- return {"text": self.text, "metadata": self.metadata}
14
-
15
-
16
- @dataclass
17
- class SearchResult:
18
- results: List[TextChunk]
File without changes
File without changes
File without changes
File without changes
File without changes