indexify 0.0.28__tar.gz → 0.0.29__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {indexify-0.0.28 → indexify-0.0.29}/PKG-INFO +1 -1
- {indexify-0.0.28 → indexify-0.0.29}/indexify/__init__.py +2 -0
- {indexify-0.0.28 → indexify-0.0.29}/indexify/client.py +28 -2
- indexify-0.0.29/indexify/data_containers.py +37 -0
- {indexify-0.0.28 → indexify-0.0.29}/pyproject.toml +1 -1
- indexify-0.0.28/indexify/data_containers.py +0 -18
- {indexify-0.0.28 → indexify-0.0.29}/LICENSE.txt +0 -0
- {indexify-0.0.28 → indexify-0.0.29}/README.md +0 -0
- {indexify-0.0.28 → indexify-0.0.29}/indexify/error.py +0 -0
- {indexify-0.0.28 → indexify-0.0.29}/indexify/exceptions.py +0 -0
- {indexify-0.0.28 → indexify-0.0.29}/indexify/extraction_policy.py +0 -0
- {indexify-0.0.28 → indexify-0.0.29}/indexify/extractor.py +0 -0
- {indexify-0.0.28 → indexify-0.0.29}/indexify/index.py +0 -0
- {indexify-0.0.28 → indexify-0.0.29}/indexify/settings.py +0 -0
- {indexify-0.0.28 → indexify-0.0.29}/indexify/utils.py +0 -0
@@ -2,10 +2,12 @@ from .index import Index
|
|
2
2
|
from .client import IndexifyClient
|
3
3
|
from .extraction_policy import ExtractionGraph
|
4
4
|
from .client import IndexifyClient, Document, generate_hash_from_string, generate_unique_hex_id
|
5
|
+
from .data_containers import Content
|
5
6
|
from .settings import DEFAULT_SERVICE_URL
|
6
7
|
|
7
8
|
__all__ = [
|
8
9
|
"Index",
|
10
|
+
"Content",
|
9
11
|
"Document",
|
10
12
|
"IndexifyClient",
|
11
13
|
"ExtractionGraph",
|
@@ -10,7 +10,7 @@ from .extraction_policy import ExtractionPolicy, ExtractionGraph
|
|
10
10
|
from .index import Index
|
11
11
|
from .utils import json_set_default
|
12
12
|
from .error import Error
|
13
|
-
from .data_containers import TextChunk
|
13
|
+
from .data_containers import TextChunk, Content
|
14
14
|
from indexify.exceptions import ApiException
|
15
15
|
from dataclasses import dataclass
|
16
16
|
from typing import List, Optional, Union, Dict
|
@@ -153,7 +153,9 @@ class IndexifyClient:
|
|
153
153
|
try:
|
154
154
|
response = self._client.request(method, timeout=self._timeout, **kwargs)
|
155
155
|
status_code = str(response.status_code)
|
156
|
-
if status_code.startswith("4") or status_code.startswith("5"):
|
156
|
+
if status_code.startswith("4"):
|
157
|
+
raise ApiException("status code: " + status_code + " request args: " + str(kwargs))
|
158
|
+
if status_code.startswith("5"):
|
157
159
|
raise ApiException(response.text)
|
158
160
|
# error = Error.from_tonic_error_string(str(response.url), response.text)
|
159
161
|
# self.__print_additional_error_context(error)
|
@@ -511,6 +513,30 @@ class IndexifyClient:
|
|
511
513
|
headers={"Content-Type": "application/json"},
|
512
514
|
)
|
513
515
|
return response.json()["results"]
|
516
|
+
|
517
|
+
def list_content(self, extraction_graph: str, extraction_policy: str = "", start_id: str="", limit: int=10) -> List[Content]:
|
518
|
+
"""
|
519
|
+
List content in the current namespace.
|
520
|
+
|
521
|
+
Args:
|
522
|
+
- extraction_graph (str): extraction graph name
|
523
|
+
- start_index (str): start index for pagination
|
524
|
+
- limit (int): number of items to return
|
525
|
+
"""
|
526
|
+
params = {"graph": extraction_graph, "start_id": start_id, "limit": limit}
|
527
|
+
if extraction_policy:
|
528
|
+
params["source"] = extraction_policy
|
529
|
+
else:
|
530
|
+
params["source"] = "ingestion"
|
531
|
+
response = self.get(
|
532
|
+
f"namespaces/{self.namespace}/content",
|
533
|
+
params=params,
|
534
|
+
)
|
535
|
+
content_list = response.json()["content_list"]
|
536
|
+
content = []
|
537
|
+
for item in content_list:
|
538
|
+
content.append(Content.from_dict(item))
|
539
|
+
return content
|
514
540
|
|
515
541
|
def upload_file(
|
516
542
|
self,
|
@@ -0,0 +1,37 @@
|
|
1
|
+
from enum import Enum
|
2
|
+
from typing import List
|
3
|
+
from dataclasses import dataclass, field
|
4
|
+
|
5
|
+
@dataclass
|
6
|
+
class Content:
|
7
|
+
id: str
|
8
|
+
parent_id: str
|
9
|
+
labels: dict[str, any]
|
10
|
+
extraction_graph_names: List[str]
|
11
|
+
extraction_policy: str
|
12
|
+
mime_type: str
|
13
|
+
|
14
|
+
@classmethod
|
15
|
+
def from_dict(cls, json: dict):
|
16
|
+
return Content(
|
17
|
+
id=json["id"],
|
18
|
+
parent_id=json["parent_id"],
|
19
|
+
labels=json["labels"],
|
20
|
+
extraction_graph_names=json["extraction_graph_names"],
|
21
|
+
extraction_policy=json["source"],
|
22
|
+
mime_type=json["mime_type"],
|
23
|
+
)
|
24
|
+
|
25
|
+
@dataclass
|
26
|
+
class TextChunk:
|
27
|
+
text: str
|
28
|
+
metadata: dict[str, any] = field(default_factory=dict)
|
29
|
+
score: float = 0.0
|
30
|
+
|
31
|
+
def to_dict(self):
|
32
|
+
return {"text": self.text, "metadata": self.metadata}
|
33
|
+
|
34
|
+
|
35
|
+
@dataclass
|
36
|
+
class SearchResult:
|
37
|
+
results: List[TextChunk]
|
@@ -1,6 +1,6 @@
|
|
1
1
|
[tool.poetry]
|
2
2
|
name = "indexify"
|
3
|
-
version = "0.0.28"
|
3
|
+
version = "0.0.29"
|
4
4
|
description = "Python Client for Indexify"
|
5
5
|
authors = ["Diptanu Gon Choudhury <diptanuc@gmail.com>", "Lucas Jackson <lucas@tensorlake.ai>", "Vijay Parthasarathy <vijay2win@gmail.com>"]
|
6
6
|
license = "Apache 2.0"
|
@@ -1,18 +0,0 @@
|
|
1
|
-
from enum import Enum
|
2
|
-
from typing import List
|
3
|
-
from dataclasses import dataclass, field
|
4
|
-
|
5
|
-
|
6
|
-
@dataclass
|
7
|
-
class TextChunk:
|
8
|
-
text: str
|
9
|
-
metadata: dict[str, any] = field(default_factory=dict)
|
10
|
-
score: float = 0.0
|
11
|
-
|
12
|
-
def to_dict(self):
|
13
|
-
return {"text": self.text, "metadata": self.metadata}
|
14
|
-
|
15
|
-
|
16
|
-
@dataclass
|
17
|
-
class SearchResult:
|
18
|
-
results: List[TextChunk]
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|