indexify 0.0.23__tar.gz → 0.0.25__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {indexify-0.0.23 → indexify-0.0.25}/PKG-INFO +3 -2
- {indexify-0.0.23 → indexify-0.0.25}/indexify/client.py +63 -25
- {indexify-0.0.23 → indexify-0.0.25}/pyproject.toml +3 -3
- {indexify-0.0.23 → indexify-0.0.25}/LICENSE.txt +0 -0
- {indexify-0.0.23 → indexify-0.0.25}/README.md +0 -0
- {indexify-0.0.23 → indexify-0.0.25}/indexify/__init__.py +0 -0
- {indexify-0.0.23 → indexify-0.0.25}/indexify/data_containers.py +0 -0
- {indexify-0.0.23 → indexify-0.0.25}/indexify/error.py +0 -0
- {indexify-0.0.23 → indexify-0.0.25}/indexify/exceptions.py +0 -0
- {indexify-0.0.23 → indexify-0.0.25}/indexify/extraction_policy.py +0 -0
- {indexify-0.0.23 → indexify-0.0.25}/indexify/extractor.py +0 -0
- {indexify-0.0.23 → indexify-0.0.25}/indexify/index.py +0 -0
- {indexify-0.0.23 → indexify-0.0.25}/indexify/settings.py +0 -0
- {indexify-0.0.23 → indexify-0.0.25}/indexify/utils.py +0 -0
@@ -1,14 +1,15 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: indexify
|
3
|
-
Version: 0.0.23
|
3
|
+
Version: 0.0.25
|
4
4
|
Summary: Python Client for Indexify
|
5
5
|
Home-page: https://github.com/tensorlakeai/indexify
|
6
6
|
License: Apache 2.0
|
7
7
|
Author: Diptanu Gon Choudhury
|
8
8
|
Author-email: diptanuc@gmail.com
|
9
|
-
Requires-Python: >=3.10,<4.0
|
9
|
+
Requires-Python: >=3.9,<4.0
|
10
10
|
Classifier: License :: Other/Proprietary License
|
11
11
|
Classifier: Programming Language :: Python :: 3
|
12
|
+
Classifier: Programming Language :: Python :: 3.9
|
12
13
|
Classifier: Programming Language :: Python :: 3.10
|
13
14
|
Classifier: Programming Language :: Python :: 3.11
|
14
15
|
Classifier: Programming Language :: Python :: 3.12
|
@@ -20,6 +20,7 @@ Document = namedtuple("Document", ["text", "labels", "id"])
|
|
20
20
|
|
21
21
|
SQLQueryRow = namedtuple("SQLQueryRow", ["content_id", "data"])
|
22
22
|
|
23
|
+
|
23
24
|
def generate_unique_hex_id():
|
24
25
|
"""
|
25
26
|
Generate a unique hexadecimal identifier
|
@@ -29,6 +30,7 @@ def generate_unique_hex_id():
|
|
29
30
|
"""
|
30
31
|
return uuid.uuid4().hex[:16]
|
31
32
|
|
33
|
+
|
32
34
|
def generate_hash_from_string(input_string: str):
|
33
35
|
"""
|
34
36
|
Generate a hash for the given string and return it as a hexadecimal string.
|
@@ -153,11 +155,13 @@ class IndexifyClient:
|
|
153
155
|
status_code = str(response.status_code)
|
154
156
|
if status_code.startswith("4") or status_code.startswith("5"):
|
155
157
|
raise ApiException(response.text)
|
156
|
-
#error = Error.from_tonic_error_string(str(response.url), response.text)
|
157
|
-
#self.__print_additional_error_context(error)
|
158
|
-
#raise error
|
158
|
+
# error = Error.from_tonic_error_string(str(response.url), response.text)
|
159
|
+
# self.__print_additional_error_context(error)
|
160
|
+
# raise error
|
159
161
|
except httpx.ConnectError:
|
160
|
-
message =
|
162
|
+
message = (
|
163
|
+
f"Make sure the server is running and accesible at {self._service_url}"
|
164
|
+
)
|
161
165
|
error = Error(status="ConnectionError", message=message)
|
162
166
|
print(error)
|
163
167
|
raise error
|
@@ -371,7 +375,7 @@ class IndexifyClient:
|
|
371
375
|
"""
|
372
376
|
response = self.get(f"namespaces/{self.namespace}/content/{content_id}")
|
373
377
|
return response.json()
|
374
|
-
|
378
|
+
|
375
379
|
def download_content(self, id: str) -> bytes:
|
376
380
|
"""
|
377
381
|
Download content from id. Return bytes
|
@@ -449,6 +453,21 @@ class IndexifyClient:
|
|
449
453
|
headers={"Content-Type": "application/json"},
|
450
454
|
)
|
451
455
|
|
456
|
+
def update_labels(self, document_id: str, labels: Dict[str, str]) -> None:
|
457
|
+
"""
|
458
|
+
Update labels for a document.
|
459
|
+
|
460
|
+
Args:
|
461
|
+
- document_id (str): id of document to update
|
462
|
+
- labels (Dict[str, str]): labels to update
|
463
|
+
"""
|
464
|
+
req = {"labels": labels}
|
465
|
+
response = self.put(
|
466
|
+
f"namespaces/{self.namespace}/content/{document_id}/labels",
|
467
|
+
json=req,
|
468
|
+
headers={"Content-Type": "application/json"},
|
469
|
+
)
|
470
|
+
|
452
471
|
def update_content(self, document_id: str, path: str) -> None:
|
453
472
|
"""
|
454
473
|
Update a piece of content with a new file
|
@@ -493,7 +512,13 @@ class IndexifyClient:
|
|
493
512
|
)
|
494
513
|
return response.json()["results"]
|
495
514
|
|
496
|
-
def upload_file(
|
515
|
+
def upload_file(
|
516
|
+
self,
|
517
|
+
extraction_graphs: Union[str, List[str]],
|
518
|
+
path: str,
|
519
|
+
id=None,
|
520
|
+
labels: dict = {},
|
521
|
+
) -> str:
|
497
522
|
"""
|
498
523
|
Upload a file.
|
499
524
|
|
@@ -510,7 +535,7 @@ class IndexifyClient:
|
|
510
535
|
response = self.post(
|
511
536
|
f"namespaces/{self.namespace}/upload_file",
|
512
537
|
files={"file": f},
|
513
|
-
data=labels,
|
538
|
+
data={"labels": json.dumps(labels)},
|
514
539
|
params=params,
|
515
540
|
)
|
516
541
|
response_json = response.json()
|
@@ -538,28 +563,28 @@ class IndexifyClient:
|
|
538
563
|
def get_extracted_content(self, content_id: str, level: int = 0):
|
539
564
|
"""
|
540
565
|
Get list of child for a given content id and their content up to the specified level.
|
541
|
-
|
566
|
+
|
542
567
|
Args:
|
543
568
|
- content_id (str): id of content
|
544
569
|
- level (int): depth of content retrieval (default: 0)
|
545
570
|
"""
|
546
571
|
content_tree = self.get_content_tree(content_id)
|
547
572
|
child_list = []
|
548
|
-
|
573
|
+
|
549
574
|
def traverse_content(parent_id, current_level):
|
550
575
|
if current_level > level:
|
551
576
|
return
|
552
|
-
|
553
|
-
for item in content_tree[
|
554
|
-
if item[
|
555
|
-
child_id = item[
|
577
|
+
|
578
|
+
for item in content_tree["content_tree_metadata"]:
|
579
|
+
if item["parent_id"] == parent_id:
|
580
|
+
child_id = item["id"]
|
556
581
|
content = self.download_content(child_id)
|
557
|
-
child_list.append({
|
558
|
-
|
582
|
+
child_list.append({"id": child_id, "content": content})
|
583
|
+
|
559
584
|
traverse_content(child_id, current_level + 1)
|
560
|
-
|
585
|
+
|
561
586
|
traverse_content(content_id, 0)
|
562
|
-
|
587
|
+
|
563
588
|
return child_list
|
564
589
|
|
565
590
|
def sql_query(self, query: str):
|
@@ -583,18 +608,29 @@ class IndexifyClient:
|
|
583
608
|
return SqlQueryResult(result=rows)
|
584
609
|
|
585
610
|
def ingest_remote_file(
|
586
|
-
self,
|
611
|
+
self,
|
612
|
+
extraction_graphs: Union[str, List[str]],
|
613
|
+
url: str,
|
614
|
+
mime_type: str,
|
615
|
+
labels: Dict[str, str],
|
616
|
+
id=None,
|
587
617
|
):
|
588
618
|
if isinstance(extraction_graphs, str):
|
589
619
|
extraction_graphs = [extraction_graphs]
|
590
|
-
req = {
|
620
|
+
req = {
|
621
|
+
"url": url,
|
622
|
+
"mime_type": mime_type,
|
623
|
+
"labels": labels,
|
624
|
+
"id": id,
|
625
|
+
"extraction_graph_names": extraction_graphs,
|
626
|
+
}
|
591
627
|
response = self.post(
|
592
628
|
f"namespaces/{self.namespace}/ingest_remote_file",
|
593
629
|
json=req,
|
594
630
|
headers={"Content-Type": "application/json"},
|
595
631
|
)
|
596
632
|
return response.json()
|
597
|
-
|
633
|
+
|
598
634
|
def wait_for_extraction(self, content_id: str):
|
599
635
|
"""
|
600
636
|
Wait for extraction to complete for a given content id
|
@@ -602,9 +638,7 @@ class IndexifyClient:
|
|
602
638
|
Args:
|
603
639
|
- content_id (str): id of content
|
604
640
|
"""
|
605
|
-
response = self.get(
|
606
|
-
f"namespaces/{self.namespace}/content/{content_id}/wait"
|
607
|
-
)
|
641
|
+
response = self.get(f"namespaces/{self.namespace}/content/{content_id}/wait")
|
608
642
|
response.raise_for_status()
|
609
643
|
|
610
644
|
def generate_unique_hex_id(self):
|
@@ -614,7 +648,9 @@ class IndexifyClient:
|
|
614
648
|
Returns:
|
615
649
|
str: a unique hexadecimal string
|
616
650
|
"""
|
617
|
-
logging.warning(
|
651
|
+
logging.warning(
|
652
|
+
"This method is deprecated. Use generate_unique_hex_id from indexify instead."
|
653
|
+
)
|
618
654
|
return uuid.uuid4().hex[:16]
|
619
655
|
|
620
656
|
def generate_hash_from_string(self, input_string: str):
|
@@ -627,7 +663,9 @@ class IndexifyClient:
|
|
627
663
|
Returns:
|
628
664
|
str: The hexadecimal hash of the input string.
|
629
665
|
"""
|
630
|
-
logging.warning(
|
666
|
+
logging.warning(
|
667
|
+
"This method is deprecated. Use generate_hash_from_string from indexify instead."
|
668
|
+
)
|
631
669
|
hash_object = hashlib.sha256(input_string.encode())
|
632
670
|
return hash_object.hexdigest()[:16]
|
633
671
|
|
@@ -1,15 +1,15 @@
|
|
1
1
|
[tool.poetry]
|
2
2
|
name = "indexify"
|
3
|
-
version = "0.0.23"
|
3
|
+
version = "0.0.25"
|
4
4
|
description = "Python Client for Indexify"
|
5
|
-
authors = ["Diptanu Gon Choudhury <diptanuc@gmail.com>", "Vijay Parthasarathy <vijay2win@gmail.com>"]
|
5
|
+
authors = ["Diptanu Gon Choudhury <diptanuc@gmail.com>", "Lucas Jackson <lucas@tensorlake.ai>", "Vijay Parthasarathy <vijay2win@gmail.com>"]
|
6
6
|
license = "Apache 2.0"
|
7
7
|
readme = "README.md"
|
8
8
|
homepage = "https://github.com/tensorlakeai/indexify"
|
9
9
|
repository = "https://github.com/tensorlakeai/indexify"
|
10
10
|
|
11
11
|
[tool.poetry.dependencies]
|
12
|
-
python = "^3.10"
|
12
|
+
python = "^3.9"
|
13
13
|
httpx = { version = "^0.26", extras = ["http2"] }
|
14
14
|
pyyaml = "^6.0.1"
|
15
15
|
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|