indexify 0.0.22__py3-none-any.whl → 0.0.24__py3-none-any.whl
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
- indexify/__init__.py +4 -2
- indexify/client.py +86 -22
- {indexify-0.0.22.dist-info → indexify-0.0.24.dist-info}/METADATA +3 -2
- {indexify-0.0.22.dist-info → indexify-0.0.24.dist-info}/RECORD +6 -6
- {indexify-0.0.22.dist-info → indexify-0.0.24.dist-info}/LICENSE.txt +0 -0
- {indexify-0.0.22.dist-info → indexify-0.0.24.dist-info}/WHEEL +0 -0
indexify/__init__.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
from .index import Index
|
2
2
|
from .client import IndexifyClient
|
3
|
-
from .extraction_policy import
|
4
|
-
from .client import IndexifyClient, Document
|
3
|
+
from .extraction_policy import ExtractionGraph
|
4
|
+
from .client import IndexifyClient, Document, generate_hash_from_string, generate_unique_hex_id
|
5
5
|
from .settings import DEFAULT_SERVICE_URL
|
6
6
|
|
7
7
|
__all__ = [
|
@@ -11,4 +11,6 @@ __all__ = [
|
|
11
11
|
"ExtractionGraph",
|
12
12
|
"ExtractionGraphBuilder" "ExtractionPolicy",
|
13
13
|
"DEFAULT_SERVICE_URL",
|
14
|
+
"generate_hash_from_string",
|
15
|
+
"generate_unique_hex_id",
|
14
16
|
]
|
indexify/client.py
CHANGED
@@ -14,12 +14,37 @@ from .data_containers import TextChunk
|
|
14
14
|
from indexify.exceptions import ApiException
|
15
15
|
from dataclasses import dataclass
|
16
16
|
from typing import List, Optional, Union, Dict
|
17
|
+
import logging
|
17
18
|
|
18
19
|
Document = namedtuple("Document", ["text", "labels", "id"])
|
19
20
|
|
20
21
|
SQLQueryRow = namedtuple("SQLQueryRow", ["content_id", "data"])
|
21
22
|
|
22
23
|
|
24
|
+
def generate_unique_hex_id():
|
25
|
+
"""
|
26
|
+
Generate a unique hexadecimal identifier
|
27
|
+
|
28
|
+
Returns:
|
29
|
+
str: a unique hexadecimal string
|
30
|
+
"""
|
31
|
+
return uuid.uuid4().hex[:16]
|
32
|
+
|
33
|
+
|
34
|
+
def generate_hash_from_string(input_string: str):
|
35
|
+
"""
|
36
|
+
Generate a hash for the given string and return it as a hexadecimal string.
|
37
|
+
|
38
|
+
Args:
|
39
|
+
input_string (str): The input string to hash.
|
40
|
+
|
41
|
+
Returns:
|
42
|
+
str: The hexadecimal hash of the input string.
|
43
|
+
"""
|
44
|
+
hash_object = hashlib.sha256(input_string.encode())
|
45
|
+
return hash_object.hexdigest()[:16]
|
46
|
+
|
47
|
+
|
23
48
|
@dataclass
|
24
49
|
class SqlQueryResult:
|
25
50
|
result: List[Dict]
|
@@ -129,11 +154,14 @@ class IndexifyClient:
|
|
129
154
|
response = self._client.request(method, timeout=self._timeout, **kwargs)
|
130
155
|
status_code = str(response.status_code)
|
131
156
|
if status_code.startswith("4") or status_code.startswith("5"):
|
132
|
-
|
133
|
-
|
134
|
-
|
157
|
+
raise ApiException(response.text)
|
158
|
+
# error = Error.from_tonic_error_string(str(response.url), response.text)
|
159
|
+
# self.__print_additional_error_context(error)
|
160
|
+
# raise error
|
135
161
|
except httpx.ConnectError:
|
136
|
-
message =
|
162
|
+
message = (
|
163
|
+
f"Make sure the server is running and accesible at {self._service_url}"
|
164
|
+
)
|
137
165
|
error = Error(status="ConnectionError", message=message)
|
138
166
|
print(error)
|
139
167
|
raise error
|
@@ -347,7 +375,7 @@ class IndexifyClient:
|
|
347
375
|
"""
|
348
376
|
response = self.get(f"namespaces/{self.namespace}/content/{content_id}")
|
349
377
|
return response.json()
|
350
|
-
|
378
|
+
|
351
379
|
def download_content(self, id: str) -> bytes:
|
352
380
|
"""
|
353
381
|
Download content from id. Return bytes
|
@@ -425,6 +453,21 @@ class IndexifyClient:
|
|
425
453
|
headers={"Content-Type": "application/json"},
|
426
454
|
)
|
427
455
|
|
456
|
+
def update_labels(self, document_id: str, labels: Dict[str, str]) -> None:
|
457
|
+
"""
|
458
|
+
Update labels for a document.
|
459
|
+
|
460
|
+
Args:
|
461
|
+
- document_id (str): id of document to update
|
462
|
+
- labels (Dict[str, str]): labels to update
|
463
|
+
"""
|
464
|
+
req = {"labels": labels}
|
465
|
+
response = self.put(
|
466
|
+
f"namespaces/{self.namespace}/content/{document_id}/labels",
|
467
|
+
json=req,
|
468
|
+
headers={"Content-Type": "application/json"},
|
469
|
+
)
|
470
|
+
|
428
471
|
def update_content(self, document_id: str, path: str) -> None:
|
429
472
|
"""
|
430
473
|
Update a piece of content with a new file
|
@@ -469,7 +512,13 @@ class IndexifyClient:
|
|
469
512
|
)
|
470
513
|
return response.json()["results"]
|
471
514
|
|
472
|
-
def upload_file(
|
515
|
+
def upload_file(
|
516
|
+
self,
|
517
|
+
extraction_graphs: Union[str, List[str]],
|
518
|
+
path: str,
|
519
|
+
id=None,
|
520
|
+
labels: dict = {},
|
521
|
+
) -> str:
|
473
522
|
"""
|
474
523
|
Upload a file.
|
475
524
|
|
@@ -514,28 +563,28 @@ class IndexifyClient:
|
|
514
563
|
def get_extracted_content(self, content_id: str, level: int = 0):
|
515
564
|
"""
|
516
565
|
Get list of child for a given content id and their content up to the specified level.
|
517
|
-
|
566
|
+
|
518
567
|
Args:
|
519
568
|
- content_id (str): id of content
|
520
569
|
- level (int): depth of content retrieval (default: 0)
|
521
570
|
"""
|
522
571
|
content_tree = self.get_content_tree(content_id)
|
523
572
|
child_list = []
|
524
|
-
|
573
|
+
|
525
574
|
def traverse_content(parent_id, current_level):
|
526
575
|
if current_level > level:
|
527
576
|
return
|
528
|
-
|
529
|
-
for item in content_tree[
|
530
|
-
if item[
|
531
|
-
child_id = item[
|
577
|
+
|
578
|
+
for item in content_tree["content_tree_metadata"]:
|
579
|
+
if item["parent_id"] == parent_id:
|
580
|
+
child_id = item["id"]
|
532
581
|
content = self.download_content(child_id)
|
533
|
-
child_list.append({
|
534
|
-
|
582
|
+
child_list.append({"id": child_id, "content": content})
|
583
|
+
|
535
584
|
traverse_content(child_id, current_level + 1)
|
536
|
-
|
585
|
+
|
537
586
|
traverse_content(content_id, 0)
|
538
|
-
|
587
|
+
|
539
588
|
return child_list
|
540
589
|
|
541
590
|
def sql_query(self, query: str):
|
@@ -559,18 +608,29 @@ class IndexifyClient:
|
|
559
608
|
return SqlQueryResult(result=rows)
|
560
609
|
|
561
610
|
def ingest_remote_file(
|
562
|
-
self,
|
611
|
+
self,
|
612
|
+
extraction_graphs: Union[str, List[str]],
|
613
|
+
url: str,
|
614
|
+
mime_type: str,
|
615
|
+
labels: Dict[str, str],
|
616
|
+
id=None,
|
563
617
|
):
|
564
618
|
if isinstance(extraction_graphs, str):
|
565
619
|
extraction_graphs = [extraction_graphs]
|
566
|
-
req = {
|
620
|
+
req = {
|
621
|
+
"url": url,
|
622
|
+
"mime_type": mime_type,
|
623
|
+
"labels": labels,
|
624
|
+
"id": id,
|
625
|
+
"extraction_graph_names": extraction_graphs,
|
626
|
+
}
|
567
627
|
response = self.post(
|
568
628
|
f"namespaces/{self.namespace}/ingest_remote_file",
|
569
629
|
json=req,
|
570
630
|
headers={"Content-Type": "application/json"},
|
571
631
|
)
|
572
632
|
return response.json()
|
573
|
-
|
633
|
+
|
574
634
|
def wait_for_extraction(self, content_id: str):
|
575
635
|
"""
|
576
636
|
Wait for extraction to complete for a given content id
|
@@ -578,9 +638,7 @@ class IndexifyClient:
|
|
578
638
|
Args:
|
579
639
|
- content_id (str): id of content
|
580
640
|
"""
|
581
|
-
response = self.get(
|
582
|
-
f"namespaces/{self.namespace}/content/{content_id}/wait"
|
583
|
-
)
|
641
|
+
response = self.get(f"namespaces/{self.namespace}/content/{content_id}/wait")
|
584
642
|
response.raise_for_status()
|
585
643
|
|
586
644
|
def generate_unique_hex_id(self):
|
@@ -590,6 +648,9 @@ class IndexifyClient:
|
|
590
648
|
Returns:
|
591
649
|
str: a unique hexadecimal string
|
592
650
|
"""
|
651
|
+
logging.warning(
|
652
|
+
"This method is deprecated. Use generate_unique_hex_id from indexify instead."
|
653
|
+
)
|
593
654
|
return uuid.uuid4().hex[:16]
|
594
655
|
|
595
656
|
def generate_hash_from_string(self, input_string: str):
|
@@ -602,6 +663,9 @@ class IndexifyClient:
|
|
602
663
|
Returns:
|
603
664
|
str: The hexadecimal hash of the input string.
|
604
665
|
"""
|
666
|
+
logging.warning(
|
667
|
+
"This method is deprecated. Use generate_hash_from_string from indexify instead."
|
668
|
+
)
|
605
669
|
hash_object = hashlib.sha256(input_string.encode())
|
606
670
|
return hash_object.hexdigest()[:16]
|
607
671
|
|
@@ -1,14 +1,15 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: indexify
|
3
|
-
Version: 0.0.
|
3
|
+
Version: 0.0.24
|
4
4
|
Summary: Python Client for Indexify
|
5
5
|
Home-page: https://github.com/tensorlakeai/indexify
|
6
6
|
License: Apache 2.0
|
7
7
|
Author: Diptanu Gon Choudhury
|
8
8
|
Author-email: diptanuc@gmail.com
|
9
|
-
Requires-Python: >=3.
|
9
|
+
Requires-Python: >=3.9,<4.0
|
10
10
|
Classifier: License :: Other/Proprietary License
|
11
11
|
Classifier: Programming Language :: Python :: 3
|
12
|
+
Classifier: Programming Language :: Python :: 3.9
|
12
13
|
Classifier: Programming Language :: Python :: 3.10
|
13
14
|
Classifier: Programming Language :: Python :: 3.11
|
14
15
|
Classifier: Programming Language :: Python :: 3.12
|
@@ -1,5 +1,5 @@
|
|
1
|
-
indexify/__init__.py,sha256=
|
2
|
-
indexify/client.py,sha256=
|
1
|
+
indexify/__init__.py,sha256=Y40-Ur_tL7kGGs-reh9BTfEYGe-KyGxgdg-CmoFsXRQ,473
|
2
|
+
indexify/client.py,sha256=AMhzAzFB1Q2eGc50zFSYft4dKs40HPXbOD-GR4y_Q8I,21916
|
3
3
|
indexify/data_containers.py,sha256=r1wxJPtsmXbyKvb17fqxm-dPjKz51oZ62f8A8Zxls1c,361
|
4
4
|
indexify/error.py,sha256=3umTeYb0ugtUyehV1ibfvaeACxAONPyWPc-1HRN4d1M,856
|
5
5
|
indexify/exceptions.py,sha256=vjd5SPPNFIEW35GorSIodsqvm9RKHQm9kdp8t9gv-WM,111
|
@@ -8,7 +8,7 @@ indexify/extractor.py,sha256=sWFLlXHgEfWlmiKAXN6ytUt_uG7th-XGNHqz-TG39gs,1216
|
|
8
8
|
indexify/index.py,sha256=RvxYhJXEth-GKvqzlMiz5PuN1eIbZk84pt20piA1Gsw,504
|
9
9
|
indexify/settings.py,sha256=LSaWZ0ADIVmUv6o6dHWRC3-Ry5uLbCw2sBSg1e_U7UM,99
|
10
10
|
indexify/utils.py,sha256=rDN2lrsAs9noJEIjfx6ukmC2SAIyrlUt7QU-kaBjujM,125
|
11
|
-
indexify-0.0.
|
12
|
-
indexify-0.0.
|
13
|
-
indexify-0.0.
|
14
|
-
indexify-0.0.
|
11
|
+
indexify-0.0.24.dist-info/LICENSE.txt,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
12
|
+
indexify-0.0.24.dist-info/METADATA,sha256=3u6fw5r8HJNRfjuMD8xzjFOH_NgbMRsS7xeoESwivlw,1798
|
13
|
+
indexify-0.0.24.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
14
|
+
indexify-0.0.24.dist-info/RECORD,,
|
File without changes
|
File without changes
|