indexify 0.0.22__tar.gz → 0.0.24__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -1,14 +1,15 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: indexify
3
- Version: 0.0.22
3
+ Version: 0.0.24
4
4
  Summary: Python Client for Indexify
5
5
  Home-page: https://github.com/tensorlakeai/indexify
6
6
  License: Apache 2.0
7
7
  Author: Diptanu Gon Choudhury
8
8
  Author-email: diptanuc@gmail.com
9
- Requires-Python: >=3.10.0,<4.0.0
9
+ Requires-Python: >=3.9,<4.0
10
10
  Classifier: License :: Other/Proprietary License
11
11
  Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3.9
12
13
  Classifier: Programming Language :: Python :: 3.10
13
14
  Classifier: Programming Language :: Python :: 3.11
14
15
  Classifier: Programming Language :: Python :: 3.12
@@ -1,7 +1,7 @@
1
1
  from .index import Index
2
2
  from .client import IndexifyClient
3
- from .extraction_policy import ExtractionPolicy, ExtractionGraphBuilder, ExtractionGraph
4
- from .client import IndexifyClient, Document
3
+ from .extraction_policy import ExtractionGraph
4
+ from .client import IndexifyClient, Document, generate_hash_from_string, generate_unique_hex_id
5
5
  from .settings import DEFAULT_SERVICE_URL
6
6
 
7
7
  __all__ = [
@@ -11,4 +11,6 @@ __all__ = [
11
11
  "ExtractionGraph",
12
12
  "ExtractionGraphBuilder" "ExtractionPolicy",
13
13
  "DEFAULT_SERVICE_URL",
14
+ "generate_hash_from_string",
15
+ "generate_unique_hex_id",
14
16
  ]
@@ -14,12 +14,37 @@ from .data_containers import TextChunk
14
14
  from indexify.exceptions import ApiException
15
15
  from dataclasses import dataclass
16
16
  from typing import List, Optional, Union, Dict
17
+ import logging
17
18
 
18
19
  Document = namedtuple("Document", ["text", "labels", "id"])
19
20
 
20
21
  SQLQueryRow = namedtuple("SQLQueryRow", ["content_id", "data"])
21
22
 
22
23
 
24
+ def generate_unique_hex_id():
25
+ """
26
+ Generate a unique hexadecimal identifier
27
+
28
+ Returns:
29
+ str: a unique hexadecimal string
30
+ """
31
+ return uuid.uuid4().hex[:16]
32
+
33
+
34
+ def generate_hash_from_string(input_string: str):
35
+ """
36
+ Generate a hash for the given string and return it as a hexadecimal string.
37
+
38
+ Args:
39
+ input_string (str): The input string to hash.
40
+
41
+ Returns:
42
+ str: The hexadecimal hash of the input string.
43
+ """
44
+ hash_object = hashlib.sha256(input_string.encode())
45
+ return hash_object.hexdigest()[:16]
46
+
47
+
23
48
  @dataclass
24
49
  class SqlQueryResult:
25
50
  result: List[Dict]
@@ -129,11 +154,14 @@ class IndexifyClient:
129
154
  response = self._client.request(method, timeout=self._timeout, **kwargs)
130
155
  status_code = str(response.status_code)
131
156
  if status_code.startswith("4") or status_code.startswith("5"):
132
- error = Error.from_tonic_error_string(str(response.url), response.text)
133
- self.__print_additional_error_context(error)
134
- raise error
157
+ raise ApiException(response.text)
158
+ # error = Error.from_tonic_error_string(str(response.url), response.text)
159
+ # self.__print_additional_error_context(error)
160
+ # raise error
135
161
  except httpx.ConnectError:
136
- message = f"Make sure the server is running and accesible at {self._service_url}"
162
+ message = (
163
+ f"Make sure the server is running and accesible at {self._service_url}"
164
+ )
137
165
  error = Error(status="ConnectionError", message=message)
138
166
  print(error)
139
167
  raise error
@@ -347,7 +375,7 @@ class IndexifyClient:
347
375
  """
348
376
  response = self.get(f"namespaces/{self.namespace}/content/{content_id}")
349
377
  return response.json()
350
-
378
+
351
379
  def download_content(self, id: str) -> bytes:
352
380
  """
353
381
  Download content from id. Return bytes
@@ -425,6 +453,21 @@ class IndexifyClient:
425
453
  headers={"Content-Type": "application/json"},
426
454
  )
427
455
 
456
+ def update_labels(self, document_id: str, labels: Dict[str, str]) -> None:
457
+ """
458
+ Update labels for a document.
459
+
460
+ Args:
461
+ - document_id (str): id of document to update
462
+ - labels (Dict[str, str]): labels to update
463
+ """
464
+ req = {"labels": labels}
465
+ response = self.put(
466
+ f"namespaces/{self.namespace}/content/{document_id}/labels",
467
+ json=req,
468
+ headers={"Content-Type": "application/json"},
469
+ )
470
+
428
471
  def update_content(self, document_id: str, path: str) -> None:
429
472
  """
430
473
  Update a piece of content with a new file
@@ -469,7 +512,13 @@ class IndexifyClient:
469
512
  )
470
513
  return response.json()["results"]
471
514
 
472
- def upload_file(self, extraction_graphs: Union[str, List[str]], path: str, id=None, labels: dict = {}) -> str:
515
+ def upload_file(
516
+ self,
517
+ extraction_graphs: Union[str, List[str]],
518
+ path: str,
519
+ id=None,
520
+ labels: dict = {},
521
+ ) -> str:
473
522
  """
474
523
  Upload a file.
475
524
 
@@ -514,28 +563,28 @@ class IndexifyClient:
514
563
  def get_extracted_content(self, content_id: str, level: int = 0):
515
564
  """
516
565
  Get list of child for a given content id and their content up to the specified level.
517
-
566
+
518
567
  Args:
519
568
  - content_id (str): id of content
520
569
  - level (int): depth of content retrieval (default: 0)
521
570
  """
522
571
  content_tree = self.get_content_tree(content_id)
523
572
  child_list = []
524
-
573
+
525
574
  def traverse_content(parent_id, current_level):
526
575
  if current_level > level:
527
576
  return
528
-
529
- for item in content_tree['content_tree_metadata']:
530
- if item['parent_id'] == parent_id:
531
- child_id = item['id']
577
+
578
+ for item in content_tree["content_tree_metadata"]:
579
+ if item["parent_id"] == parent_id:
580
+ child_id = item["id"]
532
581
  content = self.download_content(child_id)
533
- child_list.append({'id': child_id, 'content': content})
534
-
582
+ child_list.append({"id": child_id, "content": content})
583
+
535
584
  traverse_content(child_id, current_level + 1)
536
-
585
+
537
586
  traverse_content(content_id, 0)
538
-
587
+
539
588
  return child_list
540
589
 
541
590
  def sql_query(self, query: str):
@@ -559,18 +608,29 @@ class IndexifyClient:
559
608
  return SqlQueryResult(result=rows)
560
609
 
561
610
  def ingest_remote_file(
562
- self, extraction_graphs: Union[str, List[str]], url: str, mime_type: str, labels: Dict[str, str], id=None
611
+ self,
612
+ extraction_graphs: Union[str, List[str]],
613
+ url: str,
614
+ mime_type: str,
615
+ labels: Dict[str, str],
616
+ id=None,
563
617
  ):
564
618
  if isinstance(extraction_graphs, str):
565
619
  extraction_graphs = [extraction_graphs]
566
- req = {"url": url, "mime_type": mime_type, "labels": labels, "id": id, "extraction_graph_names": extraction_graphs}
620
+ req = {
621
+ "url": url,
622
+ "mime_type": mime_type,
623
+ "labels": labels,
624
+ "id": id,
625
+ "extraction_graph_names": extraction_graphs,
626
+ }
567
627
  response = self.post(
568
628
  f"namespaces/{self.namespace}/ingest_remote_file",
569
629
  json=req,
570
630
  headers={"Content-Type": "application/json"},
571
631
  )
572
632
  return response.json()
573
-
633
+
574
634
  def wait_for_extraction(self, content_id: str):
575
635
  """
576
636
  Wait for extraction to complete for a given content id
@@ -578,9 +638,7 @@ class IndexifyClient:
578
638
  Args:
579
639
  - content_id (str): id of content
580
640
  """
581
- response = self.get(
582
- f"namespaces/{self.namespace}/content/{content_id}/wait"
583
- )
641
+ response = self.get(f"namespaces/{self.namespace}/content/{content_id}/wait")
584
642
  response.raise_for_status()
585
643
 
586
644
  def generate_unique_hex_id(self):
@@ -590,6 +648,9 @@ class IndexifyClient:
590
648
  Returns:
591
649
  str: a unique hexadecimal string
592
650
  """
651
+ logging.warning(
652
+ "This method is deprecated. Use generate_unique_hex_id from indexify instead."
653
+ )
593
654
  return uuid.uuid4().hex[:16]
594
655
 
595
656
  def generate_hash_from_string(self, input_string: str):
@@ -602,6 +663,9 @@ class IndexifyClient:
602
663
  Returns:
603
664
  str: The hexadecimal hash of the input string.
604
665
  """
666
+ logging.warning(
667
+ "This method is deprecated. Use generate_hash_from_string from indexify instead."
668
+ )
605
669
  hash_object = hashlib.sha256(input_string.encode())
606
670
  return hash_object.hexdigest()[:16]
607
671
 
@@ -1,15 +1,15 @@
1
1
  [tool.poetry]
2
2
  name = "indexify"
3
- version = "0.0.22"
3
+ version = "0.0.24"
4
4
  description = "Python Client for Indexify"
5
- authors = ["Diptanu Gon Choudhury <diptanuc@gmail.com>", "Vijay Parthasarathy <vijay2win@gmail.com>"]
5
+ authors = ["Diptanu Gon Choudhury <diptanuc@gmail.com>", "Lucas Jackson <lucas@tensorlake.ai>", "Vijay Parthasarathy <vijay2win@gmail.com>"]
6
6
  license = "Apache 2.0"
7
7
  readme = "README.md"
8
8
  homepage = "https://github.com/tensorlakeai/indexify"
9
9
  repository = "https://github.com/tensorlakeai/indexify"
10
10
 
11
11
  [tool.poetry.dependencies]
12
- python = "^3.10.0"
12
+ python = "^3.9"
13
13
  httpx = { version = "^0.26", extras = ["http2"] }
14
14
  pyyaml = "^6.0.1"
15
15
 
File without changes
File without changes
File without changes
File without changes
File without changes