indexify 0.0.23__tar.gz → 0.0.25__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -1,14 +1,15 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: indexify
3
- Version: 0.0.23
3
+ Version: 0.0.25
4
4
  Summary: Python Client for Indexify
5
5
  Home-page: https://github.com/tensorlakeai/indexify
6
6
  License: Apache 2.0
7
7
  Author: Diptanu Gon Choudhury
8
8
  Author-email: diptanuc@gmail.com
9
- Requires-Python: >=3.10.0,<4.0.0
9
+ Requires-Python: >=3.9,<4.0
10
10
  Classifier: License :: Other/Proprietary License
11
11
  Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3.9
12
13
  Classifier: Programming Language :: Python :: 3.10
13
14
  Classifier: Programming Language :: Python :: 3.11
14
15
  Classifier: Programming Language :: Python :: 3.12
@@ -20,6 +20,7 @@ Document = namedtuple("Document", ["text", "labels", "id"])
20
20
 
21
21
  SQLQueryRow = namedtuple("SQLQueryRow", ["content_id", "data"])
22
22
 
23
+
23
24
  def generate_unique_hex_id():
24
25
  """
25
26
  Generate a unique hexadecimal identifier
@@ -29,6 +30,7 @@ def generate_unique_hex_id():
29
30
  """
30
31
  return uuid.uuid4().hex[:16]
31
32
 
33
+
32
34
  def generate_hash_from_string(input_string: str):
33
35
  """
34
36
  Generate a hash for the given string and return it as a hexadecimal string.
@@ -153,11 +155,13 @@ class IndexifyClient:
153
155
  status_code = str(response.status_code)
154
156
  if status_code.startswith("4") or status_code.startswith("5"):
155
157
  raise ApiException(response.text)
156
- #error = Error.from_tonic_error_string(str(response.url), response.text)
157
- #self.__print_additional_error_context(error)
158
- #raise error
158
+ # error = Error.from_tonic_error_string(str(response.url), response.text)
159
+ # self.__print_additional_error_context(error)
160
+ # raise error
159
161
  except httpx.ConnectError:
160
- message = f"Make sure the server is running and accesible at {self._service_url}"
162
+ message = (
163
+ f"Make sure the server is running and accesible at {self._service_url}"
164
+ )
161
165
  error = Error(status="ConnectionError", message=message)
162
166
  print(error)
163
167
  raise error
@@ -371,7 +375,7 @@ class IndexifyClient:
371
375
  """
372
376
  response = self.get(f"namespaces/{self.namespace}/content/{content_id}")
373
377
  return response.json()
374
-
378
+
375
379
  def download_content(self, id: str) -> bytes:
376
380
  """
377
381
  Download content from id. Return bytes
@@ -449,6 +453,21 @@ class IndexifyClient:
449
453
  headers={"Content-Type": "application/json"},
450
454
  )
451
455
 
456
+ def update_labels(self, document_id: str, labels: Dict[str, str]) -> None:
457
+ """
458
+ Update labels for a document.
459
+
460
+ Args:
461
+ - document_id (str): id of document to update
462
+ - labels (Dict[str, str]): labels to update
463
+ """
464
+ req = {"labels": labels}
465
+ response = self.put(
466
+ f"namespaces/{self.namespace}/content/{document_id}/labels",
467
+ json=req,
468
+ headers={"Content-Type": "application/json"},
469
+ )
470
+
452
471
  def update_content(self, document_id: str, path: str) -> None:
453
472
  """
454
473
  Update a piece of content with a new file
@@ -493,7 +512,13 @@ class IndexifyClient:
493
512
  )
494
513
  return response.json()["results"]
495
514
 
496
- def upload_file(self, extraction_graphs: Union[str, List[str]], path: str, id=None, labels: dict = {}) -> str:
515
+ def upload_file(
516
+ self,
517
+ extraction_graphs: Union[str, List[str]],
518
+ path: str,
519
+ id=None,
520
+ labels: dict = {},
521
+ ) -> str:
497
522
  """
498
523
  Upload a file.
499
524
 
@@ -510,7 +535,7 @@ class IndexifyClient:
510
535
  response = self.post(
511
536
  f"namespaces/{self.namespace}/upload_file",
512
537
  files={"file": f},
513
- data=labels,
538
+ data={"labels": json.dumps(labels)},
514
539
  params=params,
515
540
  )
516
541
  response_json = response.json()
@@ -538,28 +563,28 @@ class IndexifyClient:
538
563
  def get_extracted_content(self, content_id: str, level: int = 0):
539
564
  """
540
565
  Get list of child for a given content id and their content up to the specified level.
541
-
566
+
542
567
  Args:
543
568
  - content_id (str): id of content
544
569
  - level (int): depth of content retrieval (default: 0)
545
570
  """
546
571
  content_tree = self.get_content_tree(content_id)
547
572
  child_list = []
548
-
573
+
549
574
  def traverse_content(parent_id, current_level):
550
575
  if current_level > level:
551
576
  return
552
-
553
- for item in content_tree['content_tree_metadata']:
554
- if item['parent_id'] == parent_id:
555
- child_id = item['id']
577
+
578
+ for item in content_tree["content_tree_metadata"]:
579
+ if item["parent_id"] == parent_id:
580
+ child_id = item["id"]
556
581
  content = self.download_content(child_id)
557
- child_list.append({'id': child_id, 'content': content})
558
-
582
+ child_list.append({"id": child_id, "content": content})
583
+
559
584
  traverse_content(child_id, current_level + 1)
560
-
585
+
561
586
  traverse_content(content_id, 0)
562
-
587
+
563
588
  return child_list
564
589
 
565
590
  def sql_query(self, query: str):
@@ -583,18 +608,29 @@ class IndexifyClient:
583
608
  return SqlQueryResult(result=rows)
584
609
 
585
610
  def ingest_remote_file(
586
- self, extraction_graphs: Union[str, List[str]], url: str, mime_type: str, labels: Dict[str, str], id=None
611
+ self,
612
+ extraction_graphs: Union[str, List[str]],
613
+ url: str,
614
+ mime_type: str,
615
+ labels: Dict[str, str],
616
+ id=None,
587
617
  ):
588
618
  if isinstance(extraction_graphs, str):
589
619
  extraction_graphs = [extraction_graphs]
590
- req = {"url": url, "mime_type": mime_type, "labels": labels, "id": id, "extraction_graph_names": extraction_graphs}
620
+ req = {
621
+ "url": url,
622
+ "mime_type": mime_type,
623
+ "labels": labels,
624
+ "id": id,
625
+ "extraction_graph_names": extraction_graphs,
626
+ }
591
627
  response = self.post(
592
628
  f"namespaces/{self.namespace}/ingest_remote_file",
593
629
  json=req,
594
630
  headers={"Content-Type": "application/json"},
595
631
  )
596
632
  return response.json()
597
-
633
+
598
634
  def wait_for_extraction(self, content_id: str):
599
635
  """
600
636
  Wait for extraction to complete for a given content id
@@ -602,9 +638,7 @@ class IndexifyClient:
602
638
  Args:
603
639
  - content_id (str): id of content
604
640
  """
605
- response = self.get(
606
- f"namespaces/{self.namespace}/content/{content_id}/wait"
607
- )
641
+ response = self.get(f"namespaces/{self.namespace}/content/{content_id}/wait")
608
642
  response.raise_for_status()
609
643
 
610
644
  def generate_unique_hex_id(self):
@@ -614,7 +648,9 @@ class IndexifyClient:
614
648
  Returns:
615
649
  str: a unique hexadecimal string
616
650
  """
617
- logging.warning("This method is deprecated. Use generate_unique_hex_id from indexify instead.")
651
+ logging.warning(
652
+ "This method is deprecated. Use generate_unique_hex_id from indexify instead."
653
+ )
618
654
  return uuid.uuid4().hex[:16]
619
655
 
620
656
  def generate_hash_from_string(self, input_string: str):
@@ -627,7 +663,9 @@ class IndexifyClient:
627
663
  Returns:
628
664
  str: The hexadecimal hash of the input string.
629
665
  """
630
- logging.warning("This method is deprecated. Use generate_hash_from_string from indexify instead.")
666
+ logging.warning(
667
+ "This method is deprecated. Use generate_hash_from_string from indexify instead."
668
+ )
631
669
  hash_object = hashlib.sha256(input_string.encode())
632
670
  return hash_object.hexdigest()[:16]
633
671
 
@@ -1,15 +1,15 @@
1
1
  [tool.poetry]
2
2
  name = "indexify"
3
- version = "0.0.23"
3
+ version = "0.0.25"
4
4
  description = "Python Client for Indexify"
5
- authors = ["Diptanu Gon Choudhury <diptanuc@gmail.com>", "Vijay Parthasarathy <vijay2win@gmail.com>"]
5
+ authors = ["Diptanu Gon Choudhury <diptanuc@gmail.com>", "Lucas Jackson <lucas@tensorlake.ai>", "Vijay Parthasarathy <vijay2win@gmail.com>"]
6
6
  license = "Apache 2.0"
7
7
  readme = "README.md"
8
8
  homepage = "https://github.com/tensorlakeai/indexify"
9
9
  repository = "https://github.com/tensorlakeai/indexify"
10
10
 
11
11
  [tool.poetry.dependencies]
12
- python = "^3.10.0"
12
+ python = "^3.9"
13
13
  httpx = { version = "^0.26", extras = ["http2"] }
14
14
  pyyaml = "^6.0.1"
15
15
 
File without changes
File without changes
File without changes
File without changes
File without changes