indexify 0.0.29__tar.gz → 0.0.31__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: indexify
-Version: 0.0.29
+Version: 0.0.31
 Summary: Python Client for Indexify
 Home-page: https://github.com/tensorlakeai/indexify
 License: Apache 2.0
@@ -35,8 +35,8 @@ pip install indexify
 
 ## Usage
 
-See the [getting started](https://getindexify.com/getting_started/) guide for examples of how to use the client.
-Look at the [examples](examples) directory for more examples.
+See the [getting started](https://docs.getindexify.com/getting_started/) guide for examples of how to use the client.
+Look at the [examples](https://github.com/tensorlakeai/indexify/tree/main/examples) directory for more examples.
 
 ## Development
 
@@ -15,8 +15,8 @@ pip install indexify
 
 ## Usage
 
-See the [getting started](https://getindexify.com/getting_started/) guide for examples of how to use the client.
-Look at the [examples](examples) directory for more examples.
+See the [getting started](https://docs.getindexify.com/getting_started/) guide for examples of how to use the client.
+Look at the [examples](https://github.com/tensorlakeai/indexify/tree/main/examples) directory for more examples.
 
 ## Development
 
@@ -154,7 +154,9 @@ class IndexifyClient:
         response = self._client.request(method, timeout=self._timeout, **kwargs)
         status_code = str(response.status_code)
         if status_code.startswith("4"):
-            raise ApiException("status code: " + status_code + " request args: " + str(kwargs))
+            raise ApiException(
+                "status code: " + status_code + " request args: " + str(kwargs)
+            )
         if status_code.startswith("5"):
             raise ApiException(response.text)
         # error = Error.from_tonic_error_string(str(response.url), response.text)
@@ -342,11 +344,11 @@ class IndexifyClient:
         """
         Retrieve and update the list of extraction policies for the current namespace.
         """
-        response = self.get(f"namespaces/{self.namespace}")
+        response = self.get(f"namespaces/{self.namespace}/extraction_graphs")
         json = response.json()
 
         self.extraction_graphs = []
-        for graph in json["namespace"]["extraction_graphs"]:
+        for graph in json["extraction_graphs"]:
             self.extraction_graphs.append(ExtractionGraph.from_dict(graph))
 
         return self.extraction_graphs
@@ -368,6 +370,28 @@ class IndexifyClient:
         )
         return
 
+    def link_extraction_graphs(
+        self, source_graph: str, content_source: str, linked_graph: str
+    ):
+        """
+        Link an extraction graph to another extraction graph.
+
+        Args:
+            - source_graph (str): source extraction graph
+            - content_source (str): content source in source graph
+            - linked_graph (str): target extraction graph
+        """
+        req = {
+            "content_source": content_source,
+            "linked_graph_name": linked_graph,
+        }
+        response = self.post(
+            f"namespaces/{self.namespace}/extraction_graphs/{source_graph}/links",
+            json=req,
+            headers={"Content-Type": "application/json"},
+        )
+        return
+
     def get_content_metadata(self, content_id: str) -> dict:
         """
         Get metadata for a specific content ID in a given index.
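
The new link_extraction_graphs method above chains two extraction graphs by routing the output of one policy into another graph. A minimal usage sketch, assuming the usual top-level import and a client pointed at a default local Indexify server; the graph and policy names below are illustrative:

    from indexify import IndexifyClient

    client = IndexifyClient()  # assumes a default local Indexify server
    # Route content produced by the hypothetical "chunker" policy of
    # "ingest-graph" into a second, hypothetical graph named "embed-graph".
    client.link_extraction_graphs(
        source_graph="ingest-graph",
        content_source="chunker",
        linked_graph="embed-graph",
    )
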
@@ -375,17 +399,17 @@ class IndexifyClient:
         Args:
             - content_id (str): content id to query
         """
-        response = self.get(f"namespaces/{self.namespace}/content/{content_id}")
+        response = self.get(f"namespaces/{self.namespace}/content/{content_id}/metadata")
         return response.json()
 
-    def download_content(self, id: str) -> bytes:
+    def download_content(self, content_id: str) -> bytes:
         """
         Download content from id. Return bytes
 
         Args:
-            - id (str): id of content to download
+            - content_id (str): id of content to download
         """
-        response = self.get(f"namespaces/{self.namespace}/content/{id}/download")
+        response = self.get(f"namespaces/{self.namespace}/content/{content_id}/download")
         return response.content
 
     def add_documents(
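
In this hunk get_content_metadata now hits the /metadata sub-path and download_content takes content_id instead of the builtin-shadowing id. A short sketch of the two calls, with a made-up content id:

    from indexify import IndexifyClient

    client = IndexifyClient()
    meta = client.get_content_metadata("content-123")  # GET .../content/content-123/metadata
    raw = client.download_content("content-123")       # GET .../content/content-123/download
    print(meta, len(raw))
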
@@ -424,21 +448,21 @@ class IndexifyClient:
             raise TypeError(
                 "Invalid type for documents. Expected Document, str, or list of these."
             )
-
-        req = {
-            "documents": [doc._asdict() for doc in documents],
-            "extraction_graph_names": extraction_graphs,
-        }
-        response = self.post(
-            f"namespaces/{self.namespace}/add_texts",
-            json=req,
-            headers={"Content-Type": "application/json"},
-        )
-        response.raise_for_status()
-        response_json = response.json()
-        content_ids = response_json["content_ids"]
-        if len(documents) == 1 and len(content_ids) == 1:
-            return content_ids[0]
+        for document in documents:
+            document.labels["mime_type"] = "text/plain"
+        content_ids = []
+        if isinstance(extraction_graphs, str):
+            extraction_graphs = [extraction_graphs]
+        for extraction_graph in extraction_graphs:
+            for document in documents:
+                response = self.post(
+                    f"namespaces/{self.namespace}/extraction_graphs/{extraction_graph}/extract",
+                    files={"file": document.text},
+                    data={"labels": json.dumps(document.labels)},
+                )
+                response_json = response.json()
+                content_id = response_json["content_id"]
+                content_ids.append(content_id)
         return content_ids
 
     def delete_documents(self, document_ids: List[str]) -> None:
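
In 0.0.31, add_documents tags each document with a text/plain mime_type label and posts it to every listed graph's extract endpoint, always returning a list of content ids (the old single-id shortcut for a lone document is gone). A sketch of the new call pattern, using keyword arguments because the full signature is not visible in this hunk; the parameter names are taken from the hunk body and the graph name is illustrative:

    from indexify import IndexifyClient

    client = IndexifyClient()
    # Plain strings are accepted per the TypeError message above
    # (presumably converted to Document objects earlier in the method).
    content_ids = client.add_documents(
        extraction_graphs="ingest-graph",
        documents=["first document text", "second document text"],
    )
    print(content_ids)  # one id per document per graph
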
@@ -506,15 +530,22 @@ class IndexifyClient:
             - top_k (int): top k nearest neighbors to be returned
             - filters (List[str]): list of filters to apply
         """
-        req = {"index": name, "query": query, "k": top_k, "filters": filters}
+        req = {"query": query, "k": top_k, "filters": filters}
         response = self.post(
-            f"namespaces/{self.namespace}/search",
+            f"namespaces/{self.namespace}/indexes/{name}/search",
             json=req,
             headers={"Content-Type": "application/json"},
         )
         return response.json()["results"]
-
-    def list_content(self, extraction_graph: str, extraction_policy: str = "", start_id: str="", limit: int=10) -> List[Content]:
+
+    def list_content(
+        self,
+        extraction_graph: str,
+        extraction_policy: str = "",
+        labels_filter: List[str] = [],
+        start_id: str = "",
+        limit: int = 10,
+    ) -> List[Content]:
         """
         List content in the current namespace.
 
@@ -528,6 +559,8 @@ class IndexifyClient:
             params["source"] = extraction_policy
         else:
             params["source"] = "ingestion"
+        if len(labels_filter) > 0:
+            params["labels_filter"] = labels_filter
         response = self.get(
             f"namespaces/{self.namespace}/content",
             params=params,
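
Alongside the per-index search route introduced two hunks above, list_content gains a labels_filter parameter that is forwarded as a query parameter only when non-empty. A sketch of listing ingested content filtered by a label, where the filter expression syntax is an assumption not confirmed by this diff:

    from indexify import IndexifyClient

    client = IndexifyClient()
    # Only content matching the label filter is returned.
    content = client.list_content(
        extraction_graph="ingest-graph",
        labels_filter=["source=web"],  # assumed filter syntax
        limit=5,
    )
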
@@ -554,18 +587,20 @@ class IndexifyClient:
         """
         if isinstance(extraction_graphs, str):
             extraction_graphs = [extraction_graphs]
-        params = {"extraction_graph_names": extraction_graphs}
+        params = {}
         if id is not None:
             params["id"] = id
         with open(path, "rb") as f:
-            response = self.post(
-                f"namespaces/{self.namespace}/upload_file",
-                files={"file": f},
-                data={"labels": json.dumps(labels)},
-                params=params,
-            )
+            for extraction_graph in extraction_graphs:
+                response = self.post(
+                    f"namespaces/{self.namespace}/extraction_graphs/{extraction_graph}/extract",
+                    files={"file": f},
+                    data={"labels": json.dumps(labels)},
+                    params=params,
+                )
         response_json = response.json()
-        return response_json["content_id"]
+        content_id = response_json["content_id"]
+        return content_id
 
     def list_schemas(self) -> List[str]:
         """
@@ -574,35 +609,32 @@ class IndexifyClient:
         response = self.get(f"namespaces/{self.namespace}/schemas")
         return response.json()
 
-    def get_content_tree(self, content_id: str):
+    def get_extracted_content(
+        self, ingested_content_id: str, graph_name: str, extractor_name: str, blocking=False
+    ):
         """
-        Get content tree for a given content id
+        Get list of child for a given content id and their content up to the specified level.
 
         Args:
-            - content_id (str): id of content
+            - ingested_content_id (str): id of content
+            - graph_name (str): name of extraction graph
+            - extractor_name (str): name of extractor
+            - blocking (bool): wait for extraction to complete before returning (default: False)
         """
+        if blocking:
+            self.wait_for_extraction(ingested_content_id)
         response = self.get(
-            f"namespaces/{self.namespace}/content/{content_id}/content-tree"
+            f"namespaces/{self.namespace}/extraction_graphs/{graph_name}/extraction_policies/{extractor_name}/content/{ingested_content_id}"
         )
-        return response.json()
-
-    def get_extracted_content(self, content_id: str, graph_name: str, policy_name: str):
-        """
-        Get list of child for a given content id and their content up to the specified level.
-
-        Args:
-            - content_id (str): id of content
-            - level (int): depth of content retrieval (default: 0)
-        """
-        content_tree = self.get_content_tree(content_id)
+        content_tree = response.json()
         child_list = []
         for item in content_tree["content_tree_metadata"]:
             if (
                 graph_name in item["extraction_graph_names"]
-                and item["source"] == policy_name
+                and item["source"] == extractor_name
             ):
                 content = self.download_content(item["id"])
-                child_list.append({"id": item["id"], "content": content})
+                child_list.append({"id": item["id"], "mime_type": item["mime_type"], "content": content})
 
         return child_list
 
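
With upload_file now posting once per extraction graph and get_extracted_content addressing a specific graph and policy, an end-to-end sketch looks like the following. The keyword names for upload_file are taken from variable names in the hunk body, and the file path, graph, and policy names are illustrative:

    from indexify import IndexifyClient

    client = IndexifyClient()
    content_id = client.upload_file(
        extraction_graphs="ingest-graph",
        path="/tmp/report.pdf",
        labels={"source": "local-disk"},
    )
    # blocking=True calls wait_for_extraction(content_id) before fetching children.
    children = client.get_extracted_content(
        ingested_content_id=content_id,
        graph_name="ingest-graph",
        extractor_name="chunker",
        blocking=True,
    )
    for child in children:
        print(child["id"], child["mime_type"], len(child["content"]))
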
@@ -660,9 +692,13 @@ class IndexifyClient:
         """
         if type(content_ids) == str:
             content_ids = [content_ids]
-        print("Waiting for extraction to complete for content id: ", ",".join(content_ids))
+        print(
+            "Waiting for extraction to complete for content id: ", ",".join(content_ids)
+        )
         for content_id in content_ids:
-            response = self.get(f"namespaces/{self.namespace}/content/{content_id}/wait")
+            response = self.get(
+                f"namespaces/{self.namespace}/content/{content_id}/wait"
+            )
             print("Extraction completed for content id: ", content_id)
             response.raise_for_status()
 
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "indexify"
-version = "0.0.29"
+version = "0.0.31"
 description = "Python Client for Indexify"
 authors = ["Diptanu Gon Choudhury <diptanuc@gmail.com>", "Lucas Jackson <lucas@tensorlake.ai>", "Vijay Parthasarathy <vijay2win@gmail.com>"]
 license = "Apache 2.0"