indexify 0.0.29__py3-none-any.whl → 0.0.32__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- indexify/client.py +89 -53
- {indexify-0.0.29.dist-info → indexify-0.0.32.dist-info}/METADATA +3 -3
- {indexify-0.0.29.dist-info → indexify-0.0.32.dist-info}/RECORD +5 -5
- {indexify-0.0.29.dist-info → indexify-0.0.32.dist-info}/LICENSE.txt +0 -0
- {indexify-0.0.29.dist-info → indexify-0.0.32.dist-info}/WHEEL +0 -0
indexify/client.py
CHANGED
@@ -154,7 +154,9 @@ class IndexifyClient:
|
|
154
154
|
response = self._client.request(method, timeout=self._timeout, **kwargs)
|
155
155
|
status_code = str(response.status_code)
|
156
156
|
if status_code.startswith("4"):
|
157
|
-
raise ApiException(
|
157
|
+
raise ApiException(
|
158
|
+
"status code: " + status_code + " request args: " + str(kwargs)
|
159
|
+
)
|
158
160
|
if status_code.startswith("5"):
|
159
161
|
raise ApiException(response.text)
|
160
162
|
# error = Error.from_tonic_error_string(str(response.url), response.text)
|
@@ -342,11 +344,11 @@ class IndexifyClient:
|
|
342
344
|
"""
|
343
345
|
Retrieve and update the list of extraction policies for the current namespace.
|
344
346
|
"""
|
345
|
-
response = self.get(f"namespaces/{self.namespace}")
|
347
|
+
response = self.get(f"namespaces/{self.namespace}/extraction_graphs")
|
346
348
|
json = response.json()
|
347
349
|
|
348
350
|
self.extraction_graphs = []
|
349
|
-
for graph in json["
|
351
|
+
for graph in json["extraction_graphs"]:
|
350
352
|
self.extraction_graphs.append(ExtractionGraph.from_dict(graph))
|
351
353
|
|
352
354
|
return self.extraction_graphs
|
@@ -368,6 +370,28 @@ class IndexifyClient:
|
|
368
370
|
)
|
369
371
|
return
|
370
372
|
|
373
|
+
def link_extraction_graphs(
|
374
|
+
self, source_graph: str, content_source: str, linked_graph: str
|
375
|
+
):
|
376
|
+
"""
|
377
|
+
Link an extraction graph to another extraction graph.
|
378
|
+
|
379
|
+
Args:
|
380
|
+
- source_graph (str): source extraction graph
|
381
|
+
- content_source (str): content source in source graph
|
382
|
+
- linked_graph (str): target extraction graph
|
383
|
+
"""
|
384
|
+
req = {
|
385
|
+
"content_source": content_source,
|
386
|
+
"linked_graph_name": linked_graph,
|
387
|
+
}
|
388
|
+
response = self.post(
|
389
|
+
f"namespaces/{self.namespace}/extraction_graphs/{source_graph}/links",
|
390
|
+
json=req,
|
391
|
+
headers={"Content-Type": "application/json"},
|
392
|
+
)
|
393
|
+
return
|
394
|
+
|
371
395
|
def get_content_metadata(self, content_id: str) -> dict:
|
372
396
|
"""
|
373
397
|
Get metadata for a specific content ID in a given index.
|
@@ -375,17 +399,17 @@ class IndexifyClient:
|
|
375
399
|
Args:
|
376
400
|
- content_id (str): content id to query
|
377
401
|
"""
|
378
|
-
response = self.get(f"namespaces/{self.namespace}/content/{content_id}")
|
402
|
+
response = self.get(f"namespaces/{self.namespace}/content/{content_id}/metadata")
|
379
403
|
return response.json()
|
380
404
|
|
381
|
-
def download_content(self,
|
405
|
+
def download_content(self, content_id: str) -> bytes:
|
382
406
|
"""
|
383
407
|
Download content from id. Return bytes
|
384
408
|
|
385
409
|
Args:
|
386
|
-
-
|
410
|
+
- content_id (str): id of content to download
|
387
411
|
"""
|
388
|
-
response = self.get(f"namespaces/{self.namespace}/content/{
|
412
|
+
response = self.get(f"namespaces/{self.namespace}/content/{content_id}/download")
|
389
413
|
return response.content
|
390
414
|
|
391
415
|
def add_documents(
|
@@ -424,21 +448,21 @@ class IndexifyClient:
|
|
424
448
|
raise TypeError(
|
425
449
|
"Invalid type for documents. Expected Document, str, or list of these."
|
426
450
|
)
|
427
|
-
|
428
|
-
|
429
|
-
|
430
|
-
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
|
435
|
-
|
436
|
-
|
437
|
-
|
438
|
-
|
439
|
-
|
440
|
-
|
441
|
-
|
451
|
+
for document in documents:
|
452
|
+
document.labels["mime_type"] = "text/plain"
|
453
|
+
content_ids = []
|
454
|
+
if isinstance(extraction_graphs, str):
|
455
|
+
extraction_graphs = [extraction_graphs]
|
456
|
+
for extraction_graph in extraction_graphs:
|
457
|
+
for document in documents:
|
458
|
+
response = self.post(
|
459
|
+
f"namespaces/{self.namespace}/extraction_graphs/{extraction_graph}/extract",
|
460
|
+
files={"file": document.text},
|
461
|
+
data={"labels": json.dumps(document.labels)},
|
462
|
+
)
|
463
|
+
response_json = response.json()
|
464
|
+
content_id = response_json["content_id"]
|
465
|
+
content_ids.append(content_id)
|
442
466
|
return content_ids
|
443
467
|
|
444
468
|
def delete_documents(self, document_ids: List[str]) -> None:
|
@@ -506,15 +530,22 @@ class IndexifyClient:
|
|
506
530
|
- top_k (int): top k nearest neighbors to be returned
|
507
531
|
- filters (List[str]): list of filters to apply
|
508
532
|
"""
|
509
|
-
req = {"
|
533
|
+
req = {"query": query, "k": top_k, "filters": filters}
|
510
534
|
response = self.post(
|
511
|
-
f"namespaces/{self.namespace}/search",
|
535
|
+
f"namespaces/{self.namespace}/indexes/{name}/search",
|
512
536
|
json=req,
|
513
537
|
headers={"Content-Type": "application/json"},
|
514
538
|
)
|
515
539
|
return response.json()["results"]
|
516
|
-
|
517
|
-
def list_content(
|
540
|
+
|
541
|
+
def list_content(
|
542
|
+
self,
|
543
|
+
extraction_graph: str,
|
544
|
+
extraction_policy: str = "",
|
545
|
+
labels_filter: List[str] = [],
|
546
|
+
start_id: str = "",
|
547
|
+
limit: int = 10,
|
548
|
+
) -> List[Content]:
|
518
549
|
"""
|
519
550
|
List content in the current namespace.
|
520
551
|
|
@@ -528,6 +559,8 @@ class IndexifyClient:
|
|
528
559
|
params["source"] = extraction_policy
|
529
560
|
else:
|
530
561
|
params["source"] = "ingestion"
|
562
|
+
if len(labels_filter) > 0:
|
563
|
+
params["labels_filter"] = labels_filter
|
531
564
|
response = self.get(
|
532
565
|
f"namespaces/{self.namespace}/content",
|
533
566
|
params=params,
|
@@ -554,18 +587,20 @@ class IndexifyClient:
|
|
554
587
|
"""
|
555
588
|
if isinstance(extraction_graphs, str):
|
556
589
|
extraction_graphs = [extraction_graphs]
|
557
|
-
params = {
|
590
|
+
params = {}
|
558
591
|
if id is not None:
|
559
592
|
params["id"] = id
|
560
593
|
with open(path, "rb") as f:
|
561
|
-
|
562
|
-
|
563
|
-
|
564
|
-
|
565
|
-
|
566
|
-
|
594
|
+
for extraction_graph in extraction_graphs:
|
595
|
+
response = self.post(
|
596
|
+
f"namespaces/{self.namespace}/extraction_graphs/{extraction_graph}/extract",
|
597
|
+
files={"file": f},
|
598
|
+
data={"labels": json.dumps(labels)},
|
599
|
+
params=params,
|
600
|
+
)
|
567
601
|
response_json = response.json()
|
568
|
-
|
602
|
+
content_id = response_json["content_id"]
|
603
|
+
return content_id
|
569
604
|
|
570
605
|
def list_schemas(self) -> List[str]:
|
571
606
|
"""
|
@@ -574,35 +609,32 @@ class IndexifyClient:
|
|
574
609
|
response = self.get(f"namespaces/{self.namespace}/schemas")
|
575
610
|
return response.json()
|
576
611
|
|
577
|
-
def
|
612
|
+
def get_extracted_content(
|
613
|
+
self, ingested_content_id: str, graph_name: str, extractor_name: str, blocking=False
|
614
|
+
):
|
578
615
|
"""
|
579
|
-
Get
|
616
|
+
Get list of child for a given content id and their content up to the specified level.
|
580
617
|
|
581
618
|
Args:
|
582
|
-
|
619
|
+
- ingested_content_id (str): id of content
|
620
|
+
- graph_name (str): name of extraction graph
|
621
|
+
- extractor_name (str): name of extractor
|
622
|
+
- blocking (bool): wait for extraction to complete before returning (default: False)
|
583
623
|
"""
|
624
|
+
if blocking:
|
625
|
+
self.wait_for_extraction(ingested_content_id)
|
584
626
|
response = self.get(
|
585
|
-
f"namespaces/{self.namespace}/
|
627
|
+
f"namespaces/{self.namespace}/extraction_graphs/{graph_name}/extraction_policies/{extractor_name}/content/{ingested_content_id}"
|
586
628
|
)
|
587
|
-
|
588
|
-
|
589
|
-
def get_extracted_content(self, content_id: str, graph_name: str, policy_name: str):
|
590
|
-
"""
|
591
|
-
Get list of child for a given content id and their content up to the specified level.
|
592
|
-
|
593
|
-
Args:
|
594
|
-
- content_id (str): id of content
|
595
|
-
- level (int): depth of content retrieval (default: 0)
|
596
|
-
"""
|
597
|
-
content_tree = self.get_content_tree(content_id)
|
629
|
+
content_tree = response.json()
|
598
630
|
child_list = []
|
599
631
|
for item in content_tree["content_tree_metadata"]:
|
600
632
|
if (
|
601
633
|
graph_name in item["extraction_graph_names"]
|
602
|
-
and item["source"] ==
|
634
|
+
and item["source"] == extractor_name
|
603
635
|
):
|
604
636
|
content = self.download_content(item["id"])
|
605
|
-
child_list.append({"id": item["id"], "content": content})
|
637
|
+
child_list.append({"id": item["id"], "mime_type": item["mime_type"], "content": content})
|
606
638
|
|
607
639
|
return child_list
|
608
640
|
|
@@ -660,9 +692,13 @@ class IndexifyClient:
|
|
660
692
|
"""
|
661
693
|
if type(content_ids) == str:
|
662
694
|
content_ids = [content_ids]
|
663
|
-
print(
|
695
|
+
print(
|
696
|
+
"Waiting for extraction to complete for content id: ", ",".join(content_ids)
|
697
|
+
)
|
664
698
|
for content_id in content_ids:
|
665
|
-
response = self.get(
|
699
|
+
response = self.get(
|
700
|
+
f"namespaces/{self.namespace}/content/{content_id}/wait"
|
701
|
+
)
|
666
702
|
print("Extraction completed for content id: ", content_id)
|
667
703
|
response.raise_for_status()
|
668
704
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: indexify
|
3
|
-
Version: 0.0.
|
3
|
+
Version: 0.0.32
|
4
4
|
Summary: Python Client for Indexify
|
5
5
|
Home-page: https://github.com/tensorlakeai/indexify
|
6
6
|
License: Apache 2.0
|
@@ -35,8 +35,8 @@ pip install indexify
|
|
35
35
|
|
36
36
|
## Usage
|
37
37
|
|
38
|
-
See the [getting started](https://getindexify.com/getting_started/) guide for examples of how to use the client.
|
39
|
-
Look at the [examples](examples) directory for more examples.
|
38
|
+
See the [getting started](https://docs.getindexify.com/getting_started/) guide for examples of how to use the client.
|
39
|
+
Look at the [examples](https://github.com/tensorlakeai/indexify/tree/main/examples) directory for more examples.
|
40
40
|
|
41
41
|
## Development
|
42
42
|
|
@@ -1,5 +1,5 @@
|
|
1
1
|
indexify/__init__.py,sha256=xqymbwqaiHiWXFpm7Cll2j-_V1lNQH2EEGlevtCTZK4,525
|
2
|
-
indexify/client.py,sha256=
|
2
|
+
indexify/client.py,sha256=YkNhM1xDe0VcPx9Z3yLdl3y_msoOrGAj3ykefcItVhE,24653
|
3
3
|
indexify/data_containers.py,sha256=fIX_rghpojrCUtmZ0grywoq_HWniDgN1mnR7yXDej-Y,874
|
4
4
|
indexify/error.py,sha256=3umTeYb0ugtUyehV1ibfvaeACxAONPyWPc-1HRN4d1M,856
|
5
5
|
indexify/exceptions.py,sha256=vjd5SPPNFIEW35GorSIodsqvm9RKHQm9kdp8t9gv-WM,111
|
@@ -8,7 +8,7 @@ indexify/extractor.py,sha256=sWFLlXHgEfWlmiKAXN6ytUt_uG7th-XGNHqz-TG39gs,1216
|
|
8
8
|
indexify/index.py,sha256=RvxYhJXEth-GKvqzlMiz5PuN1eIbZk84pt20piA1Gsw,504
|
9
9
|
indexify/settings.py,sha256=LSaWZ0ADIVmUv6o6dHWRC3-Ry5uLbCw2sBSg1e_U7UM,99
|
10
10
|
indexify/utils.py,sha256=rDN2lrsAs9noJEIjfx6ukmC2SAIyrlUt7QU-kaBjujM,125
|
11
|
-
indexify-0.0.
|
12
|
-
indexify-0.0.
|
13
|
-
indexify-0.0.
|
14
|
-
indexify-0.0.
|
11
|
+
indexify-0.0.32.dist-info/LICENSE.txt,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
12
|
+
indexify-0.0.32.dist-info/METADATA,sha256=Rj5F0dH8Ll6fRbwhiY0niW7JfcwWU-4F28pDRLp4w2s,1854
|
13
|
+
indexify-0.0.32.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
14
|
+
indexify-0.0.32.dist-info/RECORD,,
|
File without changes
|
File without changes
|