indexify 0.0.15__py3-none-any.whl → 0.0.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- indexify/client.py +61 -10
- indexify/settings.py +1 -0
- {indexify-0.0.15.dist-info → indexify-0.0.17.dist-info}/METADATA +1 -1
- {indexify-0.0.15.dist-info → indexify-0.0.17.dist-info}/RECORD +6 -6
- {indexify-0.0.15.dist-info → indexify-0.0.17.dist-info}/LICENSE.txt +0 -0
- {indexify-0.0.15.dist-info → indexify-0.0.17.dist-info}/WHEEL +0 -0
indexify/client.py
CHANGED
@@ -1,4 +1,7 @@
|
|
1
|
+
import yaml
|
1
2
|
import httpx
|
3
|
+
import uuid
|
4
|
+
import hashlib
|
2
5
|
import json
|
3
6
|
from collections import namedtuple
|
4
7
|
from .settings import DEFAULT_SERVICE_URL
|
@@ -12,7 +15,7 @@ from dataclasses import dataclass
|
|
12
15
|
|
13
16
|
from typing import List, Optional, Union, Dict
|
14
17
|
|
15
|
-
Document = namedtuple("Document", ["text", "labels"])
|
18
|
+
Document = namedtuple("Document", ["text", "labels", "id"])
|
16
19
|
|
17
20
|
SQLQueryRow = namedtuple("SQLQueryRow", ["content_id", "data"])
|
18
21
|
|
@@ -42,16 +45,32 @@ class IndexifyClient:
|
|
42
45
|
|
43
46
|
def __init__(
|
44
47
|
self,
|
45
|
-
service_url: str = DEFAULT_SERVICE_URL,
|
48
|
+
service_url: str = DEFAULT_SERVICE_URL, # switch this to DEFAULT_SERVICE_URL_HTTPS for TLS
|
46
49
|
namespace: str = "default",
|
50
|
+
config_path: Optional[str] = None,
|
47
51
|
*args,
|
48
52
|
**kwargs,
|
49
53
|
):
|
54
|
+
if config_path:
|
55
|
+
with open(config_path, 'r') as file:
|
56
|
+
config = yaml.safe_load(file)
|
57
|
+
|
58
|
+
if config.get('use_tls', False):
|
59
|
+
tls_config = config['tls_config']
|
60
|
+
self._client = httpx.Client(
|
61
|
+
http2=True,
|
62
|
+
cert=(tls_config['cert_path'], tls_config['key_path']),
|
63
|
+
verify=tls_config.get('ca_bundle_path', True)
|
64
|
+
)
|
65
|
+
else:
|
66
|
+
self._client = httpx.Client(*args, **kwargs)
|
67
|
+
else:
|
68
|
+
self._client = httpx.Client(*args, **kwargs)
|
69
|
+
|
50
70
|
self.namespace: str = namespace
|
51
71
|
self.extraction_policies: List[ExtractionPolicy] = []
|
52
72
|
self.labels: dict = {}
|
53
73
|
self._service_url = service_url
|
54
|
-
self._client = httpx.Client(*args, **kwargs)
|
55
74
|
|
56
75
|
# get namespace data
|
57
76
|
response = self.get(f"namespaces/{self.namespace}")
|
@@ -396,7 +415,7 @@ class IndexifyClient:
|
|
396
415
|
raise ApiException(exc.response.text)
|
397
416
|
|
398
417
|
def add_documents(
|
399
|
-
self, documents: Union[Document, str, List[Union[Document, str]]]
|
418
|
+
self, documents: Union[Document, str, List[Union[Document, str]]], doc_id=None
|
400
419
|
) -> None:
|
401
420
|
"""
|
402
421
|
Add documents to current namespace.
|
@@ -407,14 +426,14 @@ class IndexifyClient:
|
|
407
426
|
if isinstance(documents, Document):
|
408
427
|
documents = [documents]
|
409
428
|
elif isinstance(documents, str):
|
410
|
-
documents = [Document(documents, {})]
|
429
|
+
documents = [Document(documents, {}, id=doc_id)]
|
411
430
|
elif isinstance(documents, list):
|
412
431
|
new_documents = []
|
413
432
|
for item in documents:
|
414
433
|
if isinstance(item, Document):
|
415
434
|
new_documents.append(item)
|
416
435
|
elif isinstance(item, str):
|
417
|
-
new_documents.append(Document(item, {}))
|
436
|
+
new_documents.append(Document(item, {}, id=None)) # don't pass in id for a string content because doesn't make sense to have same content id for all strings
|
418
437
|
else:
|
419
438
|
raise ValueError(
|
420
439
|
"List items must be either Document instances or strings."
|
@@ -425,7 +444,7 @@ class IndexifyClient:
|
|
425
444
|
"Invalid type for documents. Expected Document, str, or list of these."
|
426
445
|
)
|
427
446
|
|
428
|
-
req = {"documents": documents}
|
447
|
+
req = {"documents": [doc._asdict() for doc in documents]}
|
429
448
|
response = self.post(
|
430
449
|
f"namespaces/{self.namespace}/add_texts",
|
431
450
|
json=req,
|
@@ -470,7 +489,7 @@ class IndexifyClient:
|
|
470
489
|
response.raise_for_status()
|
471
490
|
return response.json().get("metadata",[])
|
472
491
|
|
473
|
-
def search_index(self, name: str, query: str, top_k: int) -> list[TextChunk]:
|
492
|
+
def search_index(self, name: str, query: str, top_k: int, filters: List[str] = []) -> list[TextChunk]:
|
474
493
|
"""
|
475
494
|
Search index in the current namespace.
|
476
495
|
|
@@ -478,8 +497,9 @@ class IndexifyClient:
|
|
478
497
|
- name (str): name of index to search
|
479
498
|
- query (str): query string
|
480
499
|
- top_k (int): top k nearest neighbors to be returned
|
500
|
+
- filters (List[str]): list of filters to apply
|
481
501
|
"""
|
482
|
-
req = {"index": name, "query": query, "k": top_k}
|
502
|
+
req = {"index": name, "query": query, "k": top_k, "filters": filters}
|
483
503
|
response = self.post(
|
484
504
|
f"namespaces/{self.namespace}/search",
|
485
505
|
json=req,
|
@@ -488,20 +508,28 @@ class IndexifyClient:
|
|
488
508
|
response.raise_for_status()
|
489
509
|
return response.json()["results"]
|
490
510
|
|
491
|
-
def upload_file(self, path: str):
|
511
|
+
def upload_file(self, path: str, id=None, labels: Optional[dict] = None) -> str:
    """
    Upload a file.

    Args:
        - path (str): relative path to the file to be uploaded
        - id (optional): value sent as the ``id`` query parameter; the
          parameter is left out of the request when this is None
        - labels (dict): labels to be associated with the file, sent as the
          form data of the request; None (the default) means no labels

    Returns:
        str: the ``content_id`` field of the JSON response

    Raises:
        Whatever ``self.post`` or ``response.raise_for_status()`` raise for a
        failed request, and KeyError if the response has no ``content_id``.
    """
    # Build a fresh dict per call rather than sharing one mutable default
    # object between every invocation of this method.
    form_data = {} if labels is None else labels
    # Only include the id query parameter when the caller supplied one.
    params = {} if id is None else {"id": id}
    with open(path, "rb") as f:
        response = self.post(
            f"namespaces/{self.namespace}/upload_file",
            files={"file": f},
            data=form_data,
            params=params,
            timeout=None,
        )
    response.raise_for_status()
    return response.json()["content_id"]
|
505
533
|
|
506
534
|
def list_schemas(self) -> List[str]:
|
507
535
|
"""
|
@@ -541,4 +569,27 @@ class IndexifyClient:
|
|
541
569
|
)
|
542
570
|
response.raise_for_status()
|
543
571
|
return response.json()
|
572
|
+
|
573
|
+
def generate_unique_hex_id(self):
    """
    Produce a random 16-character hexadecimal identifier.

    The value is the leading 16 hex digits of a freshly generated UUID4.

    Returns:
        str: a 16-character lowercase hexadecimal string
    """
    full_hex = uuid.uuid4().hex
    return full_hex[:16]
|
581
|
+
|
582
|
+
def generate_hash_from_string(self, input_string: str):
    """
    Derive a short hexadecimal digest from a string.

    Args:
        input_string (str): the text to hash; it is encoded with the
            default ``str.encode()`` settings before hashing

    Returns:
        str: the first 16 hexadecimal characters of the SHA-256 digest
    """
    digest_prefix = hashlib.sha256(input_string.encode()).digest()[:8]
    # 8 raw bytes render as the same 16 hex characters the full hexdigest
    # starts with.
    return digest_prefix.hex()
|
594
|
+
|
544
595
|
|
indexify/settings.py
CHANGED
{indexify-0.0.15.dist-info → indexify-0.0.17.dist-info}/RECORD
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
indexify/__init__.py,sha256=Sz6zkAIHsPOi0rG5RM7dVkXGDa0fO2uurD6vS4Qo15E,312
|
2
|
-
indexify/client.py,sha256=
|
2
|
+
indexify/client.py,sha256=E5hIDcxT3zobky4-kCpi2R65Fiy8p_UjDr0r1oorY4E,19141
|
3
3
|
indexify/data_containers.py,sha256=r1wxJPtsmXbyKvb17fqxm-dPjKz51oZ62f8A8Zxls1c,361
|
4
4
|
indexify/exceptions.py,sha256=vjd5SPPNFIEW35GorSIodsqvm9RKHQm9kdp8t9gv-WM,111
|
5
5
|
indexify/extraction_policy.py,sha256=vKVHT8jSjzhUaKqWpewOGkYojMBplvGdSm9zoSN9Pcg,750
|
6
6
|
indexify/extractor.py,sha256=KMcP9xopHJRBzeSxalztGGTBvOzVKRFEsJynV-hLRSc,1175
|
7
7
|
indexify/index.py,sha256=RvxYhJXEth-GKvqzlMiz5PuN1eIbZk84pt20piA1Gsw,504
|
8
|
-
indexify/settings.py,sha256=
|
8
|
+
indexify/settings.py,sha256=UXUd6hYlDALPPjUCFvFkvUmsm7HwXAluWowCjZWoxjY,98
|
9
9
|
indexify/utils.py,sha256=rDN2lrsAs9noJEIjfx6ukmC2SAIyrlUt7QU-kaBjujM,125
|
10
|
-
indexify-0.0.
|
11
|
-
indexify-0.0.
|
12
|
-
indexify-0.0.
|
13
|
-
indexify-0.0.
|
10
|
+
indexify-0.0.17.dist-info/LICENSE.txt,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
11
|
+
indexify-0.0.17.dist-info/METADATA,sha256=k3HKuWAZNAXgTHmzaoby64lzk8BCWHKA3uYeGwu9F28,1714
|
12
|
+
indexify-0.0.17.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
13
|
+
indexify-0.0.17.dist-info/RECORD,,
|
File without changes
|
File without changes
|