indexify 0.0.15__tar.gz → 0.0.16__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {indexify-0.0.15 → indexify-0.0.16}/PKG-INFO +1 -1
- {indexify-0.0.15 → indexify-0.0.16}/indexify/client.py +60 -9
- {indexify-0.0.15 → indexify-0.0.16}/pyproject.toml +1 -1
- {indexify-0.0.15 → indexify-0.0.16}/LICENSE.txt +0 -0
- {indexify-0.0.15 → indexify-0.0.16}/README.md +0 -0
- {indexify-0.0.15 → indexify-0.0.16}/indexify/__init__.py +0 -0
- {indexify-0.0.15 → indexify-0.0.16}/indexify/data_containers.py +0 -0
- {indexify-0.0.15 → indexify-0.0.16}/indexify/exceptions.py +0 -0
- {indexify-0.0.15 → indexify-0.0.16}/indexify/extraction_policy.py +0 -0
- {indexify-0.0.15 → indexify-0.0.16}/indexify/extractor.py +0 -0
- {indexify-0.0.15 → indexify-0.0.16}/indexify/index.py +0 -0
- {indexify-0.0.15 → indexify-0.0.16}/indexify/settings.py +0 -0
- {indexify-0.0.15 → indexify-0.0.16}/indexify/utils.py +0 -0
@@ -1,4 +1,7 @@
|
|
1
|
+
import yaml
|
1
2
|
import httpx
|
3
|
+
import uuid
|
4
|
+
import hashlib
|
2
5
|
import json
|
3
6
|
from collections import namedtuple
|
4
7
|
from .settings import DEFAULT_SERVICE_URL
|
@@ -12,7 +15,7 @@ from dataclasses import dataclass
|
|
12
15
|
|
13
16
|
from typing import List, Optional, Union, Dict
|
14
17
|
|
15
|
-
Document = namedtuple("Document", ["text", "labels"])
|
18
|
+
Document = namedtuple("Document", ["text", "labels", "id"])
|
16
19
|
|
17
20
|
SQLQueryRow = namedtuple("SQLQueryRow", ["content_id", "data"])
|
18
21
|
|
@@ -44,14 +47,30 @@ class IndexifyClient:
|
|
44
47
|
self,
|
45
48
|
service_url: str = DEFAULT_SERVICE_URL,
|
46
49
|
namespace: str = "default",
|
50
|
+
config_path: Optional[str] = None,
|
47
51
|
*args,
|
48
52
|
**kwargs,
|
49
53
|
):
|
54
|
+
if config_path:
|
55
|
+
with open(config_path, 'r') as file:
|
56
|
+
config = yaml.safe_load(file)
|
57
|
+
|
58
|
+
if config.get('use_tls', False):
|
59
|
+
tls_config = config['tls_config']
|
60
|
+
self._client = httpx.Client(
|
61
|
+
http2=True,
|
62
|
+
cert=(tls_config['cert_path'], tls_config['key_path']),
|
63
|
+
verify=tls_config.get('ca_bundle_path', True)
|
64
|
+
)
|
65
|
+
else:
|
66
|
+
self._client = httpx.Client(*args, **kwargs)
|
67
|
+
else:
|
68
|
+
self._client = httpx.Client(*args, **kwargs)
|
69
|
+
|
50
70
|
self.namespace: str = namespace
|
51
71
|
self.extraction_policies: List[ExtractionPolicy] = []
|
52
72
|
self.labels: dict = {}
|
53
73
|
self._service_url = service_url
|
54
|
-
self._client = httpx.Client(*args, **kwargs)
|
55
74
|
|
56
75
|
# get namespace data
|
57
76
|
response = self.get(f"namespaces/{self.namespace}")
|
@@ -396,7 +415,7 @@ class IndexifyClient:
|
|
396
415
|
raise ApiException(exc.response.text)
|
397
416
|
|
398
417
|
def add_documents(
|
399
|
-
self, documents: Union[Document, str, List[Union[Document, str]]]
|
418
|
+
self, documents: Union[Document, str, List[Union[Document, str]]], doc_id=None
|
400
419
|
) -> None:
|
401
420
|
"""
|
402
421
|
Add documents to current namespace.
|
@@ -407,14 +426,14 @@ class IndexifyClient:
|
|
407
426
|
if isinstance(documents, Document):
|
408
427
|
documents = [documents]
|
409
428
|
elif isinstance(documents, str):
|
410
|
-
documents = [Document(documents, {})]
|
429
|
+
documents = [Document(documents, {}, id=doc_id)]
|
411
430
|
elif isinstance(documents, list):
|
412
431
|
new_documents = []
|
413
432
|
for item in documents:
|
414
433
|
if isinstance(item, Document):
|
415
434
|
new_documents.append(item)
|
416
435
|
elif isinstance(item, str):
|
417
|
-
new_documents.append(Document(item, {}))
|
436
|
+
new_documents.append(Document(item, {}, id=None)) # don't pass in id for a string content because doesn't make sense to have same content id for all strings
|
418
437
|
else:
|
419
438
|
raise ValueError(
|
420
439
|
"List items must be either Document instances or strings."
|
@@ -425,7 +444,7 @@ class IndexifyClient:
|
|
425
444
|
"Invalid type for documents. Expected Document, str, or list of these."
|
426
445
|
)
|
427
446
|
|
428
|
-
req = {"documents": documents}
|
447
|
+
req = {"documents": [doc._asdict() for doc in documents]}
|
429
448
|
response = self.post(
|
430
449
|
f"namespaces/{self.namespace}/add_texts",
|
431
450
|
json=req,
|
@@ -470,7 +489,7 @@ class IndexifyClient:
|
|
470
489
|
response.raise_for_status()
|
471
490
|
return response.json().get("metadata",[])
|
472
491
|
|
473
|
-
def search_index(self, name: str, query: str, top_k: int) -> list[TextChunk]:
|
492
|
+
def search_index(self, name: str, query: str, top_k: int, filters: List[str] = None) -> list[TextChunk]:
|
474
493
|
"""
|
475
494
|
Search index in the current namespace.
|
476
495
|
|
@@ -478,8 +497,11 @@ class IndexifyClient:
|
|
478
497
|
- name (str): name of index to search
|
479
498
|
- query (str): query string
|
480
499
|
- top_k (int): top k nearest neighbors to be returned
|
500
|
+
- filters (List[str]): list of filters to apply
|
481
501
|
"""
|
482
|
-
|
502
|
+
if filters is None:
|
503
|
+
filters = []
|
504
|
+
req = {"index": name, "query": query, "k": top_k, "filters": filters}
|
483
505
|
response = self.post(
|
484
506
|
f"namespaces/{self.namespace}/search",
|
485
507
|
json=req,
|
@@ -488,17 +510,23 @@ class IndexifyClient:
|
|
488
510
|
response.raise_for_status()
|
489
511
|
return response.json()["results"]
|
490
512
|
|
491
|
-
def upload_file(self, path: str):
|
513
|
+
def upload_file(self, path: str, id=None, labels: dict = {}):
|
492
514
|
"""
|
493
515
|
Upload a file.
|
494
516
|
|
495
517
|
Args:
|
496
518
|
- path (str): relative path to the file to be uploaded
|
519
|
+
- labels (dict): labels to be associated with the file
|
497
520
|
"""
|
521
|
+
params={}
|
522
|
+
if id is not None:
|
523
|
+
params['id'] = id
|
498
524
|
with open(path, "rb") as f:
|
499
525
|
response = self.post(
|
500
526
|
f"namespaces/{self.namespace}/upload_file",
|
501
527
|
files={"file": f},
|
528
|
+
data=labels,
|
529
|
+
params=params,
|
502
530
|
timeout=None,
|
503
531
|
)
|
504
532
|
response.raise_for_status()
|
@@ -541,4 +569,27 @@ class IndexifyClient:
|
|
541
569
|
)
|
542
570
|
response.raise_for_status()
|
543
571
|
return response.json()
|
572
|
+
|
573
|
+
def generate_unique_hex_id(self):
|
574
|
+
"""
|
575
|
+
Generate a unique hexadecimal identifier
|
576
|
+
|
577
|
+
Returns:
|
578
|
+
str: a unique hexadecimal string
|
579
|
+
"""
|
580
|
+
return uuid.uuid4().hex[:16]
|
581
|
+
|
582
|
+
def generate_hash_from_string(self, input_string: str):
|
583
|
+
"""
|
584
|
+
Generate a hash for the given string and return it as a hexadecimal string.
|
585
|
+
|
586
|
+
Args:
|
587
|
+
input_string (str): The input string to hash.
|
588
|
+
|
589
|
+
Returns:
|
590
|
+
str: The hexadecimal hash of the input string.
|
591
|
+
"""
|
592
|
+
hash_object = hashlib.sha256(input_string.encode())
|
593
|
+
return hash_object.hexdigest()[:16]
|
594
|
+
|
544
595
|
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|