indexify 0.0.15__tar.gz → 0.0.16__tar.gz

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: indexify
3
- Version: 0.0.15
3
+ Version: 0.0.16
4
4
  Summary: Python Client for Indexify
5
5
  Home-page: https://github.com/tensorlakeai/indexify
6
6
  License: Apache 2.0
@@ -1,4 +1,7 @@
1
+ import yaml
1
2
  import httpx
3
+ import uuid
4
+ import hashlib
2
5
  import json
3
6
  from collections import namedtuple
4
7
  from .settings import DEFAULT_SERVICE_URL
@@ -12,7 +15,7 @@ from dataclasses import dataclass
12
15
 
13
16
  from typing import List, Optional, Union, Dict
14
17
 
15
- Document = namedtuple("Document", ["text", "labels"])
18
+ Document = namedtuple("Document", ["text", "labels", "id"])
16
19
 
17
20
  SQLQueryRow = namedtuple("SQLQueryRow", ["content_id", "data"])
18
21
 
@@ -44,14 +47,30 @@ class IndexifyClient:
44
47
  self,
45
48
  service_url: str = DEFAULT_SERVICE_URL,
46
49
  namespace: str = "default",
50
+ config_path: Optional[str] = None,
47
51
  *args,
48
52
  **kwargs,
49
53
  ):
54
+ if config_path:
55
+ with open(config_path, 'r') as file:
56
+ config = yaml.safe_load(file)
57
+
58
+ if config.get('use_tls', False):
59
+ tls_config = config['tls_config']
60
+ self._client = httpx.Client(
61
+ http2=True,
62
+ cert=(tls_config['cert_path'], tls_config['key_path']),
63
+ verify=tls_config.get('ca_bundle_path', True)
64
+ )
65
+ else:
66
+ self._client = httpx.Client(*args, **kwargs)
67
+ else:
68
+ self._client = httpx.Client(*args, **kwargs)
69
+
50
70
  self.namespace: str = namespace
51
71
  self.extraction_policies: List[ExtractionPolicy] = []
52
72
  self.labels: dict = {}
53
73
  self._service_url = service_url
54
- self._client = httpx.Client(*args, **kwargs)
55
74
 
56
75
  # get namespace data
57
76
  response = self.get(f"namespaces/{self.namespace}")
@@ -396,7 +415,7 @@ class IndexifyClient:
396
415
  raise ApiException(exc.response.text)
397
416
 
398
417
  def add_documents(
399
- self, documents: Union[Document, str, List[Union[Document, str]]]
418
+ self, documents: Union[Document, str, List[Union[Document, str]]], doc_id=None
400
419
  ) -> None:
401
420
  """
402
421
  Add documents to current namespace.
@@ -407,14 +426,14 @@ class IndexifyClient:
407
426
  if isinstance(documents, Document):
408
427
  documents = [documents]
409
428
  elif isinstance(documents, str):
410
- documents = [Document(documents, {})]
429
+ documents = [Document(documents, {}, id=doc_id)]
411
430
  elif isinstance(documents, list):
412
431
  new_documents = []
413
432
  for item in documents:
414
433
  if isinstance(item, Document):
415
434
  new_documents.append(item)
416
435
  elif isinstance(item, str):
417
- new_documents.append(Document(item, {}))
436
+ new_documents.append(Document(item, {}, id=None)) # don't pass in id for a string content because doesn't make sense to have same content id for all strings
418
437
  else:
419
438
  raise ValueError(
420
439
  "List items must be either Document instances or strings."
@@ -425,7 +444,7 @@ class IndexifyClient:
425
444
  "Invalid type for documents. Expected Document, str, or list of these."
426
445
  )
427
446
 
428
- req = {"documents": documents}
447
+ req = {"documents": [doc._asdict() for doc in documents]}
429
448
  response = self.post(
430
449
  f"namespaces/{self.namespace}/add_texts",
431
450
  json=req,
@@ -470,7 +489,7 @@ class IndexifyClient:
470
489
  response.raise_for_status()
471
490
  return response.json().get("metadata",[])
472
491
 
473
- def search_index(self, name: str, query: str, top_k: int) -> list[TextChunk]:
492
+ def search_index(self, name: str, query: str, top_k: int, filters: List[str] = None) -> list[TextChunk]:
474
493
  """
475
494
  Search index in the current namespace.
476
495
 
@@ -478,8 +497,11 @@ class IndexifyClient:
478
497
  - name (str): name of index to search
479
498
  - query (str): query string
480
499
  - top_k (int): top k nearest neighbors to be returned
500
+ - filters (List[str]): list of filters to apply
481
501
  """
482
- req = {"index": name, "query": query, "k": top_k}
502
+ if filters is None:
503
+ filters = []
504
+ req = {"index": name, "query": query, "k": top_k, "filters": filters}
483
505
  response = self.post(
484
506
  f"namespaces/{self.namespace}/search",
485
507
  json=req,
@@ -488,17 +510,23 @@ class IndexifyClient:
488
510
  response.raise_for_status()
489
511
  return response.json()["results"]
490
512
 
491
- def upload_file(self, path: str):
513
+ def upload_file(self, path: str, id=None, labels: dict = {}):
492
514
  """
493
515
  Upload a file.
494
516
 
495
517
  Args:
496
518
  - path (str): relative path to the file to be uploaded
519
+ - labels (dict): labels to be associated with the file
497
520
  """
521
+ params={}
522
+ if id is not None:
523
+ params['id'] = id
498
524
  with open(path, "rb") as f:
499
525
  response = self.post(
500
526
  f"namespaces/{self.namespace}/upload_file",
501
527
  files={"file": f},
528
+ data=labels,
529
+ params=params,
502
530
  timeout=None,
503
531
  )
504
532
  response.raise_for_status()
@@ -541,4 +569,27 @@ class IndexifyClient:
541
569
  )
542
570
  response.raise_for_status()
543
571
  return response.json()
572
+
573
+ def generate_unique_hex_id(self):
574
+ """
575
+ Generate a unique hexadecimal identifier
576
+
577
+ Returns:
578
+ str: a unique hexadecimal string
579
+ """
580
+ return uuid.uuid4().hex[:16]
581
+
582
+ def generate_hash_from_string(self, input_string: str):
583
+ """
584
+ Generate a hash for the given string and return it as a hexadecimal string.
585
+
586
+ Args:
587
+ input_string (str): The input string to hash.
588
+
589
+ Returns:
590
+ str: The hexadecimal hash of the input string.
591
+ """
592
+ hash_object = hashlib.sha256(input_string.encode())
593
+ return hash_object.hexdigest()[:16]
594
+
544
595
 
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "indexify"
3
- version = "0.0.15"
3
+ version = "0.0.16"
4
4
  description = "Python Client for Indexify"
5
5
  authors = ["Diptanu Gon Choudhury <diptanuc@gmail.com>", "Vijay Parthasarathy <vijay2win@gmail.com>"]
6
6
  license = "Apache 2.0"
File without changes
File without changes
File without changes
File without changes