indexify 0.0.15__tar.gz → 0.0.17__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: indexify
3
- Version: 0.0.15
3
+ Version: 0.0.17
4
4
  Summary: Python Client for Indexify
5
5
  Home-page: https://github.com/tensorlakeai/indexify
6
6
  License: Apache 2.0
@@ -1,4 +1,7 @@
1
+ import yaml
1
2
  import httpx
3
+ import uuid
4
+ import hashlib
2
5
  import json
3
6
  from collections import namedtuple
4
7
  from .settings import DEFAULT_SERVICE_URL
@@ -12,7 +15,7 @@ from dataclasses import dataclass
12
15
 
13
16
  from typing import List, Optional, Union, Dict
14
17
 
15
- Document = namedtuple("Document", ["text", "labels"])
18
+ Document = namedtuple("Document", ["text", "labels", "id"])
16
19
 
17
20
  SQLQueryRow = namedtuple("SQLQueryRow", ["content_id", "data"])
18
21
 
@@ -42,16 +45,32 @@ class IndexifyClient:
42
45
 
43
46
  def __init__(
44
47
  self,
45
- service_url: str = DEFAULT_SERVICE_URL,
48
+ service_url: str = DEFAULT_SERVICE_URL, # switch this to DEFAULT_SERVICE_URL_HTTPS for TLS
46
49
  namespace: str = "default",
50
+ config_path: Optional[str] = None,
47
51
  *args,
48
52
  **kwargs,
49
53
  ):
54
+ if config_path:
55
+ with open(config_path, 'r') as file:
56
+ config = yaml.safe_load(file)
57
+
58
+ if config.get('use_tls', False):
59
+ tls_config = config['tls_config']
60
+ self._client = httpx.Client(
61
+ http2=True,
62
+ cert=(tls_config['cert_path'], tls_config['key_path']),
63
+ verify=tls_config.get('ca_bundle_path', True)
64
+ )
65
+ else:
66
+ self._client = httpx.Client(*args, **kwargs)
67
+ else:
68
+ self._client = httpx.Client(*args, **kwargs)
69
+
50
70
  self.namespace: str = namespace
51
71
  self.extraction_policies: List[ExtractionPolicy] = []
52
72
  self.labels: dict = {}
53
73
  self._service_url = service_url
54
- self._client = httpx.Client(*args, **kwargs)
55
74
 
56
75
  # get namespace data
57
76
  response = self.get(f"namespaces/{self.namespace}")
@@ -396,7 +415,7 @@ class IndexifyClient:
396
415
  raise ApiException(exc.response.text)
397
416
 
398
417
  def add_documents(
399
- self, documents: Union[Document, str, List[Union[Document, str]]]
418
+ self, documents: Union[Document, str, List[Union[Document, str]]], doc_id=None
400
419
  ) -> None:
401
420
  """
402
421
  Add documents to current namespace.
@@ -407,14 +426,14 @@ class IndexifyClient:
407
426
  if isinstance(documents, Document):
408
427
  documents = [documents]
409
428
  elif isinstance(documents, str):
410
- documents = [Document(documents, {})]
429
+ documents = [Document(documents, {}, id=doc_id)]
411
430
  elif isinstance(documents, list):
412
431
  new_documents = []
413
432
  for item in documents:
414
433
  if isinstance(item, Document):
415
434
  new_documents.append(item)
416
435
  elif isinstance(item, str):
417
- new_documents.append(Document(item, {}))
436
+ new_documents.append(Document(item, {}, id=None)) # don't pass in an id for string content because it doesn't make sense to have the same content id for all strings
418
437
  else:
419
438
  raise ValueError(
420
439
  "List items must be either Document instances or strings."
@@ -425,7 +444,7 @@ class IndexifyClient:
425
444
  "Invalid type for documents. Expected Document, str, or list of these."
426
445
  )
427
446
 
428
- req = {"documents": documents}
447
+ req = {"documents": [doc._asdict() for doc in documents]}
429
448
  response = self.post(
430
449
  f"namespaces/{self.namespace}/add_texts",
431
450
  json=req,
@@ -470,7 +489,7 @@ class IndexifyClient:
470
489
  response.raise_for_status()
471
490
  return response.json().get("metadata",[])
472
491
 
473
- def search_index(self, name: str, query: str, top_k: int) -> list[TextChunk]:
492
+ def search_index(self, name: str, query: str, top_k: int, filters: List[str] = []) -> list[TextChunk]:
474
493
  """
475
494
  Search index in the current namespace.
476
495
 
@@ -478,8 +497,9 @@ class IndexifyClient:
478
497
  - name (str): name of index to search
479
498
  - query (str): query string
480
499
  - top_k (int): top k nearest neighbors to be returned
500
+ - filters (List[str]): list of filters to apply
481
501
  """
482
- req = {"index": name, "query": query, "k": top_k}
502
+ req = {"index": name, "query": query, "k": top_k, "filters": filters}
483
503
  response = self.post(
484
504
  f"namespaces/{self.namespace}/search",
485
505
  json=req,
@@ -488,20 +508,28 @@ class IndexifyClient:
488
508
  response.raise_for_status()
489
509
  return response.json()["results"]
490
510
 
491
- def upload_file(self, path: str):
511
+ def upload_file(self, path: str, id=None, labels: dict = {}) -> str:
492
512
  """
493
513
  Upload a file.
494
514
 
495
515
  Args:
496
516
  - path (str): relative path to the file to be uploaded
517
+ - labels (dict): labels to be associated with the file
497
518
  """
519
+ params={}
520
+ if id is not None:
521
+ params['id'] = id
498
522
  with open(path, "rb") as f:
499
523
  response = self.post(
500
524
  f"namespaces/{self.namespace}/upload_file",
501
525
  files={"file": f},
526
+ data=labels,
527
+ params=params,
502
528
  timeout=None,
503
529
  )
504
530
  response.raise_for_status()
531
+ response_json = response.json()
532
+ return response_json["content_id"]
505
533
 
506
534
  def list_schemas(self) -> List[str]:
507
535
  """
@@ -541,4 +569,27 @@ class IndexifyClient:
541
569
  )
542
570
  response.raise_for_status()
543
571
  return response.json()
572
+
573
+ def generate_unique_hex_id(self):
574
+ """
575
+ Generate a unique hexadecimal identifier
576
+
577
+ Returns:
578
+ str: a unique hexadecimal string
579
+ """
580
+ return uuid.uuid4().hex[:16]
581
+
582
+ def generate_hash_from_string(self, input_string: str):
583
+ """
584
+ Generate a hash for the given string and return it as a hexadecimal string.
585
+
586
+ Args:
587
+ input_string (str): The input string to hash.
588
+
589
+ Returns:
590
+ str: The hexadecimal hash of the input string.
591
+ """
592
+ hash_object = hashlib.sha256(input_string.encode())
593
+ return hash_object.hexdigest()[:16]
594
+
544
595
 
@@ -0,0 +1,2 @@
1
+ DEFAULT_SERVICE_URL = "http://localhost:8900"
2
+ DEFAULT_SERVICE_URL_HTTPS = "https://localhost:8900"
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "indexify"
3
- version = "0.0.15"
3
+ version = "0.0.17"
4
4
  description = "Python Client for Indexify"
5
5
  authors = ["Diptanu Gon Choudhury <diptanuc@gmail.com>", "Vijay Parthasarathy <vijay2win@gmail.com>"]
6
6
  license = "Apache 2.0"
@@ -1 +0,0 @@
1
- DEFAULT_SERVICE_URL = "http://localhost:8900"
File without changes
File without changes
File without changes
File without changes