indexify 0.0.14__py3-none-any.whl → 0.0.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
indexify/client.py CHANGED
@@ -1,4 +1,7 @@
1
+ import yaml
1
2
  import httpx
3
+ import uuid
4
+ import hashlib
2
5
  import json
3
6
  from collections import namedtuple
4
7
  from .settings import DEFAULT_SERVICE_URL
@@ -12,7 +15,7 @@ from dataclasses import dataclass
12
15
 
13
16
  from typing import List, Optional, Union, Dict
14
17
 
15
- Document = namedtuple("Document", ["text", "labels"])
18
+ Document = namedtuple("Document", ["text", "labels", "id"])
16
19
 
17
20
  SQLQueryRow = namedtuple("SQLQueryRow", ["content_id", "data"])
18
21
 
@@ -44,14 +47,30 @@ class IndexifyClient:
44
47
  self,
45
48
  service_url: str = DEFAULT_SERVICE_URL,
46
49
  namespace: str = "default",
50
+ config_path: Optional[str] = None,
47
51
  *args,
48
52
  **kwargs,
49
53
  ):
54
+ if config_path:
55
+ with open(config_path, 'r') as file:
56
+ config = yaml.safe_load(file)
57
+
58
+ if config.get('use_tls', False):
59
+ tls_config = config['tls_config']
60
+ self._client = httpx.Client(
61
+ http2=True,
62
+ cert=(tls_config['cert_path'], tls_config['key_path']),
63
+ verify=tls_config.get('ca_bundle_path', True)
64
+ )
65
+ else:
66
+ self._client = httpx.Client(*args, **kwargs)
67
+ else:
68
+ self._client = httpx.Client(*args, **kwargs)
69
+
50
70
  self.namespace: str = namespace
51
71
  self.extraction_policies: List[ExtractionPolicy] = []
52
72
  self.labels: dict = {}
53
73
  self._service_url = service_url
54
- self._client = httpx.Client(*args, **kwargs)
55
74
 
56
75
  # get namespace data
57
76
  response = self.get(f"namespaces/{self.namespace}")
@@ -349,11 +368,21 @@ class IndexifyClient:
349
368
  except httpx.HTTPStatusError as exc:
350
369
  raise ApiException(exc.response.text)
351
370
  return
371
+
372
+ def get_content_metadata(self, content_id: str) -> dict:
373
+ """
374
+ Get metadata for a specific content ID in a given index.
352
375
 
353
- def get_content(
376
+ Args:
377
+ - content_id (str): content id to query
378
+ """
379
+ response = self.get(f"namespaces/{self.namespace}/content/{content_id}")
380
+ response.raise_for_status()
381
+ return response.json()
382
+
383
+ def get_extracted_content(
354
384
  self,
355
- parent_id: str = None,
356
- labels_eq: str = None,
385
+ content_id: str = None,
357
386
  ):
358
387
  """
359
388
  Get list of content from current namespace.
@@ -362,11 +391,7 @@ class IndexifyClient:
362
391
  - parent_id (str): Optional filter for parent id
363
392
  - labels_eq (str): Optional filter for labels
364
393
  """
365
- params = {}
366
- if parent_id:
367
- params.update({"parent_id": parent_id})
368
- if labels_eq:
369
- params.update({"labels_eq": labels_eq})
394
+ params = {"parent_id": content_id}
370
395
 
371
396
  response = self.get(f"namespaces/{self.namespace}/content", params=params)
372
397
  response.raise_for_status()
@@ -390,7 +415,7 @@ class IndexifyClient:
390
415
  raise ApiException(exc.response.text)
391
416
 
392
417
  def add_documents(
393
- self, documents: Union[Document, str, List[Union[Document, str]]]
418
+ self, documents: Union[Document, str, List[Union[Document, str]]], doc_id=None
394
419
  ) -> None:
395
420
  """
396
421
  Add documents to current namespace.
@@ -401,14 +426,14 @@ class IndexifyClient:
401
426
  if isinstance(documents, Document):
402
427
  documents = [documents]
403
428
  elif isinstance(documents, str):
404
- documents = [Document(documents, {})]
429
+ documents = [Document(documents, {}, id=doc_id)]
405
430
  elif isinstance(documents, list):
406
431
  new_documents = []
407
432
  for item in documents:
408
433
  if isinstance(item, Document):
409
434
  new_documents.append(item)
410
435
  elif isinstance(item, str):
411
- new_documents.append(Document(item, {}))
436
+ new_documents.append(Document(item, {}, id=None)) # don't pass in id for a string content because doesn't make sense to have same content id for all strings
412
437
  else:
413
438
  raise ValueError(
414
439
  "List items must be either Document instances or strings."
@@ -419,7 +444,7 @@ class IndexifyClient:
419
444
  "Invalid type for documents. Expected Document, str, or list of these."
420
445
  )
421
446
 
422
- req = {"documents": documents}
447
+ req = {"documents": [doc._asdict() for doc in documents]}
423
448
  response = self.post(
424
449
  f"namespaces/{self.namespace}/add_texts",
425
450
  json=req,
@@ -453,7 +478,7 @@ class IndexifyClient:
453
478
  response = self.put(f"namespaces/{self.namespace}/content/{document_id}", files={"file": f}, timeout=None)
454
479
  response.raise_for_status()
455
480
 
456
- def get_metadata(self, content_id: str) -> dict:
481
+ def get_structured_data(self, content_id: str) -> dict:
457
482
  """
458
483
  Query metadata for a specific content ID in a given index.
459
484
 
@@ -464,7 +489,7 @@ class IndexifyClient:
464
489
  response.raise_for_status()
465
490
  return response.json().get("metadata",[])
466
491
 
467
- def search_index(self, name: str, query: str, top_k: int) -> list[TextChunk]:
492
+ def search_index(self, name: str, query: str, top_k: int, filters: List[str] = None) -> list[TextChunk]:
468
493
  """
469
494
  Search index in the current namespace.
470
495
 
@@ -472,8 +497,11 @@ class IndexifyClient:
472
497
  - name (str): name of index to search
473
498
  - query (str): query string
474
499
  - top_k (int): top k nearest neighbors to be returned
500
+ - filters (List[str]): list of filters to apply
475
501
  """
476
- req = {"index": name, "query": query, "k": top_k}
502
+ if filters is None:
503
+ filters = []
504
+ req = {"index": name, "query": query, "k": top_k, "filters": filters}
477
505
  response = self.post(
478
506
  f"namespaces/{self.namespace}/search",
479
507
  json=req,
@@ -482,17 +510,23 @@ class IndexifyClient:
482
510
  response.raise_for_status()
483
511
  return response.json()["results"]
484
512
 
485
- def upload_file(self, path: str):
513
+ def upload_file(self, path: str, id=None, labels: dict = {}):
486
514
  """
487
515
  Upload a file.
488
516
 
489
517
  Args:
490
518
  - path (str): relative path to the file to be uploaded
519
+ - labels (dict): labels to be associated with the file
491
520
  """
521
+ params={}
522
+ if id is not None:
523
+ params['id'] = id
492
524
  with open(path, "rb") as f:
493
525
  response = self.post(
494
526
  f"namespaces/{self.namespace}/upload_file",
495
527
  files={"file": f},
528
+ data=labels,
529
+ params=params,
496
530
  timeout=None,
497
531
  )
498
532
  response.raise_for_status()
@@ -535,4 +569,27 @@ class IndexifyClient:
535
569
  )
536
570
  response.raise_for_status()
537
571
  return response.json()
572
+
573
+ def generate_unique_hex_id(self):
574
+ """
575
+ Generate a unique hexadecimal identifier
576
+
577
+ Returns:
578
+ str: a unique hexadecimal string
579
+ """
580
+ return uuid.uuid4().hex[:16]
581
+
582
+ def generate_hash_from_string(self, input_string: str):
583
+ """
584
+ Generate a hash for the given string and return it as a hexadecimal string.
585
+
586
+ Args:
587
+ input_string (str): The input string to hash.
588
+
589
+ Returns:
590
+ str: The hexadecimal hash of the input string.
591
+ """
592
+ hash_object = hashlib.sha256(input_string.encode())
593
+ return hash_object.hexdigest()[:16]
594
+
538
595
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: indexify
3
- Version: 0.0.14
3
+ Version: 0.0.16
4
4
  Summary: Python Client for Indexify
5
5
  Home-page: https://github.com/tensorlakeai/indexify
6
6
  License: Apache 2.0
@@ -1,5 +1,5 @@
1
1
  indexify/__init__.py,sha256=Sz6zkAIHsPOi0rG5RM7dVkXGDa0fO2uurD6vS4Qo15E,312
2
- indexify/client.py,sha256=sT4tcSuR3wQBF0yYStBRva3xUfe15X6GjZaViiRX2sA,16944
2
+ indexify/client.py,sha256=ZDirw1O46nRx0WBgB95jvpkd4LdAjgZnlQ_2A673_cI,19047
3
3
  indexify/data_containers.py,sha256=r1wxJPtsmXbyKvb17fqxm-dPjKz51oZ62f8A8Zxls1c,361
4
4
  indexify/exceptions.py,sha256=vjd5SPPNFIEW35GorSIodsqvm9RKHQm9kdp8t9gv-WM,111
5
5
  indexify/extraction_policy.py,sha256=vKVHT8jSjzhUaKqWpewOGkYojMBplvGdSm9zoSN9Pcg,750
@@ -7,7 +7,7 @@ indexify/extractor.py,sha256=KMcP9xopHJRBzeSxalztGGTBvOzVKRFEsJynV-hLRSc,1175
7
7
  indexify/index.py,sha256=RvxYhJXEth-GKvqzlMiz5PuN1eIbZk84pt20piA1Gsw,504
8
8
  indexify/settings.py,sha256=yzWAEZkrTjykSMj3hrFU7l_jUoUCOUsgPVW1nU-qzJQ,46
9
9
  indexify/utils.py,sha256=rDN2lrsAs9noJEIjfx6ukmC2SAIyrlUt7QU-kaBjujM,125
10
- indexify-0.0.14.dist-info/LICENSE.txt,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
11
- indexify-0.0.14.dist-info/METADATA,sha256=ANdIdnRnC6ISLYc1oTUb-BE-NjdgUg0iEu0dHRmonnI,1714
12
- indexify-0.0.14.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
13
- indexify-0.0.14.dist-info/RECORD,,
10
+ indexify-0.0.16.dist-info/LICENSE.txt,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
11
+ indexify-0.0.16.dist-info/METADATA,sha256=h_tYmLlbYT0g_9SJnec9hgey8AYP0VTKascytOt0_jE,1714
12
+ indexify-0.0.16.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
13
+ indexify-0.0.16.dist-info/RECORD,,